AGGREGATE world: Difference between revisions
From NoSQLZoo
No edit summary |
No edit summary |
||
| Line 159: | Line 159: | ||
{"$group":{ | {"$group":{ | ||
"_id":{ | "_id":{ | ||
"$cond": [ | "$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"] | ||
}, | }, | ||
"area":{"$sum": "$area"} | "area":{"$sum": "$area"} | ||
| Line 178: | Line 178: | ||
{"$group":{ | {"$group":{ | ||
"_id":{ | "_id":{ | ||
"$cond": [ | "$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"] | ||
}, | }, | ||
"area":{"$sum": "$area"} | "area":{"$sum": "$area"} | ||
Revision as of 13:21, 17 July 2015
#ENCODING
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
import pprint
pp = pprint.PrettyPrinter(indent=4)
Country Profile
For these questions you should use aggregate([]) on the collection world
Give the name and the per capita GDP for those countries with a population of at least 200 million.
per capita GDP is the GDP divided by the population.
pp.pprint(list(
db.world.aggregate([
{"$match":{
"population":{"$gte":250000000}
}},
{"$project":{
"_id":0,
"name":1,
"per capita GDP": {"$divide": ["$gdp",1000000]}
}}
])
))
Give the name and the population density of all countries. Ignore results where the density is "None".
population density is the population divided by the area
Use a $match. {"area":{"$ne":0}}
pp.pprint(list(
db.world.aggregate([
{"$project":{
"_id":0,
"name":1,
"density": {"$divide": [10000,"$area"]}
}},
{"$match":{
"density": {"$ne":None}
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))
Show the name and population in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.
pp.pprint(list(
db.world.aggregate([
{"$match":{
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))
Show the name and population density for France, Germany, and Italy
pp.pprint(list(
db.world.aggregate([
{"$match":{
"name": {"$in":['United Kingdom','United States','Brazil']},
"population": {"$ne": None},
"area": {"$ne": 0}
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
Order the continents by area from most to least.
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$name",
"area":{"$max": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$continent",
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
Harder Questions
Using Conditions
$cond is similar to a CASE statement in other languages.
It has the form "$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]
Using $cond, reattempt the above question but change Eurasia to Europe
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))