Difference between revisions of "AGGREGATE world"
Line 159: | Line 159: | ||
{"$group":{ | {"$group":{ | ||
"_id":{ | "_id":{ | ||
− | "$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"] | + | "$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"] |
}, | }, | ||
"area":{"$sum": "$area"} | "area":{"$sum": "$area"} | ||
Line 178: | Line 178: | ||
{"$group":{ | {"$group":{ | ||
"_id":{ | "_id":{ | ||
− | "$cond": ["$eq" :["$continent","Eurasia"]},"Europe","$continent"] | + | "$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"] |
}, | }, | ||
"area":{"$sum": "$area"} | "area":{"$sum": "$area"} |
Revision as of 13:22, 17 July 2015
#ENCODING import io import sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16') #MONGO from pymongo import MongoClient client = MongoClient() client.progzoo.authenticate('scott','tiger') db = client['progzoo'] #PRETTY import pprint pp = pprint.PrettyPrinter(indent=4)
Country Profile
For these questions you should use aggregate([])
on the collection world
Give the name
and the per capita GDP
for those countries with a population
of at least 200 million.
per capita GDP is the GDP divided by the population.
pp.pprint(list( db.world.aggregate([ {"$match":{ "population":{"$gte":250000000} }}, {"$project":{ "_id":0, "name":1, "per capita GDP": {"$divide": ["$gdp",1000000]} }} ]) ))
Give the name
and the population density
of all countries. Ignore results where the density is "None".
population density is the population divided by the area
Use a
$match
. {"area":{"$ne":0}}
pp.pprint(list(
db.world.aggregate([
{"$project":{
"_id":0,
"name":1,
"density": {"$divide": [10000,"$area"]}
}},
{"$match":{
"density": {"$ne":None}
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))
Show the name
and population
in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.
pp.pprint(list(
db.world.aggregate([
{"$match":{
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))
Show the name
and population density
for France, Germany, and Italy
pp.pprint(list(
db.world.aggregate([
{"$match":{
"name": {"$in":['United Kingdom','United States','Brazil']},
"population": {"$ne": None},
"area": {"$ne": 0}
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
Order the continents
by area
from most to least.
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$name",
"area":{"$max": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$continent",
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
Harder Questions
Using Conditions
$cond
is similar to a CASE
statement in other languages.
It has the form "$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]
Using $cond
, reattempt the above question but change Eurasia to Europe
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))