Difference between revisions of "AGGREGATE world"
m (40166222 moved page AGGREGATE world (PyMongo) to AGGREGATE world over a redirect without leaving a redirect) |
|
(No difference)
|
Revision as of 12:44, 22 July 2015
#ENCODING import io import sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16') #MONGO from pymongo import MongoClient client = MongoClient() client.progzoo.authenticate('scott','tiger') db = client['progzoo'] #PRETTY import pprint pp = pprint.PrettyPrinter(indent=4)
Country Profile
For these questions you should use aggregate([])
on the collection world
Give the name
and the per capita GDP
for those countries with a population
of at least 200 million.
per capita GDP is the GDP divided by the population.
pp.pprint(list( db.world.aggregate([ {"$match":{ "population":{"$gte":250000000} }}, {"$project":{ "_id":0, "name":1, "per capita GDP": {"$divide": ["$gdp",1000000]} }} ]) ))
Give the name
and the population density
of all countries. Ignore results where the density is "None".
population density is the population divided by the area
Use a
$match
. {"area":{"$ne":0}}
pp.pprint(list(
db.world.aggregate([
{"$project":{
"_id":0,
"name":1,
"density": {"$divide": ["$population","$area"]}
}},
{"$match":{
"density": {"$ne":None}
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))
Show the name
and population
in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.
pp.pprint(list(
db.world.aggregate([
{"$match":{
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))
Show the name
and population density
for France, Germany, and Italy
pp.pprint(list(
db.world.aggregate([
{"$match":{
"name": {"$in":['United Kingdom','United States','Brazil']},
"population": {"$ne": None},
"area": {"$ne": 0}
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
Order the continents
by area
from most to least.
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$name",
"area":{"$max": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":"$continent",
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
Harder Questions
Using Conditions
$cond
is similar to a CASE
statement in other languages.
It has the form "$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]
Using $cond
, reattempt the above question but change Eurasia to Europe
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","Eurasia"]},"Europe","$continent"]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))
Combine North America and South America to America, and then list the continents by area. Biggest first.
pp.pprint(list(
db.world.aggregate([
{"$group":{
"_id":{
"$cond": [{"$eq":["$continent","North America"]},"America",
{"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
},
"area":{"$sum": "$area"}
}},
{"$sort":{
"area": -1
}},
{"$project":{
"_id":1,
"area":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))
Show the name and the continent - but substitute Australasia for Oceania - for countries beginning with N.
pp.pprint(list(
db.world.aggregate([
{"$match":{
"name":{"$regex":"^N"}
}},
{"$project":{
"_id":0,
"name":1
}}
])
))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))
Show the name and the continent but:
substitute Eurasia for Europe and Asia.
substitute America - for each country in North America or South America or Caribbean.
Only show countries beginning with A or B
If you're struggling you may want to experiment with $and
,$or
, etc.
pp.pprint(list(
db.world.aggregate([
{"$match":{
"name":{"$regex":"^A|^B"}
}},
{"$project":{
"_id":0,
"name":1,
"continent": {
"$cond": [{
"$or":[
{"$eq":["$continent","Europe"]},
{"$eq":["$continent","Asia"]}
]},"Eurasia",{
"$cond": [
{"$or":[
{"$eq":["$continent","North America"]},
{"$eq":["$continent","South America"]},
{"$eq":["$continent","Caribbean"]}
]},"America","$continent"]}
]}
}}
])
))
Put the continents right...
Oceania becomes Australasia
Countries in Eurasia and Turkey go to Europe/Asia
Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America
Show the name, the original continent and the new continent of all countries.
pp.pprint(list(
db.world.aggregate([
{"$project":{
"_id":0,
"name":1,
"original": "$continent",
"new": {
"$cond": [
{"$or":[
{"$eq":["$continent","Eurasia"]},
{"$eq":["$name","Turkey"]}
]},"Europe/Asia",{
"$cond":[
{"$eq":["$continent","Oceania"]},"Australasia",{
"$cond":[
{"$and":[
{"$eq":["$continent","Caribbean"]},
{"$eq":[{"$substr":["$name",0,1]}, "B"]}
]},"North America",{
"$cond":[
{"$and":[
{"$eq":["$continent","Caribbean"]},
{"$ne":[{"$substr":["$name",0,1]}, "B"]}
]},"South America","$continent"
]
}
]
}
]
}
]
}
}}
])
))