Difference between revisions of "AGGREGATE world"
(60 intermediate revisions by 2 users not shown) | |||
Line 11: | Line 11: | ||
#PRETTY | #PRETTY | ||
import pprint | import pprint | ||
− | pp = pprint.PrettyPrinter(indent=4) | + | pp = pprint.PrettyPrinter(indent=4, width=160) |
</pre> | </pre> | ||
==Country Profile== | ==Country Profile== | ||
− | For these questions you should use | + | For these questions you should use <code>aggregate([])</code> on the collection <code>world</code> |
+ | <div class='extra_space' style='width:1em; height:6em;'></div> | ||
+ | <div class=q data-lang="py3"> | ||
+ | Give the <code>name</code> and the <code>per capita GDP</code> for those countries with a <code>population</code> of at least 200 million. | ||
+ | <div class="hint" title="How to calculate per capita GDP"> | ||
+ | per capita GDP is the GDP divided by the population. | ||
+ | </div> | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$match":{ | ||
+ | "population":{"$gte":250000000} | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1, | ||
+ | "per capita GDP": {"$divide": ["$gdp",1000000]} | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans>pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Give the <code>name</code> and the <code>population density</code> of all countries. Ignore results where the density is "None". | ||
+ | <div class="hint" title="How to calculate population density"> | ||
+ | population density is the population divided by the area | ||
+ | </div> | ||
+ | <div class="hint" title="Division by 0 error?"> | ||
+ | Use a <code>$match</code>. <code>{"area":{"$ne":0}}</code> | ||
+ | </div> | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1, | ||
+ | "density": {"$divide": ["$population","$area"]} | ||
+ | }}, | ||
+ | {"$match":{ | ||
+ | "density": {"$ne":None} | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Show the <code>name</code> and <code>population</code> in millions for the countries of the continent <b>South America</b>. Divide the population by 1000000 to get population in millions. | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$match":{ | ||
+ | |||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Show the <code>name</code> and <code>population density</code> for <b>France</b>, <b>Germany</b>, and <b>Italy</b> | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$match":{ | ||
+ | "name": {"$in":['United Kingdom','United States','Brazil']}, | ||
+ | "population": {"$ne": None}, | ||
+ | "area": {"$ne": 0} | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Order the <code>continents</code> by <code>area</code> from most to least. | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$group":{ | ||
+ | "_id":"$name", | ||
+ | "area":{"$max": "$area"} | ||
+ | }}, | ||
+ | {"$sort":{ | ||
+ | "area": -1 | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":1, | ||
+ | "area":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$group":{ | ||
+ | "_id":"$continent", | ||
+ | "area":{"$sum": "$area"} | ||
+ | }}, | ||
+ | {"$sort":{ | ||
+ | "area": -1 | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":1, | ||
+ | "area":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </div> | ||
+ | </div> | ||
+ | ==Harder Questions== | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
− | + | Print a list of names for countries in the continent of "North America" change United States to USA | |
<pre class=def> | <pre class=def> | ||
pp.pprint(list( | pp.pprint(list( | ||
− | db.world. | + | db.world.aggregate([ |
+ | {"$match":{ | ||
+ | "continent":"North America" | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1 | ||
+ | }} | ||
+ | ]) | ||
)) | )) | ||
</pre> | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Combine <b>North America</b> and <b>South America</b> to <b>America</b>, and then list the continents by area. Biggest first. | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$group":{ | ||
+ | "_id":{ | ||
+ | "$cond": [{"$eq":["$continent","North America"]},"America", | ||
+ | {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}] | ||
+ | }, | ||
+ | "area":{"$sum": "$area"} | ||
+ | }}, | ||
+ | {"$sort":{ | ||
+ | "area": -1 | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":1, | ||
+ | "area":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Show the <b>name</b> and the <b>continent</b> for countries beginning with N - but replace the continent <b>Oceania</b> with <b>Australasia</b>. | ||
+ | <pre class=def> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$match":{ | ||
+ | "name":{"$regex":"^N"} | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1 | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}]))) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Show the <b>name</b> and the <b>continent</b> but:<br/><br/> | ||
+ | substitute <b>Eurasia</b> for <b>Europe</b> and <b>Asia</b>.<br/> | ||
+ | substitute <b>America</b> - for each country in <b>North America</b> or <b>South America</b> or <b>Caribbean</b>.<br/><br/>Only show countries beginning with <b>A</b> or <b>B</b><br/> | ||
+ | If you're struggling you may want to experiment with <code>$and</code>,<code>$or</code>, etc. | ||
+ | <pre class=def></pre> | ||
+ | <div class=ans> | ||
+ | pp.pprint(list( | ||
+ | db.world.aggregate([ | ||
+ | {"$match":{ | ||
+ | "name":{"$regex":"^A|^B"} | ||
+ | }}, | ||
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1, | ||
+ | "continent": { | ||
+ | "$cond": [{ | ||
+ | "$or":[ | ||
+ | {"$eq":["$continent","Europe"]}, | ||
+ | {"$eq":["$continent","Asia"]} | ||
+ | ]},"Eurasia",{ | ||
+ | "$cond": [ | ||
+ | {"$or":[ | ||
+ | {"$eq":["$continent","North America"]}, | ||
+ | {"$eq":["$continent","South America"]}, | ||
+ | {"$eq":["$continent","Caribbean"]} | ||
+ | ]},"America","$continent"]} | ||
+ | ]} | ||
+ | }} | ||
+ | ]) | ||
+ | )) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | Put the continents right... | ||
+ | <br/><br/> | ||
+ | <b>Oceania</b> becomes <b>Australasia</b><br/> | ||
+ | Countries in <b>Eurasia</b> and <b>Turkey</b> go to <b>Europe/Asia</b><br/> | ||
+ | <b>Caribbean</b> islands starting with '<b>B</b>' go to <b>North America</b>, other Caribbean islands go to <b>South America</b><br/> | ||
+ | <br/><br/> | ||
+ | <b>Show the name, the original continent and the new continent of all countries.</b> | ||
+ | <pre class=def></pre> | ||
<div class=ans> | <div class=ans> | ||
pp.pprint(list( | pp.pprint(list( | ||
− | db.world. | + | db.world.aggregate([ |
+ | {"$project":{ | ||
+ | "_id":0, | ||
+ | "name":1, | ||
+ | "original": "$continent", | ||
+ | "new": { | ||
+ | "$cond": [ | ||
+ | {"$or":[ | ||
+ | {"$eq":["$continent","Eurasia"]}, | ||
+ | {"$eq":["$name","Turkey"]} | ||
+ | ]},"Europe/Asia",{ | ||
+ | "$cond":[ | ||
+ | {"$eq":["$continent","Oceania"]},"Australasia",{ | ||
+ | "$cond":[ | ||
+ | {"$and":[ | ||
+ | {"$eq":["$continent","Caribbean"]}, | ||
+ | {"$eq":[{"$substr":["$name",0,1]}, "B"]} | ||
+ | ]},"North America",{ | ||
+ | "$cond":[ | ||
+ | {"$and":[ | ||
+ | {"$eq":["$continent","Caribbean"]}, | ||
+ | {"$ne":[{"$substr":["$name",0,1]}, "B"]} | ||
+ | ]},"South America","$continent" | ||
+ | ] | ||
+ | } | ||
+ | ] | ||
+ | } | ||
+ | ] | ||
+ | } | ||
+ | ] | ||
+ | } | ||
+ | }} | ||
+ | ]) | ||
)) | )) | ||
</div> | </div> | ||
</div> | </div> |
Latest revision as of 14:55, 30 July 2015
#ENCODING import io import sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16') #MONGO from pymongo import MongoClient client = MongoClient() client.progzoo.authenticate('scott','tiger') db = client['progzoo'] #PRETTY import pprint pp = pprint.PrettyPrinter(indent=4, width=160)
Country Profile
For these questions you should use aggregate([])
on the collection world
Give the name
and the per capita GDP
for those countries with a population
of at least 200 million.
per capita GDP is the GDP divided by the population.
pp.pprint(list( db.world.aggregate([ {"$match":{ "population":{"$gte":250000000} }}, {"$project":{ "_id":0, "name":1, "per capita GDP": {"$divide": ["$gdp",1000000]} }} ]) ))
Give the name
and the population density
of all countries. Ignore results where the density is "None".
population density is the population divided by the area
Use a $match
. {"area":{"$ne":0}}
pp.pprint(list( db.world.aggregate([ {"$project":{ "_id":0, "name":1, "density": {"$divide": ["$population","$area"]} }}, {"$match":{ "density": {"$ne":None} }} ]) ))
pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))
Show the name
and population
in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.
pp.pprint(list( db.world.aggregate([ {"$match":{ }}, {"$project":{ "_id":0, "name":1 }} ]) ))
pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))
Show the name
and population density
for France, Germany, and Italy
pp.pprint(list( db.world.aggregate([ {"$match":{ "name": {"$in":['United Kingdom','United States','Brazil']}, "population": {"$ne": None}, "area": {"$ne": 0} }}, {"$project":{ "_id":0, "name":1 }} ]) ))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
Order the continents
by area
from most to least.
pp.pprint(list( db.world.aggregate([ {"$group":{ "_id":"$name", "area":{"$max": "$area"} }}, {"$sort":{ "area": -1 }}, {"$project":{ "_id":1, "area":1 }} ]) ))
pp.pprint(list(
db.world.aggregate([ {"$group":{ "_id":"$continent", "area":{"$sum": "$area"} }}, {"$sort":{ "area": -1 }}, {"$project":{ "_id":1, "area":1 }} ])
))
Harder Questions
Print a list of names for countries in the continent of "North America" change United States to USA
pp.pprint(list( db.world.aggregate([ {"$match":{ "continent":"North America" }}, {"$project":{ "_id":0, "name":1 }} ]) ))
pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}])))
Combine North America and South America to America, and then list the continents by area. Biggest first.
pp.pprint(list( db.world.aggregate([ {"$group":{ "_id":{ "$cond": [{"$eq":["$continent","North America"]},"America", {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}] }, "area":{"$sum": "$area"} }}, {"$sort":{ "area": -1 }}, {"$project":{ "_id":1, "area":1 }} ]) ))
pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))
Show the name and the continent for countries beginning with N - but replace the continent Oceania with Australasia.
pp.pprint(list( db.world.aggregate([ {"$match":{ "name":{"$regex":"^N"} }}, {"$project":{ "_id":0, "name":1 }} ]) ))
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))
Show the name and the continent but:
substitute Eurasia for Europe and Asia.
substitute America - for each country in North America or South America or Caribbean.
Only show countries beginning with A or B
If you're struggling you may want to experiment with $and
,$or
, etc.
pp.pprint(list(
db.world.aggregate([ {"$match":{ "name":{"$regex":"^A|^B"} }}, {"$project":{ "_id":0, "name":1, "continent": { "$cond": [{ "$or":[ {"$eq":["$continent","Europe"]}, {"$eq":["$continent","Asia"]} ]},"Eurasia",{ "$cond": [ {"$or":[ {"$eq":["$continent","North America"]}, {"$eq":["$continent","South America"]}, {"$eq":["$continent","Caribbean"]} ]},"America","$continent"]} ]} }} ])
))
Put the continents right...
Oceania becomes Australasia
Countries in Eurasia and Turkey go to Europe/Asia
Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America
Show the name, the original continent and the new continent of all countries.
pp.pprint(list(
db.world.aggregate([ {"$project":{ "_id":0, "name":1, "original": "$continent", "new": { "$cond": [ {"$or":[ {"$eq":["$continent","Eurasia"]}, {"$eq":["$name","Turkey"]} ]},"Europe/Asia",{ "$cond":[ {"$eq":["$continent","Oceania"]},"Australasia",{ "$cond":[ {"$and":[ {"$eq":["$continent","Caribbean"]}, {"$eq":[{"$substr":["$name",0,1]}, "B"]} ]},"North America",{ "$cond":[ {"$and":[ {"$eq":["$continent","Caribbean"]}, {"$ne":[{"$substr":["$name",0,1]}, "B"]} ]},"South America","$continent" ] } ] } ] } ] } }} ])
))