Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "AGGREGATE world"

From NoSQLZoo
Jump to: navigation, search
Line 257: Line 257:
 
     ])
 
     ])
 
))
 
))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Put the continents right...
 +
&nbsp;&nbsp;&nbsp;&nbsp;<b>Oceania</b> becomes <b>Australasia</b>
 +
&nbsp;&nbsp;&nbsp;Countries in <b>Eurasia</b> and <b>Turkey</b> go to <b>Europe/Asia</b>
 +
&nbsp;&nbsp;&nbsp;<b>Caribbean</b> islands starting with '<b>B</b>' go to <b>North America</b>, other Caribbean islands go to <b>South America</b>
 +
 +
<b>Show the name, the original continent and the new continent of all countries.<b>
 +
<pre class=def></pre>
 +
<div class=ans>
  
 
</div>
 
</div>
 
</div>
 
</div>

Revision as of 15:26, 17 July 2015

#ENCODING
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
import pprint
pp = pprint.PrettyPrinter(indent=4)

Country Profile

For these questions you should use aggregate([]) on the collection world

Give the name and the per capita GDP for those countries with a population of at least 200 million.

per capita GDP is the GDP divided by the population.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "population":{"$gte":250000000}
        }},
        {"$project":{
            "_id":0,
            "name":1,
            "per capita GDP": {"$divide": ["$gdp",1000000]}
        }}
    ])
))
pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}])))

Give the name and the population density of all countries. Ignore results where the density is "None".

population density is the population divided by the area

Use a $match. {"area":{"$ne":0}}

pp.pprint(list(
    db.world.aggregate([
        {"$project":{
            "_id":0,
            "name":1,
            "density": {"$divide": [10000,"$area"]}
        }},
        {"$match":{
            "density": {"$ne":None}
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))

Show the name and population in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))


Show the name and population density for France, Germany, and Italy

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name": {"$in":['United Kingdom','United States','Brazil']},
            "population": {"$ne": None},
            "area": {"$ne": 0}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))

Order the continents by area from most to least.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":"$name",
            "area":{"$max": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(

   db.world.aggregate([
       {"$group":{
           "_id":"$continent",
           "area":{"$sum": "$area"}
       }},
       {"$sort":{
           "area": -1
       }},
       {"$project":{
           "_id":1,
           "area":1
       }}
   ])

))

Harder Questions

Using Conditions

$cond is similar to a CASE statement in other languages.
It has the form "$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]

Using $cond, reattempt the above question but change Eurasia to Europe

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","Eurasia"]},"Europe","$continent"]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Combine North America and South America to America, and then list the continents by area. Biggest first.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","North America"]},"America",
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Show the name and the continent - but substitute Australasia for Oceania - for countries beginning with N.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name":{"$regex":"^N"}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))

Show the name and the continent but:

    substitute Eurasia for Europe and Asia.
    substitute America - for each country in North America or South America or Caribbean.

Only show countries beginning with A or B
You may want to experiment with $and and $or


pp.pprint(list(

   db.world.aggregate([
       {"$match":{
           "name":{"$regex":"^A|^B"}
       }},
       {"$project":{
           "_id":0,
           "name":1,
           "continent": {
               "$cond": [{
                   "$or":[ 
                       {"$eq":["$continent","Europe"]},
                       {"$eq":["$continent","Asia"]}
                   ]},"Eurasia",{
                       "$cond": [
                           {"$or":[ 
                               {"$eq":["$continent","North America"]},
                               {"$eq":["$continent","South America"]},
                               {"$eq":["$continent","Caribbean"]}
                           ]},"America","$continent"]}
               ]}
       }}
   ])

))

Put the continents right...     Oceania becomes Australasia    Countries in Eurasia and Turkey go to Europe/Asia    Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America

Show the name, the original continent and the new continent of all countries.