Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "AGGREGATE world"

From NoSQLZoo
Jump to: navigation, search
Line 270: Line 270:
 
<pre class=def></pre>
 
<pre class=def></pre>
 
<div class=ans>
 
<div class=ans>
 
+
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "original": "$continent",
 +
            "new": {
 +
                "$cond": [
 +
                    {"$or":[
 +
                        {"$eq":["$continent","Eurasia"]},
 +
                        {"$eq":["$name","Turkey"]}
 +
                    ]},"Europe/Asia",{
 +
                        "$cond":[
 +
                            {"$eq":["$continent","Oceania"]},"Australasia",{
 +
                                "$cond":[
 +
                                    {"$and":[
 +
                                        {"$eq":["$continent","Caribbean"]},
 +
                                        {"$eq":[{"$substr":["$name",0,1]}, "B"]}
 +
                                    ]},"North America",{
 +
                                        "$cond":[
 +
                                            {"$and":[
 +
                                                {"$eq":["$continent","Caribbean"]},
 +
                                                {"$ne":[{"$substr":["$name",0,1]}, "B"]}
 +
                                            ]},"South America","$continent"
 +
                                        ]
 +
                                    }
 +
                                ]
 +
                            }
 +
                        ]
 +
                  }
 +
                ]
 +
            }
 +
        }}
 +
    ])
 +
))
 
</div>
 
</div>
 
</div>
 
</div>

Revision as of 15:27, 17 July 2015

#ENCODING
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
import pprint
pp = pprint.PrettyPrinter(indent=4)

Country Profile

For these questions you should use aggregate([]) on the collection world

Give the name and the per capita GDP for those countries with a population of at least 200 million.

per capita GDP is the GDP divided by the population.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "population":{"$gte":250000000}
        }},
        {"$project":{
            "_id":0,
            "name":1,
            "per capita GDP": {"$divide": ["$gdp",1000000]}
        }}
    ])
))
pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}])))

Give the name and the population density of all countries. Ignore results where the density is "None".

population density is the population divided by the area

Use a $match. {"area":{"$ne":0}}

pp.pprint(list(
    db.world.aggregate([
        {"$project":{
            "_id":0,
            "name":1,
            "density": {"$divide": [10000,"$area"]}
        }},
        {"$match":{
            "density": {"$ne":None}
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))

Show the name and population in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))


Show the name and population density for France, Germany, and Italy

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name": {"$in":['United Kingdom','United States','Brazil']},
            "population": {"$ne": None},
            "area": {"$ne": 0}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))

Order the continents by area from most to least.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":"$name",
            "area":{"$max": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(

   db.world.aggregate([
       {"$group":{
           "_id":"$continent",
           "area":{"$sum": "$area"}
       }},
       {"$sort":{
           "area": -1
       }},
       {"$project":{
           "_id":1,
           "area":1
       }}
   ])

))

Harder Questions

Using Conditions

$cond is similar to a CASE statement in other languages.
It has the form "$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]

Using $cond, reattempt the above question but change Eurasia to Europe

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","Eurasia"]},"Europe","$continent"]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","Eurasia"]},"Europe","$continent"]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Combine North America and South America to America, and then list the continents by area. Biggest first.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","North America"]},"America",
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Show the name and the continent - but substitute Australasia for Oceania - for countries beginning with N.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name":{"$regex":"^N"}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))

Show the name and the continent but:

    substitute Eurasia for Europe and Asia.
    substitute America - for each country in North America or South America or Caribbean.

Only show countries beginning with A or B
If you're struggling you may want to experiment with $and,$or, etc.


pp.pprint(list(

   db.world.aggregate([
       {"$match":{
           "name":{"$regex":"^A|^B"}
       }},
       {"$project":{
           "_id":0,
           "name":1,
           "continent": {
               "$cond": [{
                   "$or":[ 
                       {"$eq":["$continent","Europe"]},
                       {"$eq":["$continent","Asia"]}
                   ]},"Eurasia",{
                       "$cond": [
                           {"$or":[ 
                               {"$eq":["$continent","North America"]},
                               {"$eq":["$continent","South America"]},
                               {"$eq":["$continent","Caribbean"]}
                           ]},"America","$continent"]}
               ]}
       }}
   ])

))

Put the continents right...

    Oceania becomes Australasia
    Countries in Eurasia and Turkey go to Europe/Asia
    Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America


Show the name, the original continent and the new continent of all countries.


pp.pprint(list(

   db.world.aggregate([
       {"$project":{
           "_id":0,
           "name":1,
           "original": "$continent",
           "new": {
               "$cond": [
                   {"$or":[ 
                       {"$eq":["$continent","Eurasia"]},
                       {"$eq":["$name","Turkey"]}
                   ]},"Europe/Asia",{
                       "$cond":[
                           {"$eq":["$continent","Oceania"]},"Australasia",{
                               "$cond":[
                                   {"$and":[
                                       {"$eq":["$continent","Caribbean"]},
                                       {"$eq":[{"$substr":["$name",0,1]}, "B"]}
                                   ]},"North America",{
                                       "$cond":[
                                           {"$and":[
                                               {"$eq":["$continent","Caribbean"]},
                                               {"$ne":[{"$substr":["$name",0,1]}, "B"]}
                                           ]},"South America","$continent"
                                       ]
                                   }
                               ]
                           }
                       ]
                  }
               ]
           }
       }}
   ])

))