Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "AGGREGATE world"

From NoSQLZoo
Jump to: navigation, search
 
(60 intermediate revisions by 2 users not shown)
Line 11: Line 11:
 
#PRETTY
 
#PRETTY
 
import pprint
 
import pprint
pp = pprint.PrettyPrinter(indent=4)
+
pp = pprint.PrettyPrinter(indent=4, width=160)
 
</pre>
 
</pre>
  
 
==Country Profile==
 
==Country Profile==
For these questions you should use find() on the collection <code>world</code>
+
For these questions you should use <code>aggregate([])</code> on the collection <code>world</code>
 +
<div class='extra_space' style='width:1em; height:6em;'></div>
 +
<div class=q data-lang="py3">
 +
Give the <code>name</code> and the <code>per capita GDP</code> for those countries with a <code>population</code> of at least 200 million.
 +
<div class="hint" title="How to calculate per capita GDP">
 +
per capita GDP is the GDP divided by the population.
 +
</div>
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
            "population":{"$gte":250000000}
 +
        }},
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "per capita GDP": {"$divide": ["$gdp",1000000]}
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}])))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Give the <code>name</code> and the <code>population density</code> of all countries. Ignore results where the density is "None".
 +
<div class="hint" title="How to calculate population density">
 +
population density is the population divided by the area
 +
</div>
 +
<div class="hint" title="Division by 0 error?">
 +
Use a <code>$match</code>. <code>{"area":{"$ne":0}}</code>
 +
</div>
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "density": {"$divide": ["$population","$area"]}
 +
        }},
 +
        {"$match":{
 +
            "density": {"$ne":None}
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Show the <code>name</code> and <code>population</code> in millions for the countries of the continent <b>South America</b>. Divide the population by 1000000 to get population in millions.
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
           
 +
        }},
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))
 +
</div>
 +
</div>
 +
 
 +
 
 +
<div class=q data-lang="py3">
 +
Show the <code>name</code> and <code>population density</code> for <b>France</b>, <b>Germany</b>, and <b>Italy</b>
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
            "name": {"$in":['United Kingdom','United States','Brazil']},
 +
            "population": {"$ne": None},
 +
            "area": {"$ne": 0}
 +
        }},
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Order the <code>continents</code> by <code>area</code> from most to least.
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$group":{
 +
            "_id":"$name",
 +
            "area":{"$max": "$area"}
 +
        }},
 +
        {"$sort":{
 +
            "area": -1
 +
        }},
 +
        {"$project":{
 +
            "_id":1,
 +
            "area":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$group":{
 +
            "_id":"$continent",
 +
            "area":{"$sum": "$area"}
 +
        }},
 +
        {"$sort":{
 +
            "area": -1
 +
        }},
 +
        {"$project":{
 +
            "_id":1,
 +
            "area":1
 +
        }}
 +
    ])
 +
))
 +
</div>
 +
</div>
  
 +
==Harder Questions==
 
<div class=q data-lang="py3">
 
<div class=q data-lang="py3">
Show the name and population for the countries that have a population of at least 200 million.
+
Print a list of names for countries in the continent of "North America" change United States to USA
 
<pre class=def>
 
<pre class=def>
 
pp.pprint(list(
 
pp.pprint(list(
     db.world.find({},{"name":1,"_id":0})
+
     db.world.aggregate([
 +
      {"$match":{
 +
          "continent":"North America"
 +
      }},
 +
      {"$project":{
 +
          "_id":0,
 +
          "name":1
 +
      }}
 +
    ])
 
))
 
))
 
</pre>
 
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}])))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Combine <b>North America</b> and <b>South America</b> to <b>America</b>, and then list the continents by area. Biggest first.
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$group":{
 +
            "_id":{
 +
                "$cond": [{"$eq":["$continent","North America"]},"America",
 +
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
 +
            },
 +
            "area":{"$sum": "$area"}
 +
        }},
 +
        {"$sort":{
 +
            "area": -1
 +
        }},
 +
        {"$project":{
 +
            "_id":1,
 +
            "area":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Show the <b>name</b> and the <b>continent</b> for countries beginning with N - but replace the continent <b>Oceania</b> with <b>Australasia</b>.
 +
<pre class=def>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
            "name":{"$regex":"^N"}
 +
        }},
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Show the <b>name</b> and the <b>continent</b> but:<br/><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;substitute <b>Eurasia</b> for <b>Europe</b> and <b>Asia</b>.<br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;substitute <b>America</b> - for each country in <b>North America</b> or <b>South America</b> or <b>Caribbean</b>.<br/><br/>Only show countries beginning with <b>A</b> or <b>B</b><br/>
 +
If you're struggling you may want to experiment with <code>$and</code>,<code>$or</code>, etc.
 +
<pre class=def></pre>
 +
<div class=ans>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
            "name":{"$regex":"^A|^B"}
 +
        }},
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "continent": {
 +
                "$cond": [{
 +
                    "$or":[
 +
                        {"$eq":["$continent","Europe"]},
 +
                        {"$eq":["$continent","Asia"]}
 +
                    ]},"Eurasia",{
 +
                        "$cond": [
 +
                            {"$or":[
 +
                                {"$eq":["$continent","North America"]},
 +
                                {"$eq":["$continent","South America"]},
 +
                                {"$eq":["$continent","Caribbean"]}
 +
                            ]},"America","$continent"]}
 +
                ]}
 +
        }}
 +
    ])
 +
))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Put the continents right...
 +
<br/><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;<b>Oceania</b> becomes <b>Australasia</b><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;Countries in <b>Eurasia</b> and <b>Turkey</b> go to <b>Europe/Asia</b><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;<b>Caribbean</b> islands starting with '<b>B</b>' go to <b>North America</b>, other Caribbean islands go to <b>South America</b><br/>
 +
<br/><br/>
 +
<b>Show the name, the original continent and the new continent of all countries.</b>
 +
<pre class=def></pre>
 
<div class=ans>
 
<div class=ans>
 
pp.pprint(list(
 
pp.pprint(list(
     db.world.find({"population":{"$gt":20000000}},{"name":1,"population":1,"_id":0})
+
     db.world.aggregate([
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "original": "$continent",
 +
            "new": {
 +
                "$cond": [
 +
                    {"$or":[
 +
                        {"$eq":["$continent","Eurasia"]},
 +
                        {"$eq":["$name","Turkey"]}
 +
                    ]},"Europe/Asia",{
 +
                        "$cond":[
 +
                            {"$eq":["$continent","Oceania"]},"Australasia",{
 +
                                "$cond":[
 +
                                    {"$and":[
 +
                                        {"$eq":["$continent","Caribbean"]},
 +
                                        {"$eq":[{"$substr":["$name",0,1]}, "B"]}
 +
                                    ]},"North America",{
 +
                                        "$cond":[
 +
                                            {"$and":[
 +
                                                {"$eq":["$continent","Caribbean"]},
 +
                                                {"$ne":[{"$substr":["$name",0,1]}, "B"]}
 +
                                            ]},"South America","$continent"
 +
                                        ]
 +
                                    }
 +
                                ]
 +
                            }
 +
                        ]
 +
                  }
 +
                ]
 +
            }
 +
        }}
 +
    ])
 
))
 
))
 
</div>
 
</div>
 
</div>
 
</div>

Latest revision as of 14:55, 30 July 2015

#ENCODING
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
import pprint
pp = pprint.PrettyPrinter(indent=4, width=160)

Country Profile

For these questions you should use aggregate([]) on the collection world

Give the name and the per capita GDP for those countries with a population of at least 200 million.

per capita GDP is the GDP divided by the population.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "population":{"$gte":250000000}
        }},
        {"$project":{
            "_id":0,
            "name":1,
            "per capita GDP": {"$divide": ["$gdp",1000000]}
        }}
    ])
))
pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}])))

Give the name and the population density of all countries. Ignore results where the density is "None".

population density is the population divided by the area

Use a $match. {"area":{"$ne":0}}

pp.pprint(list(
    db.world.aggregate([
        {"$project":{
            "_id":0,
            "name":1,
            "density": {"$divide": ["$population","$area"]}
        }},
        {"$match":{
            "density": {"$ne":None}
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))

Show the name and population in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))


Show the name and population density for France, Germany, and Italy

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name": {"$in":['United Kingdom','United States','Brazil']},
            "population": {"$ne": None},
            "area": {"$ne": 0}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))

Order the continents by area from most to least.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":"$name",
            "area":{"$max": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(

   db.world.aggregate([
       {"$group":{
           "_id":"$continent",
           "area":{"$sum": "$area"}
       }},
       {"$sort":{
           "area": -1
       }},
       {"$project":{
           "_id":1,
           "area":1
       }}
   ])

))

Harder Questions

Print a list of names for countries in the continent of "North America" change United States to USA

pp.pprint(list(
    db.world.aggregate([
      {"$match":{
          "continent":"North America"
      }},
      {"$project":{
          "_id":0,
          "name":1
      }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}])))

Combine North America and South America to America, and then list the continents by area. Biggest first.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","North America"]},"America",
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Show the name and the continent for countries beginning with N - but replace the continent Oceania with Australasia.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name":{"$regex":"^N"}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))

Show the name and the continent but:

    substitute Eurasia for Europe and Asia.
    substitute America - for each country in North America or South America or Caribbean.

Only show countries beginning with A or B
If you're struggling you may want to experiment with $and,$or, etc.


pp.pprint(list(

   db.world.aggregate([
       {"$match":{
           "name":{"$regex":"^A|^B"}
       }},
       {"$project":{
           "_id":0,
           "name":1,
           "continent": {
               "$cond": [{
                   "$or":[ 
                       {"$eq":["$continent","Europe"]},
                       {"$eq":["$continent","Asia"]}
                   ]},"Eurasia",{
                       "$cond": [
                           {"$or":[ 
                               {"$eq":["$continent","North America"]},
                               {"$eq":["$continent","South America"]},
                               {"$eq":["$continent","Caribbean"]}
                           ]},"America","$continent"]}
               ]}
       }}
   ])

))

Put the continents right...

    Oceania becomes Australasia
    Countries in Eurasia and Turkey go to Europe/Asia
    Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America


Show the name, the original continent and the new continent of all countries.


pp.pprint(list(

   db.world.aggregate([
       {"$project":{
           "_id":0,
           "name":1,
           "original": "$continent",
           "new": {
               "$cond": [
                   {"$or":[ 
                       {"$eq":["$continent","Eurasia"]},
                       {"$eq":["$name","Turkey"]}
                   ]},"Europe/Asia",{
                       "$cond":[
                           {"$eq":["$continent","Oceania"]},"Australasia",{
                               "$cond":[
                                   {"$and":[
                                       {"$eq":["$continent","Caribbean"]},
                                       {"$eq":[{"$substr":["$name",0,1]}, "B"]}
                                   ]},"North America",{
                                       "$cond":[
                                           {"$and":[
                                               {"$eq":["$continent","Caribbean"]},
                                               {"$ne":[{"$substr":["$name",0,1]}, "B"]}
                                           ]},"South America","$continent"
                                       ]
                                   }
                               ]
                           }
                       ]
                  }
               ]
           }
       }}
   ])

))