Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "AGGREGATE world"

From NoSQLZoo
Jump to: navigation, search
 
(32 intermediate revisions by 2 users not shown)
Line 11: Line 11:
 
#PRETTY
 
#PRETTY
 
import pprint
 
import pprint
pp = pprint.PrettyPrinter(indent=4)
+
pp = pprint.PrettyPrinter(indent=4, width=160)
 
</pre>
 
</pre>
  
Line 46: Line 46:
 
</div>
 
</div>
 
<div class="hint" title="Division by 0 error?">
 
<div class="hint" title="Division by 0 error?">
<code> Use a <code>$match</code>. <code>{"area":{"$ne":0}}</code>
+
Use a <code>$match</code>. <code>{"area":{"$ne":0}}</code>
 
</div>
 
</div>
 
<pre class=def>
 
<pre class=def>
Line 54: Line 54:
 
             "_id":0,
 
             "_id":0,
 
             "name":1,
 
             "name":1,
             "density": {"$divide": [10000,"$area"]}
+
             "density": {"$divide": ["$population","$area"]}
 
         }},
 
         }},
 
         {"$match":{
 
         {"$match":{
Line 106: Line 106:
 
</pre>
 
</pre>
 
<div class=ans>
 
<div class=ans>
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Brazil']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
+
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))
 
</div>
 
</div>
 
</div>
 
</div>
Line 150: Line 150:
 
==Harder Questions==
 
==Harder Questions==
 
<div class=q data-lang="py3">
 
<div class=q data-lang="py3">
Using Conditions<br/>
+
Print a list of names for countries in the continent of "North America" change United States to USA
<code>$cond</code> is similar to a <code>CASE</code> statement in other languages.<br/>
+
<pre class=def>
It has the form <code>"$cond": [{<comparison> :[<field or value>,<field or value>]},<true case>,<false case>]</code><br/>
+
pp.pprint(list(
Using <code>$cond</code>, reattempt the above question but change <b>Eurasia</b> to <b>Europe</b>
+
    db.world.aggregate([
 +
      {"$match":{
 +
          "continent":"North America"
 +
      }},
 +
      {"$project":{
 +
          "_id":0,
 +
          "name":1
 +
      }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}])))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Combine <b>North America</b> and <b>South America</b> to <b>America</b>, and then list the continents by area. Biggest first.
 
<pre class=def>
 
<pre class=def>
 
pp.pprint(list(
 
pp.pprint(list(
Line 159: Line 176:
 
         {"$group":{
 
         {"$group":{
 
             "_id":{
 
             "_id":{
                 "$cond": [{"$eq" :["$continent","Eurasia"]},"Europe","$continent"]
+
                 "$cond": [{"$eq":["$continent","North America"]},"America",
 +
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
 
             },
 
             },
 
             "area":{"$sum": "$area"}
 
             "area":{"$sum": "$area"}
Line 174: Line 192:
 
</pre>
 
</pre>
 
<div class=ans>
 
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
Show the <b>name</b> and the <b>continent</b> for countries beginning with N - but replace the continent <b>Oceania</b> with <b>Australasia</b>.
 +
<pre class=def>
 
pp.pprint(list(
 
pp.pprint(list(
 
     db.world.aggregate([
 
     db.world.aggregate([
         {"$group":{
+
         {"$match":{
             "_id":{
+
             "name":{"$regex":"^N"}
                "$cond": [{"$eq" :["$continent","Eurasia"]},"Europe","$continent"]
 
            },
 
            "area":{"$sum": "$area"}
 
 
         }},
 
         }},
         {"$sort":{
+
         {"$project":{
             "area": -1
+
             "_id":0,
 +
            "name":1
 +
        }}
 +
    ])
 +
))
 +
</pre>
 +
<div class=ans>
 +
pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Show the <b>name</b> and the <b>continent</b> but:<br/><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;substitute <b>Eurasia</b> for <b>Europe</b> and <b>Asia</b>.<br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;substitute <b>America</b> - for each country in <b>North America</b> or <b>South America</b> or <b>Caribbean</b>.<br/><br/>Only show countries beginning with <b>A</b> or <b>B</b><br/>
 +
If you're struggling you may want to experiment with <code>$and</code>,<code>$or</code>, etc.
 +
<pre class=def></pre>
 +
<div class=ans>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$match":{
 +
            "name":{"$regex":"^A|^B"}
 
         }},
 
         }},
 
         {"$project":{
 
         {"$project":{
             "_id":1,
+
             "_id":0,
             "area":1
+
            "name":1,
 +
             "continent": {
 +
                "$cond": [{
 +
                    "$or":[
 +
                        {"$eq":["$continent","Europe"]},
 +
                        {"$eq":["$continent","Asia"]}
 +
                    ]},"Eurasia",{
 +
                        "$cond": [
 +
                            {"$or":[
 +
                                {"$eq":["$continent","North America"]},
 +
                                {"$eq":["$continent","South America"]},
 +
                                {"$eq":["$continent","Caribbean"]}
 +
                            ]},"America","$continent"]}
 +
                ]}
 +
        }}
 +
    ])
 +
))
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
Put the continents right...
 +
<br/><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;<b>Oceania</b> becomes <b>Australasia</b><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;Countries in <b>Eurasia</b> and <b>Turkey</b> go to <b>Europe/Asia</b><br/>
 +
&nbsp;&nbsp;&nbsp;&nbsp;<b>Caribbean</b> islands starting with '<b>B</b>' go to <b>North America</b>, other Caribbean islands go to <b>South America</b><br/>
 +
<br/><br/>
 +
<b>Show the name, the original continent and the new continent of all countries.</b>
 +
<pre class=def></pre>
 +
<div class=ans>
 +
pp.pprint(list(
 +
    db.world.aggregate([
 +
        {"$project":{
 +
            "_id":0,
 +
            "name":1,
 +
            "original": "$continent",
 +
            "new": {
 +
                "$cond": [
 +
                    {"$or":[
 +
                        {"$eq":["$continent","Eurasia"]},
 +
                        {"$eq":["$name","Turkey"]}
 +
                    ]},"Europe/Asia",{
 +
                        "$cond":[
 +
                            {"$eq":["$continent","Oceania"]},"Australasia",{
 +
                                "$cond":[
 +
                                    {"$and":[
 +
                                        {"$eq":["$continent","Caribbean"]},
 +
                                        {"$eq":[{"$substr":["$name",0,1]}, "B"]}
 +
                                    ]},"North America",{
 +
                                        "$cond":[
 +
                                            {"$and":[
 +
                                                {"$eq":["$continent","Caribbean"]},
 +
                                                {"$ne":[{"$substr":["$name",0,1]}, "B"]}
 +
                                            ]},"South America","$continent"
 +
                                        ]
 +
                                    }
 +
                                ]
 +
                            }
 +
                        ]
 +
                  }
 +
                ]
 +
            }
 
         }}
 
         }}
 
     ])
 
     ])

Latest revision as of 15:55, 30 July 2015

#ENCODING
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
import pprint
pp = pprint.PrettyPrinter(indent=4, width=160)

Country Profile

For these questions you should use aggregate([]) on the collection world

Give the name and the per capita GDP for those countries with a population of at least 200 million.

per capita GDP is the GDP divided by the population.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "population":{"$gte":250000000}
        }},
        {"$project":{
            "_id":0,
            "name":1,
            "per capita GDP": {"$divide": ["$gdp",1000000]}
        }}
    ])
))
pp.pprint(list(db.world.aggregate([{"$match":{"population":{"$gte":200000000}}},{"$project":{"_id":0,"name":1,"per capita GDP": {"$divide": ["$gdp","$population"]}}}])))

Give the name and the population density of all countries. Ignore results where the density is "None".

population density is the population divided by the area

Use a $match. {"area":{"$ne":0}}

pp.pprint(list(
    db.world.aggregate([
        {"$project":{
            "_id":0,
            "name":1,
            "density": {"$divide": ["$population","$area"]}
        }},
        {"$match":{
            "density": {"$ne":None}
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"density":{"$divide":["$population","$area"]}}},{"$match":{"density":{"$ne":None}}}])))

Show the name and population in millions for the countries of the continent South America. Divide the population by 1000000 to get population in millions.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":{"$eq":"South America"}}},{"$project":{"_id":0,"name":1,"population":{"$divide":["$population",1000000]}}}])))


Show the name and population density for France, Germany, and Italy

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name": {"$in":['United Kingdom','United States','Brazil']},
            "population": {"$ne": None},
            "area": {"$ne": 0}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$in":['France','Germany','Italy']},"population":{"$ne":None},"area":{"$ne":0}}},{"$project":{"_id":0,"name":1,"population density":{"$divide":["$population","$area"]}}}])))

Order the continents by area from most to least.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":"$name",
            "area":{"$max": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(

   db.world.aggregate([
       {"$group":{
           "_id":"$continent",
           "area":{"$sum": "$area"}
       }},
       {"$sort":{
           "area": -1
       }},
       {"$project":{
           "_id":1,
           "area":1
       }}
   ])

))

Harder Questions

Print a list of names for countries in the continent of "North America" change United States to USA

pp.pprint(list(
    db.world.aggregate([
      {"$match":{
          "continent":"North America"
      }},
      {"$project":{
          "_id":0,
          "name":1
      }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"continent":"North America"}},{"$project":{"_id":0,"name":{"$cond": [{"$eq":["$name","United States"]},"USA","$name"]}}}])))

Combine North America and South America to America, and then list the continents by area. Biggest first.

pp.pprint(list(
    db.world.aggregate([
        {"$group":{
            "_id":{
                "$cond": [{"$eq":["$continent","North America"]},"America",
                    {"$cond": [{"$eq":["$continent","Asia"]},"The East","$continent"]}]
            },
            "area":{"$sum": "$area"}
        }},
        {"$sort":{
            "area": -1
        }},
        {"$project":{
            "_id":1,
            "area":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$group":{"_id":{"$cond":[{"$eq":["$continent","South America"]},"America",{"$cond":[{"$eq":["$continent","North America"]},"America","$continent"]}]},"area":{"$sum":"$area"}}},{"$sort":{"area":-1}},{"$project":{"_id":1,"area":1}}])))

Show the name and the continent for countries beginning with N - but replace the continent Oceania with Australasia.

pp.pprint(list(
    db.world.aggregate([
        {"$match":{
            "name":{"$regex":"^N"}
        }},
        {"$project":{
            "_id":0,
            "name":1
        }}
    ])
))

pp.pprint(list(db.world.aggregate([{"$match":{"name":{"$regex":"^N"}}},{"$project":{"_id":0,"name":1,"continent":{"$cond":[{"$eq":["$continent","Oceania"]},"Australasia","$continent"]}}}])))

Show the name and the continent but:

    substitute Eurasia for Europe and Asia.
    substitute America - for each country in North America or South America or Caribbean.

Only show countries beginning with A or B
If you're struggling you may want to experiment with $and,$or, etc.


pp.pprint(list(

   db.world.aggregate([
       {"$match":{
           "name":{"$regex":"^A|^B"}
       }},
       {"$project":{
           "_id":0,
           "name":1,
           "continent": {
               "$cond": [{
                   "$or":[ 
                       {"$eq":["$continent","Europe"]},
                       {"$eq":["$continent","Asia"]}
                   ]},"Eurasia",{
                       "$cond": [
                           {"$or":[ 
                               {"$eq":["$continent","North America"]},
                               {"$eq":["$continent","South America"]},
                               {"$eq":["$continent","Caribbean"]}
                           ]},"America","$continent"]}
               ]}
       }}
   ])

))

Put the continents right...

    Oceania becomes Australasia
    Countries in Eurasia and Turkey go to Europe/Asia
    Caribbean islands starting with 'B' go to North America, other Caribbean islands go to South America


Show the name, the original continent and the new continent of all countries.


pp.pprint(list(

   db.world.aggregate([
       {"$project":{
           "_id":0,
           "name":1,
           "original": "$continent",
           "new": {
               "$cond": [
                   {"$or":[ 
                       {"$eq":["$continent","Eurasia"]},
                       {"$eq":["$name","Turkey"]}
                   ]},"Europe/Asia",{
                       "$cond":[
                           {"$eq":["$continent","Oceania"]},"Australasia",{
                               "$cond":[
                                   {"$and":[
                                       {"$eq":["$continent","Caribbean"]},
                                       {"$eq":[{"$substr":["$name",0,1]}, "B"]}
                                   ]},"North America",{
                                       "$cond":[
                                           {"$and":[
                                               {"$eq":["$continent","Caribbean"]},
                                               {"$ne":[{"$substr":["$name",0,1]}, "B"]}
                                           ]},"South America","$continent"
                                       ]
                                   }
                               ]
                           }
                       ]
                  }
               ]
           }
       }}
   ])

))