Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "FIND from WORLD"

From NoSQLZoo
Jump to: navigation, search
Line 170: Line 170:
 
         out={"inline":1}
 
         out={"inline":1}
 
)
 
)
 
 
pp.pprint(temp['results']);
 
pp.pprint(temp['results']);
 
</pre>
 
</pre>
 
<div class=ans>
 
<div class=ans>
from bson.code import Code  
+
from bson.code import Code
temp = db.world.map_reduce(query={"continent":"South America"},map=Code("function(){emit(this.name,{'gdp':this.gdp,'pop':this.population});}"), reduce=Code("""function(key, values){ return values} """),finalize=Code("""function(k, v){return {'pop in millions':math.Round(v['pop']/1000000)/1000000,'gdp in billions':math.Round(v['gdp']/1000000000)/1000000000};}"""),out={"inline":1})
+
temp = db.world.map_reduce(query={"continent":"South America"},map=Code("function(){emit(this.name, {'gdp':this.gdp,'pop':this.population});}"),reduce=Code("""function(key, values){ return values} """),finalize=Code("""function(k, v){return {'pop in millions':Math.round(v['pop']*1000000)/1000000,'gdp in billions':Math.round(v['gdp']*1000000000)/1000000000};}"""),out={"inline":1})
 
 
 
pp.pprint(temp['results']);
 
pp.pprint(temp['results']);
 
</div>
 
</div>
Line 199: Line 197:
 
<div class=ans>
 
<div class=ans>
 
from bson.code import Code
 
from bson.code import Code
temp = db.world.map_reduce(map=Code("function(){if(this.gdp > 1000000000000)emit(this.name,{'gdp':this.gdp,'pop':this.population});}"),reduce=Code("""function(key, values){ return values} """),finalize=Code("""function(k, v){return {'per-capita GDP':Math.round((v['gdp']/v['pop'])/1000)*1000};}"""),out={"inline":1})pp.pprint(temp['results']);
+
temp = db.world.map_reduce(map=Code("function(){if(this.gdp > 1000000000000)emit(this.name,{'gdp':this.gdp,'pop':this.population});}"),reduce=Code("""function(key, values){ return values} """),finalize=Code("""function(k, v){
 +
return {'per-capita GDP':Math.round((v['gdp']/v['pop'])/1000)*1000};}"""),out={"inline":1})
 +
pp.pprint(temp['results']);
 
</div>
 
</div>
 
</div>
 
</div>

Revision as of 13:44, 29 July 2015

# ENCODING: re-wrap stdout so that everything printed is encoded as UTF-16.
# NOTE(review): presumably the page runner expects UTF-16 output - confirm.
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
# MONGO: connect with MongoClient's defaults, authenticate against the
# "progzoo" database and bind that database to `db`.
# NOTE(review): Database.authenticate() no longer exists in PyMongo 4 -
# confirm the PyMongo version in use before upgrading.
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
# PRETTY: pretty-printer bound to `pp`, configured with a 4-space indent.
import pprint
pp = pprint.PrettyPrinter(indent=4)

Working with the world collection of countries

This tutorial introduces NoSQL using MongoDB and PyMongo under Python 3.4. We will be using the find() command and comparison functions on the collection world, which contains details of around 250 countries of the world:

FIND examples are available.


Observe the result of running a simple MongoDB command.

FIND name, continent, population FROM world

# An empty filter matches every document in the collection.
# In the projection, 1 keeps a field and "_id": 0 leaves the _id field out.
projection = {"name": 1, "continent": 1, "population": 1, "_id": 0}
cursor = db.world.find({}, projection)
pp.pprint(list(cursor))

# The same query written on a single line.
pp.pprint(list(db.world.find({}, {"name": 1, "continent": 1, "population": 1, "_id": 0})))


Show the name for the countries that have a population of at least 200 million. 200 million is 200000000 (eight zeros).

# "At least 200 million" includes a population of exactly 200000000, so the
# comparison must be $gte (>=) rather than $gt (>).
pp.pprint(list(
    db.world.find(
        {"population": {"$gte": 200000000}},
        {"name": 1, "_id": 0}
    )
))

# The same query written on a single line.
pp.pprint(list(db.world.find({"population": {"$gte": 200000000}}, {"name": 1, "_id": 0})))

Give the name and the per capita GDP for those countries with a population of at least 200 million.

AGGREGATE examples are available.

Per capita GDP is the GDP divided by the population: GDP/population.
# Stage 1 ($match): keep countries with a population of at least 200 million;
# "at least" includes exactly 200000000, hence $gte rather than $gt.
# Stage 2 ($project): output the name and gdp divided by population.
pp.pprint(list(
    db.world.aggregate([
        {"$match": {"population": {"$gte": 200000000}}},
        {"$project": {"name": 1, "gdp/population": {"$divide": ["$gdp", "$population"]}, "_id": 0}}
    ])
))

# The same pipeline written on a single line.
pp.pprint(list(db.world.aggregate([{"$match": {"population": {"$gte": 200000000}}}, {"$project": {"name": 1, "gdp/population": {"$divide": ["$gdp", "$population"]}, "_id": 0}}])))

Show the name and population in millions for the countries of the continent 'South America'. Divide the population by 1000000 to get population in millions.

# Stage 1 keeps the documents whose continent equals 'South America'.
# Stage 2 outputs the name and the population divided by 1000000.
keep_south_america = {"$match": {"continent": {"$eq": "South America"}}}
in_millions = {
    "$project": {
        "name": 1,
        "population in millions": {"$divide": ["$population", 1000000]},
        "_id": 0,
    }
}
pp.pprint(list(db.world.aggregate([keep_south_america, in_millions])))

# The same pipeline written on a single line.
pp.pprint(list(db.world.aggregate([{"$match": {"continent": {"$eq": "South America"}}}, {"$project": {"name": 1, "population in millions": {"$divide": ["$population", 1000000]}, "_id": 0}}])))

Show the name and population for France, Germany, Italy

# $in matches any document whose name is one of the listed values;
# the $project stage then keeps only name and population.
wanted_names = ["France", "Germany", "Italy"]
pipeline = [
    {"$match": {"name": {"$in": wanted_names}}},
    {"$project": {"name": 1, "population": 1, "_id": 0}},
]
pp.pprint(list(db.world.aggregate(pipeline)))

# The same pipeline written on a single line.
pp.pprint(list(db.world.aggregate([{"$match": {"name": {"$in": ["France", "Germany", "Italy"]}}}, {"$project": {"name": 1, "population": 1, "_id": 0}}])))

Show the countries which have a name that includes the word 'United'

# The pattern is a raw string so that the backslashes reach the regex engine
# untouched; in a normal string literal "\s" is an invalid escape sequence
# that Python warns about. The pattern text itself is unchanged.
pp.pprint(list(
    db.world.find(
        {"name": {"$regex": r"\s?United\s?"}},
        {"name": 1, "_id": 0}
    )
))

# The same query written on a single line.
pp.pprint(list(db.world.find({"name": {"$regex": r"\s?United\s?"}}, {"name": 1, "_id": 0})))

Two ways to be big: A country is big if it has an area of more than 3 million sq km or it has a population of more than 250 million.

Show the countries that are big by area or big by population. Show name, population and area

# A country qualifies when either condition holds: area above 3 million
# or population above 250 million.
big_by_area = {"area": {"$gt": 3000000}}
big_by_population = {"population": {"$gt": 250000000}}
shown_fields = {"name": 1, "population": 1, "area": 1, "_id": 0}
pp.pprint(list(db.world.find({"$or": [big_by_area, big_by_population]}, shown_fields)))

# The same query written on a single line.
pp.pprint(list(db.world.find({"$or": [{"area": {"$gt": 3000000}}, {"population": {"$gt": 250000000}}]}, {"name": 1, "population": 1, "area": 1, "_id": 0})))

USA and China are big in population and big by area. Exclude these countries.

Show the countries that are big by area or big by population but not both. Show name, population and area.

# Exclusive or: big by exactly one of the two measures.
# "Not big" is the complement of $gt, i.e. $lte; using $lt would drop a
# country that sits exactly on one of the thresholds.
pp.pprint(list(
    db.world.find(
        {"$or": [
            {"area": {"$gt": 3000000}, "population": {"$lte": 250000000}},
            {"area": {"$lte": 3000000}, "population": {"$gt": 250000000}}
        ]},
        {"name": 1, "population": 1, "area": 1, "_id": 0}
    )
))

# The same query written on a single line.
pp.pprint(list(db.world.find({"$or": [{"area": {"$gt": 3000000}, "population": {"$lte": 250000000}}, {"area": {"$lte": 3000000}, "population": {"$gt": 250000000}}]}, {"name": 1, "population": 1, "area": 1, "_id": 0})))

Show the name and population in millions and the GDP in billions for the countries of the continent 'South America'. Use the ROUND function to show the values to two decimal places.
Introducing MapReduce

Divide by 1000000 (6 zeros) for millions. Divide by 1000000000 (9 zeros) for billions.

For South America show population in millions and GDP in billions to 2 decimal places.

from bson.code import Code

# query    : restrict the input to documents whose continent is 'South America'.
# map      : emit one {'gdp', 'pop'} value per document, keyed by name.
# reduce   : returns the values array unchanged.
#            NOTE(review): presumably never invoked because each name is
#            emitted only once - confirm.
# finalize : convert to millions / billions and round to two decimal places.
#            Rounding to 2 d.p. is Math.round(x * 100) / 100 applied AFTER
#            dividing by 1000000 (or 1000000000); multiplying and dividing by
#            the same factor would just return the raw value.
temp = db.world.map_reduce(
        query={"continent": "South America"},
        map=Code("function(){emit(this.name,{'gdp':this.gdp,'pop':this.population});}"),
        reduce=Code("""function(key, values){ return values} """),
        finalize=Code("""function(k, v){
               return {
                   'pop in millions':Math.round(v['pop']/1000000*100)/100,
                   'gdp in billions':Math.round(v['gdp']/1000000000*100)/100
               };
           }
        """),
        out={"inline": 1}
)
pp.pprint(temp['results'])

# The same job in compact form (one statement per line).
from bson.code import Code
temp = db.world.map_reduce(query={"continent": "South America"}, map=Code("function(){emit(this.name, {'gdp':this.gdp,'pop':this.population});}"), reduce=Code("""function(key, values){ return values} """), finalize=Code("""function(k, v){return {'pop in millions':Math.round(v['pop']/1000000*100)/100,'gdp in billions':Math.round(v['gdp']/1000000000*100)/100};}"""), out={"inline": 1})
pp.pprint(temp['results'])

Show the per-capita GDP for those countries with a GDP of at least one trillion (1000000000000; that is 12 zeros). Round this value to the nearest 1000.

Show per-capita GDP for the trillion dollar countries to the nearest $1000.

from bson.code import Code

# map      : emit {'gdp', 'pop'} keyed by name for documents whose gdp is at
#            least one trillion; "at least" includes exactly 1000000000000,
#            hence >= rather than >.
# reduce   : returns the values array unchanged.
#            NOTE(review): presumably never invoked because each name is
#            emitted only once - confirm.
# finalize : gdp / pop, rounded to the nearest 1000
#            (divide by 1000, round, multiply back).
temp = db.world.map_reduce(
        map=Code("function(){if(this.gdp >= 1000000000000)emit(this.name,{'gdp':this.gdp,'pop':this.population});}"),
        reduce=Code("""function(key, values){ return values} """),
        finalize=Code("""function(k, v){
               return {'per-capita GDP':Math.round((v['gdp']/v['pop'])/1000)*1000};
           }
        """),
        out={"inline": 1}
)
pp.pprint(temp['results'])

# The same job in compact form (one statement per line).
from bson.code import Code
temp = db.world.map_reduce(map=Code("function(){if(this.gdp >= 1000000000000)emit(this.name,{'gdp':this.gdp,'pop':this.population});}"), reduce=Code("""function(key, values){ return values} """), finalize=Code("""function(k, v){ return {'per-capita GDP':Math.round((v['gdp']/v['pop'])/1000)*1000};}"""), out={"inline": 1})
pp.pprint(temp['results'])