MAPREDUCE basics
# ENCODING: re-wrap stdout so that everything printed is emitted as UTF-16.
import io
import sys

sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')

# MONGO: open a client with MongoClient's defaults, authenticate on the
# progzoo database and keep a handle to it in `db`.
from pymongo import MongoClient

client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']

# PRETTY: a wide pretty-printer bound to `pp`.
import pprint

pp = pprint.PrettyPrinter(width=160, indent=4)
MapReduce the basics
This tutorial introduces the MapReduce
command.
MapReduce examples are available.
Find the total population of each continent
from bson.code import Code

# Total population per continent: map emits (continent, population) for every
# document and reduce adds up the values emitted under the same continent.
map_fn = Code("function(){emit(this.continent, this.population)}")
reduce_fn = Code("""function(key, values){ return Array.sum(values); }""")
temp = db.world.map_reduce(map=map_fn, reduce=reduce_fn, out={"inline":1})
pp.pprint(temp["results"])
from bson.code import Code

# Sum the population of each continent.
#   map:    emit one (continent, population) pair per document
#   reduce: add together all populations emitted for one continent
temp = db.world.map_reduce(
    out={"inline":1},
    map=Code("function(){emit(this.continent, this.population)}"),
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),
)
results = temp["results"]
pp.pprint(results)
Use the previous answer to find the population of the world to the nearest million
from bson.code import Code

# World population to the nearest million. Every document is emitted under a
# single shared key, so reduce produces one grand total.
#
# The rounding lives in finalize, not in reduce: MongoDB may feed the output
# of one reduce call back into another reduce call for the same key, so reduce
# must stay a plain associative sum. Rounding partial sums inside reduce would
# compound the rounding error; finalize runs once on the final value.
temp = db.world.map_reduce(
    map=Code("function(){emit('World Population in Millions', this.population)}"),
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),
    finalize=Code("""function(key, total){ return Math.round(total/1000000)*1000000; }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Count number of countries by first letter
from bson.code import Code

# Number of countries per initial letter: map emits (first character of the
# name, 1) and reduce sums the ones emitted for each letter.
letter_map = Code("""function(){ emit((this.name).substring(0,1), 1)}""")
letter_reduce = Code("""function(key, values){ return Array.sum(values); }""")
temp = db.world.map_reduce(map=letter_map, reduce=letter_reduce, out={"inline":1})
pp.pprint(temp["results"])
Show the number of countries on each continent
from bson.code import Code

# Number of countries on each continent: map emits (continent, 1) per document
# and reduce sums the ones emitted for each continent.
continent_map = Code("function(){emit(this.continent, 1)}")
continent_reduce = Code("""function(key, values){ return Array.sum(values); }""")
temp = db.world.map_reduce(
    map=continent_map,
    reduce=continent_reduce,
    out={"inline":1},
)
pp.pprint(temp["results"])
Show the name and area of the 3 smallest countries (ignore areas of 0 or None)
from bson.code import Code

# Name and area of the three smallest countries.
# The query drops documents whose area is None or 0, the remaining documents
# are sorted by ascending area and only the first three reach the map step.
area_filter = {"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]}
name_area_map = Code("function(){emit(this.name, this.area)}")
# NOTE(review): this reduce body is empty and returns nothing; presumably it
# is never invoked because each name is emitted only once -- confirm.
noop_reduce = Code("function(key, values){}")

temp = db.world.map_reduce(
    query=area_filter,
    sort={"area":1},
    limit=3,
    map=name_area_map,
    reduce=noop_reduce,
    out={"inline":1},
)
pp.pprint(temp["results"])
Return the first and last country based on name order for each continent
from bson.code import Code

# First and last country (by name order) on each continent.
# map emits {first, last} seeded with the country's own name; reduce keeps the
# smallest `first` and the largest `last` among the values for one continent.
#
# The accumulator starts from values[0] instead of the fixed 'ZZZ'/'AAA'
# sentinels: those are not real bounds in JavaScript string order (for
# example "Zambia" > "ZZZ", and any name with a lowercase or non-ASCII initial
# also sorts after "ZZZ"), so a sentinel could leak into the result.
temp = db.world.map_reduce(
    map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"),
    reduce=Code("""function(key, values){
        var ret = {first:values[0].first, last:values[0].last};
        for(var i=1;i<values.length;i++){
            if (ret.first>values[i].first) ret.first=values[i].first;
            if (ret.last<values[i].last) ret.last=values[i].last;
        }
        return ret;
    }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Without using query,
return the name and capital of every country whose name or capital starts with an 'L'
from bson.code import Code

# Name and capital of every country whose name or capital starts with 'L',
# filtering inside map rather than with a query.
#
# map: a missing/null name or capital is treated as an empty string so that
#      startsWith() cannot throw, and the two tests are combined with the
#      logical || (the bitwise | always evaluated both sides).
# reduce: returns a single value of the same type map emits instead of the
#      whole values array. NOTE(review): reduce is presumably never invoked
#      here because each name is emitted once -- confirm names are unique.
temp = db.world.map_reduce(
    map=Code("""function(){
        var name = this.name || '';
        var capital = this.capital || '';
        if (name.startsWith('L') || capital.startsWith('L'))
            emit(this.name, this.capital);
    }"""),
    reduce=Code("""function(key, values){ return values[0]; }"""),
    out={"inline":1})
pp.pprint(temp["results"])
Count number of countries by first letter
from bson.code import Code

# Count the countries that share each initial letter.
#   map:    emit (first character of the name, 1)
#   reduce: sum the ones emitted for a letter
temp = db.world.map_reduce(
    out={"inline":1},
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),
    map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""),
)
results = temp["results"]
pp.pprint(results)
Show the first and last city for each letter and the count of cities
from bson.code import Code

# For each initial letter: the first and last capital (by name order) and the
# number of capitals starting with that letter. Documents without a capital
# are skipped in map.
#
# Each emitted value carries count:1 and reduce adds the counts together, so
# the result is still right when MongoDB feeds an earlier reduce result back
# into reduce for the same key (counting array entries would count a partial
# result as one city). Keys with a single capital already arrive with
# count:1, so no finalize step is needed to patch the count in.
#
# first/last start from values[0] rather than the 'ZZZ'/'AAA' sentinels,
# which are not real bounds in JavaScript string order (e.g. "Zagreb" > "ZZZ").
temp = db.world.map_reduce(
    map=Code("""function(){
        if(this.capital)
            emit((this.capital).substring(0,1),
                 {first:this.capital, last:this.capital, count:1});
    }"""),
    reduce=Code("""function(key, values){
        var ret = {first:values[0].first, last:values[0].last, count:0};
        for(var i=0;i<values.length;i++){
            if (ret.first>values[i].first) ret.first=values[i].first;
            if (ret.last<values[i].last) ret.last=values[i].last;
            ret.count += values[i].count;
        }
        return ret;
    }"""),
    out={"inline":1})
pp.pprint(temp["results"])