MAPREDUCE basics
From NoSQLZoo
#ENCODING
# Re-wrap standard output so that everything printed below is encoded as UTF-16.
import io
import sys
# sys.stdout.buffer is the underlying binary stream; the new text wrapper
# replaces sys.stdout for the remainder of the script.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
# Connect to MongoDB and select the `progzoo` database used by every example below.
from pymongo import MongoClient
# Called with no arguments, so the client uses PyMongo's default connection settings.
client = MongoClient()
# Log in to the progzoo database with the tutorial's credentials.
# NOTE(review): Database.authenticate() is deprecated in later PyMongo 3.x
# releases and absent from PyMongo 4 — confirm the installed driver version.
client.progzoo.authenticate('scott','tiger')
# Handle to the database; the examples query its `world` collection.
db = client['progzoo']
#PRETTY
# Shared pretty-printer used by the examples to display their results.
import pprint
# indent=4: each nesting level of the printed structure is indented by four spaces.
pp = pprint.PrettyPrinter(indent=4)
MapReduce: the basics
This tutorial introduces the MapReduce command.
MapReduce examples are available.
Show a list of countries found in North America
# Starting example: list the names of the countries on one continent.
# NOTE(review): the filter below selects "Africa" although the heading asks
# for North America — presumably the reader is meant to edit it; confirm.
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit one (continent, country name) pair per matching document.
    map=Code("function(){emit(this.continent, this.name)}"),
    # reduce: turn the array of names emitted for a continent into one string.
    reduce=Code("function(key, values){return values.toString()}"),
    # Return the results inline in the reply rather than writing a collection.
    out={"inline":1},
    # Only documents matching this filter are passed to map.
    query={"continent":"Africa"},
)
pp.pprint(temp["results"])
# Answer: list the countries found in North America.
# (The import, the query and the print were fused onto one line, which is not
# valid Python; they are restored here as separate statements.)
from bson.code import Code

temp = db.world.map_reduce(
    # Only documents whose continent is North America are passed to map.
    query={"continent":"North America"},
    # map: emit one (continent, country name) pair per matching document.
    map=Code("function(){emit(this.continent, this.name)}"),
    # reduce: turn the array of names emitted for the continent into one string.
    reduce=Code("function(key, values){return values.toString()}"),
    # Return the results inline in the reply rather than writing a collection.
    out={"inline":1},
)
pp.pprint(temp["results"])
Show the number of countries on each continent
# Count the countries on each continent.
# (The import and the assignment were fused onto one line, which is not valid
# Python; they are separate statements here.)
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit a 1 for every country, keyed by its continent.
    map=Code("function(){emit(this.continent, 1)}"),
    # reduce: add up the 1s emitted for a continent to get its country count.
    reduce=Code("""function(key, values){
    return Array.sum(values);
}"""),
    # Return the results inline in the reply rather than writing a collection.
    out={"inline":1},
)
pp.pprint(temp["results"])
Show the smallest 3 countries (ignore areas of 0 or None)
# Show the three smallest countries, skipping areas that are 0 or None.
# (The import and the assignment were fused onto one line, which is not valid
# Python; they are separate statements here.)
from bson.code import Code

temp = db.world.map_reduce(
    # Exclude documents whose area is missing/None or zero.
    query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]},
    # Sort the input by ascending area and keep only the first three documents,
    # so map only ever sees the three smallest countries.
    sort={"area":1},
    limit=3,
    # map: emit one (country name, area) pair per document.
    map=Code("function(){emit(this.name, this.area)}"),
    # reduce: intentionally empty. MongoDB does not call reduce for a key that
    # has a single value; this relies on country names being unique
    # (presumably true for this dataset — verify).
    reduce=Code("function(key, values){}"),
    # Return the results inline in the reply rather than writing a collection.
    out={"inline":1},
)
pp.pprint(temp["results"])
Return the first and last country based on name order for each continent
# Return the first and last country, in name order, for each continent.
# (The import and the assignment were fused onto one line, which is not valid
# Python; they are separate statements here.)
from bson.code import Code

temp = db.world.map_reduce(
    # map: every country starts out as both the min and the max of its continent.
    map=Code("function(){emit(this.continent, {min:this.name,max:this.name})}"),
    # reduce: fold the partial {min, max} pairs into one pair. The result has
    # the same shape as the emitted values, so MongoDB may safely feed it back
    # into reduce. The accumulator is seeded from the first value rather than
    # from the sentinels 'ZZZ'/'AAA', which were wrong for names that sort
    # outside that range (e.g. "Zimbabwe" > "ZZZ" in a JavaScript comparison).
    reduce=Code("""function(key, values){
    var ret = {min:values[0].min, max:values[0].max};
    for(var i=1;i<values.length;i++){
        if (ret.min>values[i].min) ret.min=values[i].min;
        if (ret.max<values[i].max) ret.max=values[i].max;
    }
    return ret;
}"""),
    # Return the results inline in the reply rather than writing a collection.
    out={"inline":1},
)
pp.pprint(temp["results"])