Difference between revisions of "MAPREDUCE basics"
Line 20: | Line 20: | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
− | <p class=strong> | + | <p class=strong>Find the total population of each continent</p> |
<pre class=def> | <pre class=def> | ||
from bson.code import Code | from bson.code import Code | ||
temp = db.world.map_reduce( | temp = db.world.map_reduce( | ||
− | + | map=Code("function(){emit(this.continent, this.population)}"), | |
− | + | reduce=Code("""function(key, values){ | |
− | + | return Array.sum(values); | |
− | + | }"""), | |
− | ) | + | out={"inline":1}) |
− | + | pp.pprint(temp["results"]) | |
− | pp.pprint( | ||
− | |||
− | ) | ||
</pre> | </pre> | ||
<div class=ans> | <div class=ans> | ||
from bson.code import Code | from bson.code import Code | ||
− | temp = db.world.map_reduce( | + | temp = db.world.map_reduce( |
+ | map=Code("function(){emit(this.continent, this.population)}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
pp.pprint(temp["results"]) | pp.pprint(temp["results"]) | ||
</div> | </div> | ||
Line 44: | Line 46: | ||
<p class=strong>Show the number of countries on each continent</p> | <p class=strong>Show the number of countries on each continent</p> | ||
<pre class=def> | <pre class=def> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, 1)}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
</pre> | </pre> | ||
<div class=ans> | <div class=ans> | ||
Line 75: | Line 85: | ||
temp["results"] | temp["results"] | ||
) | ) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Return the first and last country based on name order for each continent</p> | ||
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, {min:this.name,max:this.name})}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | var ret = {min:'ZZZ',max:'AAA'}; | ||
+ | for(var i=0;i<values.length;i++){ | ||
+ | if (ret.min>values[i].min) ret.min=values[i].min; | ||
+ | if (ret.max<values[i].max) ret.max=values[i].max; | ||
+ | } | ||
+ | return ret; | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
</div> | </div> | ||
</div> | </div> |
Revision as of 10:40, 30 July 2015
#ENCODING import io import sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16') #MONGO from pymongo import MongoClient client = MongoClient() client.progzoo.authenticate('scott','tiger') db = client['progzoo'] #PRETTY import pprint pp = pprint.PrettyPrinter(indent=4)
MapReduce the basics
This tutorial introduces the MapReduce command.
MapReduce examples are available.
Find the total population of each continent
from bson.code import Code temp = db.world.map_reduce( map=Code("function(){emit(this.continent, this.population)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1}) pp.pprint(temp["results"])
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, this.population)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the number of countries on each continent
from bson.code import Code temp = db.world.map_reduce( map=Code("function(){emit(this.continent, 1)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1}) pp.pprint(temp["results"])
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, 1)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the smallest 3 countries (ignore areas of 0 or None)
from bson.code import Code temp = db.world.map_reduce(
query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]}, sort={"area":1}, limit=3, map=Code("function(){emit(this.name, this.area)}"), reduce=Code("function(key, values){}"), out={"inline":1},
)
pp.pprint(
temp["results"]
)
Return the first and last country based on name order for each continent
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, {min:this.name,max:this.name})}"), reduce=Code("""function(key, values){ var ret = {min:'ZZZ',max:'AAA'}; for(var i=0;i<values.length;i++){ if (ret.min>values[i].min) ret.min=values[i].min; if (ret.max<values[i].max) ret.max=values[i].max; } return ret; }"""), out={"inline":1})
pp.pprint(temp["results"])
Return the first and last country based on name order for each continent
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, {min:this.name,max:this.name})}"), reduce=Code("""function(key, values){ var ret = {min:'ZZZ',max:'AAA'}; for(var i=0;i<values.length;i++){ if (ret.min>values[i].min) ret.min=values[i].min; if (ret.max<values[i].max) ret.max=values[i].max; } return ret; }"""), out={"inline":1})
pp.pprint(temp["results"])