Difference between revisions of "MAPREDUCE basics"
(9 intermediate revisions by 2 users not shown) | |||
Line 11: | Line 11: | ||
#PRETTY | #PRETTY | ||
import pprint | import pprint | ||
− | pp = pprint.PrettyPrinter(indent=4) | + | pp = pprint.PrettyPrinter(indent=4, width=160) |
</pre> | </pre> | ||
+ | |||
+ | ==MapReduce the basics== | ||
+ | <p>This tutorial introduces the <code>MapReduce</code> command.</p> | ||
+ | [[MapReduce]] examples are available. | ||
+ | <div class='extra_space' style='width:1em; height:6em;'></div> | ||
+ | |||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Find the total population of each continent</p> | ||
+ | <pre class=def> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, this.population)}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, this.population)}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
− | <p class=strong> | + | <p class=strong>Use the previous answer to find the population of the world to the nearest million</p> |
+ | <div class=hint title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div> | ||
<pre class=def> | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
from bson.code import Code | from bson.code import Code | ||
temp = db.world.map_reduce( | temp = db.world.map_reduce( | ||
− | query={" | + | map=Code("function(){emit('World Population in Millions', this.population)}"), |
− | map=Code("function(){emit(this. | + | reduce=Code("""function(key, values){ |
− | reduce=Code("function(key, values){ | + | return Math.round(Array.sum(values)/1000000)*1000000; |
− | out={"inline":1} | + | }"""), |
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Count the number of countries by the first letter of their name</p> | ||
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Show the number of countries on each continent</p> | ||
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, 1)}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return Array.sum(values); | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Show the name and area of the 3 smallest countries (ignore areas of 0 or None)</p> | ||
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]}, | ||
+ | sort={"area":1}, | ||
+ | limit=3, | ||
+ | map=Code("function(){emit(this.name, this.area)}"), | ||
+ | reduce=Code("function(key, values){}"), | ||
+ | out={"inline":1}, | ||
) | ) | ||
Line 28: | Line 111: | ||
temp["results"] | temp["results"] | ||
) | ) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Return the first and last country based on name order for each continent</p> | ||
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | var ret = {first:'ZZZ',last:'AAA'}; | ||
+ | for(var i=0;i<values.length;i++){ | ||
+ | if (ret.first>values[i].first) ret.first=values[i].first; | ||
+ | if (ret.last<values[i].last) ret.last=values[i].last; | ||
+ | } | ||
+ | return ret; | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Return every country name or capital city that starts with the letter 'M'</p> | ||
+ | <pre class=def> | ||
</pre> | </pre> | ||
<div class=ans> | <div class=ans> | ||
from bson.code import Code | from bson.code import Code | ||
− | temp = db.world.map_reduce( | + | temp = db.world.map_reduce( |
+ | map=Code("""function(){ | ||
+ | if((this.name).startsWith('M')) | ||
+ | emit(this.name,null); | ||
+ | if((this.capital).startsWith('M')) | ||
+ | emit(this.capital,null); | ||
+ | }"""), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | return values; | ||
+ | }"""), | ||
+ | |||
+ | out={"inline":1}) | ||
pp.pprint(temp["results"]) | pp.pprint(temp["results"]) | ||
</div> | </div> | ||
Line 37: | Line 158: | ||
<div class=q data-lang="py3"> | <div class=q data-lang="py3"> | ||
− | <p class=strong>Show the | + | <p class=strong>Show the first and last city for each letter and the count of cities</p> |
+ | <pre class=def> | ||
+ | </pre> | ||
+ | <div class=ans> | ||
+ | from bson.code import Code | ||
+ | temp = db.world.map_reduce( | ||
+ | map=Code("""function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}"""), | ||
+ | reduce=Code("""function(key, values){ | ||
+ | var ret = {first:'ZZZ',last:'AAA', count:0}; | ||
+ | for(var i=0;i<values.length;i++){ | ||
+ | if (ret.first>values[i].first) ret.first=values[i].first; | ||
+ | if (ret.last<values[i].last) ret.last=values[i].last; | ||
+ | ret.count += 1; | ||
+ | } | ||
+ | |||
+ | return ret; | ||
+ | }"""), | ||
+ | finalize=Code("""function(key, val){ | ||
+ | if(!val.count){ | ||
+ | val.count = 1; | ||
+ | return val; | ||
+ | }else | ||
+ | return val; | ||
+ | }"""), | ||
+ | out={"inline":1}) | ||
+ | pp.pprint(temp["results"]) | ||
+ | </div> | ||
+ | </div> | ||
+ | |||
+ | <div class=q data-lang="py3"> | ||
+ | <p class=strong>Show the number of countries whose population falls in each of the following ranges</p> | ||
+ | 0 to 1000000 | ||
+ | 1000000 to 2000000 | ||
+ | 2000000 to 3000000 | ||
+ | 3000000 to 5000000 | ||
+ | 5000000 to 10000000 | ||
+ | 10000000 to 15000000 | ||
+ | More than 15000000 | ||
<pre class=def> | <pre class=def> | ||
</pre> | </pre> | ||
<div class=ans> | <div class=ans> | ||
from bson.code import Code | from bson.code import Code | ||
− | temp = db.world.map_reduce(map=Code("function(){emit( | + | temp = db.world.map_reduce( |
+ | map=Code("""function(){ | ||
+ | var pop = this.population; | ||
+ | switch(true){ | ||
+ | case pop<1000000: | ||
+ | emit("0 TO 1000000", 1); | ||
+ | break; | ||
+ | case pop<2000000: | ||
+ | emit("1000000 TO 2000000", 1); | ||
+ | break; | ||
+ | case pop<3000000: | ||
+ | emit("2000000 TO 3000000", 1); | ||
+ | break; | ||
+ | case pop<5000000: | ||
+ | emit("3000000 TO 5000000", 1); | ||
+ | break; | ||
+ | case pop<10000000: | ||
+ | emit("5000000 TO 10000000", 1); | ||
+ | break; | ||
+ | case pop<15000000: | ||
+ | emit("10000000 TO 15000000", 1); | ||
+ | break | ||
+ | case pop>15000000: | ||
+ | emit("MORE THAN 15000000", 1); | ||
+ | break; | ||
+ | } | ||
+ | }"""), | ||
+ | reduce=Code("""function(key, values){ return Array.sum(values); }"""), | ||
+ | out={"inline":1}) | ||
pp.pprint(temp["results"]) | pp.pprint(temp["results"]) | ||
</div> | </div> | ||
</div> | </div> |
Latest revision as of 11:18, 27 June 2016
#ENCODING import io import sys sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16') #MONGO from pymongo import MongoClient client = MongoClient() client.progzoo.authenticate('scott','tiger') db = client['progzoo'] #PRETTY import pprint pp = pprint.PrettyPrinter(indent=4, width=160)
MapReduce the basics
This tutorial introduces the MapReduce command.
MapReduce examples are available.
Find the total population of each continent
from bson.code import Code temp = db.world.map_reduce( map=Code("function(){emit(this.continent, this.population)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1}) pp.pprint(temp["results"])
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, this.population)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])
Use the previous answer to find the population of the world to the nearest million
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit('World Population in Millions', this.population)}"), reduce=Code("""function(key, values){ return Math.round(Array.sum(values)/1000000)*1000000; }"""), out={"inline":1})
pp.pprint(temp["results"])
Count the number of countries by the first letter of their name
from bson.code import Code temp = db.world.map_reduce(
map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the number of countries on each continent
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, 1)}"), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the name and area of the 3 smallest countries (ignore areas of 0 or None)
from bson.code import Code temp = db.world.map_reduce(
query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]}, sort={"area":1}, limit=3, map=Code("function(){emit(this.name, this.area)}"), reduce=Code("function(key, values){}"), out={"inline":1},
)
pp.pprint(
temp["results"]
)
Return the first and last country based on name order for each continent
from bson.code import Code temp = db.world.map_reduce(
map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"), reduce=Code("""function(key, values){ var ret = {first:'ZZZ',last:'AAA'}; for(var i=0;i<values.length;i++){ if (ret.first>values[i].first) ret.first=values[i].first; if (ret.last<values[i].last) ret.last=values[i].last; } return ret; }"""), out={"inline":1})
pp.pprint(temp["results"])
Return every country name or capital city that starts with the letter 'M'
from bson.code import Code temp = db.world.map_reduce(
map=Code("""function(){ if((this.name).startsWith('M')) emit(this.name,null); if((this.capital).startsWith('M')) emit(this.capital,null); }"""), reduce=Code("""function(key, values){ return values; }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the first and last capital city (in alphabetical order) for each initial letter, and the count of capital cities
from bson.code import Code temp = db.world.map_reduce(
map=Code("""function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}"""), reduce=Code("""function(key, values){ var ret = {first:'ZZZ',last:'AAA', count:0}; for(var i=0;i<values.length;i++){ if (ret.first>values[i].first) ret.first=values[i].first; if (ret.last<values[i].last) ret.last=values[i].last; ret.count += 1; }
return ret; }"""), finalize=Code("""function(key, val){ if(!val.count){ val.count = 1; return val; }else return val; }"""), out={"inline":1})
pp.pprint(temp["results"])
Show the number of countries whose population falls in each of the following ranges:
0 to 1000000; 1000000 to 2000000; 2000000 to 3000000; 3000000 to 5000000; 5000000 to 10000000; 10000000 to 15000000; More than 15000000
from bson.code import Code temp = db.world.map_reduce(
map=Code("""function(){ var pop = this.population; switch(true){ case pop<1000000: emit("0 TO 1000000", 1); break; case pop<2000000: emit("1000000 TO 2000000", 1); break; case pop<3000000: emit("2000000 TO 3000000", 1); break; case pop<5000000: emit("3000000 TO 5000000", 1); break; case pop<10000000: emit("5000000 TO 10000000", 1); break; case pop<15000000: emit("10000000 TO 15000000", 1); break case pop>15000000: emit("MORE THAN 15000000", 1); break; } }"""), reduce=Code("""function(key, values){ return Array.sum(values); }"""), out={"inline":1})
pp.pprint(temp["results"])