Difference between revisions of "MAPREDUCE Tutorial"
(Created page with "==MapReduce the basics== <p>This tutorial introduces the <code>MapReduce</code> command.</p> MapReduce examples are available. <div>The MapReduce function allows to func...") |
(Convert answers to mongo shell and tidy.) |
||
Line 1: | Line 1: | ||
==MapReduce the basics== | ==MapReduce the basics== | ||
− | + | <p>This tutorial introduces the <code>MapReduce</code> command.</p> | |
[[MapReduce]] examples are available. | [[MapReduce]] examples are available. | ||
<div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because: | <div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because: | ||
− | *Together they can solve a huge range of common database queries | + | *Together they can solve a huge range of common database queries. |
− | *They can be distributed effectively; you can share the work load among a number of processors and get your answer faster | + | *They can be distributed effectively; you can share the work load among a number of processors and get your answer faster. |
− | |||
+ | It is recommended to have some familiarity with JavaScript before attempting the following questions: [https://developer.mozilla.org/bm/docs/Web/JavaScript MDN] | ||
+ | <div class='extra_space' style='width:1em; height:6em;'></div></div> | ||
==Map== | ==Map== | ||
The map function can be used to filter and transform the data: | The map function can be used to filter and transform the data: | ||
− | <div class=q data-lang="mongo"> | + | <div class="q" data-lang="mongo"> |
− | <p class=strong>Use a map function to get the countries | + | <p class="strong">Use a map function to get the names of countries in Europe.</p> |
− | <pre class=def> | + | <pre class="def"> |
db.world.mapReduce( | db.world.mapReduce( | ||
− | function(){if (this.continent=="Europe") emit(this.name,null);}, | + | function(){ |
+ | if (this.continent === "Europe"){ | ||
+ | emit(this.name, null); | ||
+ | } | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return v; | ||
+ | }, | ||
{out:{inline:1}} | {out:{inline:1}} | ||
) | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
db.world.mapReduce( | db.world.mapReduce( | ||
− | function(){if (this.continent=="Europe") emit(this.name,null);}, | + | function(){if (this.continent=="Europe"){emit(this.name,null);}}, |
+ | function(k, v){ return v; }, | ||
{out:{inline:1}} | {out:{inline:1}} | ||
) | ) | ||
− | </ | + | </pre> |
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong>Use the previous answer to find the population of the world to the nearest million</p> | + | <p class="strong">Use the previous answer to find the population of the world to the nearest million.</p> |
− | <div class=hint title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div> | + | <div class="hint" title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div> |
− | <pre class=def> | + | <pre class="def"> |
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){emit('World Population in Millions', this.population)}, | |
− | + | function(key, values){ | |
− | |||
return Math.round(Array.sum(values)/1000000)*1000000; | return Math.round(Array.sum(values)/1000000)*1000000; | ||
− | } | + | }, |
− | out | + | {out: {inline :1}} |
− | + | ) | |
− | </ | + | </pre> |
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong> | + | <p class="strong">For each letter, determine how many country names begin with that letter.</p> |
− | <pre class=def> | + | <pre class="def"> |
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){ emit((this.name).substring(0,1), 1)}, | |
− | + | function(key, values){ | |
− | |||
return Array.sum(values); | return Array.sum(values); | ||
− | } | + | }, |
− | out | + | {out: {inline:1}} |
− | + | ) | |
− | </ | + | </pre> |
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong>Show the number of countries | + | <p class="strong">Show the number of countries in each continent.</p> |
− | <pre class=def> | + | <pre class="def"> |
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){emit(this.continent, 1)}, | |
− | + | function(key, values){ | |
− | |||
return Array.sum(values); | return Array.sum(values); | ||
− | } | + | }, |
− | out | + | {out: {inline :1}} |
− | + | ) | |
− | </ | + | </pre> |
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong>Show the smallest 3 countries | + | <p class="strong">Show the name and area of the smallest 3 countries by area. Ignore records where the area is 0 or null.</p> |
− | <pre class=def> | + | <pre class="def"> |
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){emit(this.name, this.area);}, | |
− | query | + | function(k, v){ return v;}, |
− | + | { | |
− | + | limit: 3, | |
− | + | out: {inline:1}, | |
− | + | query: {"$and": [{"area": {"$ne": null}}, {"area": {"$ne": 0}}]}, | |
− | + | sort: {"area": 1} | |
+ | } | ||
) | ) | ||
+ | </pre> | ||
+ | </div> | ||
− | + | <div class="q" data-lang="mongo"> | |
− | + | <p class="strong">For each continent, find the first and last country alphabetically.</p> | |
+ | <pre class="def"> | ||
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
) | ) | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){emit(this.continent, {first:this.name, last:this.name});}, | |
− | + | function(key, values){ | |
− | + | let ret = {first:'ZZZ', last:'AAA'}; | |
− | + | for(let i=0; i < values.length; i++){ | |
− | for( | + | if (ret.first > values[i].first) { ret.first = values[i].first; } |
− | if (ret.first>values[i].first) ret.first=values[i].first; | + | if (ret.last < values[i].last) { ret.last = values[i].last; } |
− | if (ret.last<values[i].last) ret.last=values[i].last; | ||
} | } | ||
return ret; | return ret; | ||
− | } | + | }, |
− | out | + | {out: {inline:1}} |
− | + | ) | |
+ | </pre> | ||
</div> | </div> | ||
− | + | <div class="q" data-lang="mongo"> | |
− | + | <p class="strong">Return country names or capital cities that start with the letter 'M' as keys, using null as the value.</p> | ||
− | <div class=q data-lang=" | + | <pre class="def"> |
− | <p class=strong>Return country | + | db.world.mapReduce( |
− | <pre class=def> | + | function(){ |
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | < | + | <pre class="ans"> |
− | + | db.world.mapReduce( | |
− | + | function(){ | |
− | + | if((this.name).startsWith('M')) {emit(this.name,null); } | |
− | + | if((this.capital).startsWith('M')) {emit(this.capital,null); } | |
− | + | }, | |
− | + | function(key, values){ | |
− | |||
− | |||
− | |||
return values; | return values; | ||
− | } | + | }, |
− | + | {out: {inline :1}} | |
− | + | ) | |
− | + | </pre> | |
− | </ | ||
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong> | + | <p class="strong">For each letter in the alphabet, find the number of capital cities that begin with that letter, and the capital cities that come first and last alphabetically, e.g.</p> | ||
− | <pre | + | <pre> |
+ | { | ||
+ | "_id" : "B", | ||
+ | "value" : { | ||
+ | "first" : "Baghdad", | ||
+ | "last" : "Bujumbura", | ||
+ | "count" : 25 | ||
+ | } | ||
+ | }, | ||
</pre> | </pre> | ||
− | < | + | <pre class="def"> |
− | + | db.world.mapReduce( | |
− | + | function(){ | |
− | + | emit(null, null); | |
− | + | }, | |
− | + | function(k, v){ | |
− | for( | + | return null; |
− | if (ret.first>values[i].first) ret.first=values[i].first; | + | }, |
− | if (ret.last<values[i].last) ret.last=values[i].last; | + | {out:{inline:1}} |
+ | ) | ||
+ | </pre> | ||
+ | <pre class="ans"> | ||
+ | db.world.mapReduce( | ||
+ | function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}, | ||
+ | function(key, values){ | ||
+ | let ret = {first:'ZZZ',last:'AAA', count:0}; | ||
+ | for(let i=0; i < values.length; i++){ | ||
+ | if (ret.first>values[i].first) { ret.first = values[i].first }; | ||
+ | if (ret.last<values[i].last) { ret.last = values[i].last }; | ||
ret.count += 1; | ret.count += 1; | ||
} | } | ||
− | |||
return ret; | return ret; | ||
− | } | + | }, |
− | finalize | + | { |
− | + | finalize: function(key, val){ | |
− | + | if(!val.count){ | |
− | + | val.count = 1; | |
− | + | } | |
− | + | return val; | |
− | + | }, | |
− | + | out: {inline: 1} | |
− | + | } | |
− | </ | + | ) |
+ | </pre> | ||
</div> | </div> | ||
− | <div class=q data-lang=" | + | <div class="q" data-lang="mongo"> |
− | <p class=strong>Show country count for countries in the ranges</p> | + | <p class="strong">Show the number of countries whose population falls in each of the following ranges:</p> | ||
0 to 1000000 | 0 to 1000000 | ||
1000000 to 2000000 | 1000000 to 2000000 | ||
Line 177: | Line 246: | ||
10000000 to 15000000 | 10000000 to 15000000 | ||
More than 15000000 | More than 15000000 | ||
− | <pre class=def> | + | <pre class="def"> |
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | emit(null, null); | ||
+ | }, | ||
+ | function(k, v){ | ||
+ | return null; | ||
+ | }, | ||
+ | {out:{inline:1}} | ||
+ | ) | ||
+ | </pre> | ||
+ | <pre class="ans"> | ||
+ | db.world.mapReduce( | ||
+ | function(){ | ||
+ | var pop = this.population; | ||
+ | switch(true){ | ||
+ | case pop<1000000: | ||
+ | emit("0 TO 1000000", 1); | ||
+ | break; | ||
+ | case pop<2000000: | ||
+ | emit("1000000 TO 2000000", 1); | ||
+ | break; | ||
+ | case pop<3000000: | ||
+ | emit("2000000 TO 3000000", 1); | ||
+ | break; | ||
+ | case pop<5000000: | ||
+ | emit("3000000 TO 5000000", 1); | ||
+ | break; | ||
+ | case pop<10000000: | ||
+ | emit("5000000 TO 10000000", 1); | ||
+ | break; | ||
+ | case pop<15000000: | ||
+ | emit("10000000 TO 15000000", 1); | ||
+ | break | ||
+ | case pop>15000000: | ||
+ | emit("MORE THAN 15000000", 1); | ||
+ | break; | ||
+ | } | ||
+ | }, | ||
+ | function(key, values){ return Array.sum(values); }, | ||
+ | {out: {inline: 1}} | ||
+ | ) | ||
</pre> | </pre> | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
</div> | </div> |
Revision as of 02:25, 6 April 2018
MapReduce the basics
This tutorial introduces the MapReduce command.
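MapReduce examples are available.
The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because: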
- Together they can solve a huge range of common database queries.
- They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.
It is recommended to have some familiarity with JavaScript before attempting the following questions; see MDN (https://developer.mozilla.org/bm/docs/Web/JavaScript).
Map
The map function can be used to filter and transform the data:
Use a map function to get the names of countries in Europe.
db.world.mapReduce( function(){ if (this.continent === "Europe"){ emit(this.name, null); } }, function(k, v){ return v; }, {out:{inline:1}} )
db.world.mapReduce( function(){if (this.continent=="Europe"){emit(this.name,null);}}, function(k, v){ return v; }, {out:{inline:1}} )
Use the previous answer to find the population of the world to the nearest million.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){emit('World Population in Millions', this.population)}, function(key, values){ return Math.round(Array.sum(values)/1000000)*1000000; }, {out: {inline :1}} )
For each letter, determine how many country names begin with that letter.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){ emit((this.name).substring(0,1), 1)}, function(key, values){ return Array.sum(values); }, {out: {inline:1}} )
Show the number of countries in each continent.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){emit(this.continent, 1)}, function(key, values){ return Array.sum(values); }, {out: {inline :1}} )
Show the name and area of the smallest 3 countries by area. Ignore records where the area is 0 or null.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){emit(this.name, this.area);}, function(k, v){ return v;}, { limit: 3, out: {inline:1}, query: {"$and": [{"area": {"$ne": null}}, {"area": {"$ne": 0}}]}, sort: {"area": 1} } )
For each continent, find the first and last country alphabetically.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){emit(this.continent, {first:this.name, last:this.name});}, function(key, values){ let ret = {first:'ZZZ', last:'AAA'}; for(let i=0; i < values.length; i++){ if (ret.first > values[i].first) { ret.first = values[i].first; } if (ret.last < values[i].last) { ret.last = values[i].last; } } return ret; }, {out: {inline:1}} )
Return country names or capital cities that start with the letter 'M' as keys, using null as the value.
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){ if((this.name).startsWith('M')) {emit(this.name,null); } if((this.capital).startsWith('M')) {emit(this.capital,null); } }, function(key, values){ return values; }, {out: {inline :1}} )
For each letter in the alphabet, find the number of capital cities that begin with that letter, and the capital cities that come first and last alphabetically, e.g.
{ "_id" : "B", "value" : { "first" : "Baghdad", "last" : "Bujumbura", "count" : 25 } },
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}, function(key, values){ let ret = {first:'ZZZ',last:'AAA', count:0}; for(let i=0; i < values.length; i++){ if (ret.first>values[i].first) { ret.first = values[i].first }; if (ret.last<values[i].last) { ret.last = values[i].last }; ret.count += 1; } return ret; }, { finalize: function(key, val){ if(!val.count){ val.count = 1; } return val; }, out: {inline: 1} } )
Show the number of countries whose population falls in each of the following ranges:
0 to 1000000
1000000 to 2000000
2000000 to 3000000
3000000 to 5000000
5000000 to 10000000
10000000 to 15000000
More than 15000000
db.world.mapReduce( function(){ emit(null, null); }, function(k, v){ return null; }, {out:{inline:1}} )
db.world.mapReduce( function(){ var pop = this.population; switch(true){ case pop<1000000: emit("0 TO 1000000", 1); break; case pop<2000000: emit("1000000 TO 2000000", 1); break; case pop<3000000: emit("2000000 TO 3000000", 1); break; case pop<5000000: emit("3000000 TO 5000000", 1); break; case pop<10000000: emit("5000000 TO 10000000", 1); break; case pop<15000000: emit("10000000 TO 15000000", 1); break case pop>15000000: emit("MORE THAN 15000000", 1); break; } }, function(key, values){ return Array.sum(values); }, {out: {inline: 1}} )