Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "MAPREDUCE Tutorial"

From NoSQLZoo
Jump to: navigation, search
(Provided a much better target answer for Q8.)
 
(10 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
==MapReduce - The Basics==
 
==MapReduce - The Basics==
<p>This tutorial introduces the <code>MapReduce</code> command.</p>
+
<p>This tutorial introduces the '''MapReduce''' command.</p>
 
[[MapReduce]] examples are available.
 
[[MapReduce]] examples are available.
 
<div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
 
<div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
Line 6: Line 6:
 
*They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.
 
*They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.
  
It is recommended to have some familiarity with JavaScript before attempting the following questions: [https://developer.mozilla.org/bm/docs/Web/JavaScript JavaScript | MDN]
+
It is recommended to have some familiarity with JavaScript before attempting the following questions:  
 +
[[JavaScript: Fundamentals]]<br/>
 +
[https://developer.mozilla.org/bm/docs/Web/JavaScript JavaScript | MDN]
 
<div class='extra_space' style='width:1em; height:6em;'></div></div>
 
<div class='extra_space' style='width:1em; height:6em;'></div></div>
  
Line 15: Line 17:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     if (this.continent === "Europe"){
 
     if (this.continent === "Europe"){
 
       emit(this.name, null);
 
       emit(this.name, null);
 
     }
 
     }
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return v;  
+
     return v;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){if(this.continent=="Europe"){emit(this.name,null)}},function(_,v){return v},{out:{inline:1}})</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){if(this.continent=="Europe"){emit(this.name,null)}},function(_,v){return v},{out:{inline:1}})</nowiki></pre>
Line 33: Line 35:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out:{inline:1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit('World Population in Millions',this.population)},function(_,values){return Math.round(Array.sum(values)/1000000)*1000000},{out:{inline:1}});</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit('World Population in Millions',this.population)},function(_,values){return Math.round(Array.sum(values)/1000000)*1000000},{out:{inline:1}});</nowiki></pre>
Line 48: Line 50:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit((this.name).substring(0,1),1)},function(_,values){return Array.sum(values)},{out:{inline:1}});</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit((this.name).substring(0,1),1)},function(_,values){return Array.sum(values)},{out:{inline:1}});</nowiki></pre>
Line 63: Line 65:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
);
+
);</nowiki></pre>
</nowiki></pre>
 
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.continent,1)},function(_,v){return Array.sum(v)},{out:{inline:1}})</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.continent,1)},function(_,v){return Array.sum(v)},{out:{inline:1}})</nowiki></pre>
 
</div>
 
</div>
Line 79: Line 80:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
);
+
);</nowiki></pre>
</nowiki></pre>
 
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.name,this.area)},function(_,v){return v},{limit:3,out:{inline:1},query:{"$and":[{"area":{"$ne":null}},{"area":{"$ne":0}}]},sort:{"area":1}});</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.name,this.area)},function(_,v){return v},{limit:3,out:{inline:1},query:{"$and":[{"area":{"$ne":null}},{"area":{"$ne":0}}]},sort:{"area":1}});</nowiki></pre>
 
</div>
 
</div>
Line 95: Line 95:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out:{inline:1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.continent,{first:this.name,last:this.name})},function(_,v){let r={first:'ZZZ',last:'AAA'};for(let i=0;i<v.length;i++){if(r.first>v[i].first){r.first=v[i].first}
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){emit(this.continent,{first:this.name,last:this.name})},function(_,v){let r={first:'ZZZ',last:'AAA'};for(let i=0;i<v.length;i++){if(r.first>v[i].first){r.first=v[i].first}
Line 111: Line 111:
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
);
+
);</nowiki></pre>
</nowiki></pre>
 
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){if((this.name).startsWith('M')){emit(this.name,null)}
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){if((this.name).startsWith('M')){emit(this.name,null)}
 
if((this.capital).startsWith('M')){emit(this.capital,null)}},function(_,v){return v},{out:{inline:1}});</nowiki></pre>
 
if((this.capital).startsWith('M')){emit(this.capital,null)}},function(_,v){return v},{out:{inline:1}});</nowiki></pre>
 
</div>
 
</div>
 
 
<div class="q" data-lang="mongo">
 
<div class="q" data-lang="mongo">
<p class="strong">For each letter in the alphabet, find the amount of cities and the cities that come first and last alphabetically. e.g.</p>
+
<p class="strong">For each letter in the alphabet, find the number of cities that start with that letter. <br/>
<pre>
+
Additionally, find the two cities that come first and last alphabetically for that letter. e.g.</p>
 +
<syntaxhighlight lang="JavaScript">
 
{
 
{
 
"_id" : "B",
 
"_id" : "B",
Line 135: Line 134:
 
}
 
}
 
},
 
},
</pre>
+
</syntaxhighlight>
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out:{inline:1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
<pre class="ans"><nowiki>db.world.mapReduce(function(){if(this.capital){emit((this.capital).substring(0,1),{first:this.capital,last:this.capital})}},function(_,v){let r={first:'ZZZ',last:'AAA',count:0};for(let i=0;i<v.length;i++){if(r.first>v[i].first){r.first=v[i].first};if(r.last<v[i].last){r.last=v[i].last};r.count+=1}
+
<pre class="ans"><nowiki>db.world.mapReduce(function(){this.capital&&emit(this.capital[0],{first:this.capital,last:this.capital,count:1})},function(t,i){return i.sort(function(t,i){return t.first<i.first?-1:1}),{first:i[0].first,last:i[i.length-1].first,count:i.length}},{out:{inline:1}});</nowiki></pre>
return r},{finalize:function(_,v){if(!v.count){v.count=1}
 
return v},out:{inline:1}});</nowiki></pre>
 
 
</div>
 
</div>
  
Line 159: Line 156:
 
   5000000 to 10000000
 
   5000000 to 10000000
 
   10000000 to 15000000
 
   10000000 to 15000000
   More than 15000000
+
   More than 15000000
 
<pre class="def"><nowiki>
 
<pre class="def"><nowiki>
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){
+
   function () {
 
     emit(null, null);
 
     emit(null, null);
 
   },
 
   },
   function(k, v){  
+
   function (k, v) {
     return null;  
+
     return null;
 
   },
 
   },
   {out: {inline: 1}}
+
   {out: {inline: 1}}
 
);</nowiki></pre>
 
);</nowiki></pre>
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){var p=this.population;switch(!0){case p<1000000:emit("0 TO 1000000",1);break;case p<2000000:emit("1000000 TO 2000000",1);break;case p<3000000:emit("2000000 TO 3000000",1);break;case p<5000000:emit("3000000 TO 5000000",1);break;case p<10000000:emit("5000000 TO 10000000",1);break;case p<15000000:emit("10000000 TO 15000000",1);break
 
<pre class="ans"><nowiki>db.world.mapReduce(function(){var p=this.population;switch(!0){case p<1000000:emit("0 TO 1000000",1);break;case p<2000000:emit("1000000 TO 2000000",1);break;case p<3000000:emit("2000000 TO 3000000",1);break;case p<5000000:emit("3000000 TO 5000000",1);break;case p<10000000:emit("5000000 TO 10000000",1);break;case p<15000000:emit("10000000 TO 15000000",1);break
 
case p>15000000:emit("MORE THAN 15000000",1);break}},function(_,v){return Array.sum(v)},{out:{inline:1}});</nowiki></pre>
 
case p>15000000:emit("MORE THAN 15000000",1);break}},function(_,v){return Array.sum(v)},{out:{inline:1}});</nowiki></pre>
 
</div>
 
</div>
 +
[https://goo.gl/forms/ep8rBbCQSa381ic82 {{huge| Survey}}] <br/>
 +
Do you have thoughts about this website that you would like to share? Help improve NoSQLZoo!

Latest revision as of 20:19, 6 February 2019

MapReduce - The Basics

This tutorial introduces the MapReduce command.

MapReduce examples are available.

The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
  • Together they can solve a huge range of common database queries.
  • They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.

It is recommended to have some familiarity with JavaScript before attempting the following questions: JavaScript: Fundamentals
JavaScript | MDN

Map

The map function can be used to filter and transform the data:

Use a map function to get the names of countries in Europe.

db.world.mapReduce(
  function () {
    if (this.continent === "Europe"){
      emit(this.name, null);
    }
  },
  function (k, v) {
    return v;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){if(this.continent=="Europe"){emit(this.name,null)}},function(_,v){return v},{out:{inline:1}})

Use the previous answer to find the population of the world to the nearest million.

Use the JavaScript round function: Math.round(population/1000000)*1000000
db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){emit('World Population in Millions',this.population)},function(_,values){return Math.round(Array.sum(values)/1000000)*1000000},{out:{inline:1}});

For each letter, determine how many country names begin with that letter.

db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){emit((this.name).substring(0,1),1)},function(_,values){return Array.sum(values)},{out:{inline:1}});

Show the number of countries in each continent.

db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){emit(this.continent,1)},function(_,v){return Array.sum(v)},{out:{inline:1}})

Show the name and area of the smallest 3 countries by area. Ignore records where the area is 0 or null.

db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){emit(this.name,this.area)},function(_,v){return v},{limit:3,out:{inline:1},query:{"$and":[{"area":{"$ne":null}},{"area":{"$ne":0}}]},sort:{"area":1}});

For each continent, find the first and last country alphabetically.

db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){emit(this.continent,{first:this.name,last:this.name})},function(_,v){let r={first:'ZZZ',last:'AAA'};for(let i=0;i<v.length;i++){if(r.first>v[i].first){r.first=v[i].first}
if(r.last<v[i].last){r.last=v[i].last}}
return r},{out:{inline:1}});

Return country names or capital cities that start with the letter 'M' as keys; use null as the value.

db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){if((this.name).startsWith('M')){emit(this.name,null)}
if((this.capital).startsWith('M')){emit(this.capital,null)}},function(_,v){return v},{out:{inline:1}});

For each letter in the alphabet, find the number of cities that start with that letter.
Additionally, find the two cities that come first and last alphabetically for that letter. e.g.

{
	"_id" : "B",
	"value" : {
		"first" : "Baghdad",
		"last" : "Bujumbura",
		"count" : 25
	}
},
db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){this.capital&&emit(this.capital[0],{first:this.capital,last:this.capital,count:1})},function(t,i){return i.sort(function(t,i){return t.first<i.first?-1:1}),{first:i[0].first,last:i[i.length-1].first,count:i.length}},{out:{inline:1}});

Show the number of countries whose population falls in each of the following ranges:

 0 to 1000000
 1000000 to 2000000
 2000000 to 3000000
 3000000 to 5000000
 5000000 to 10000000
 10000000 to 15000000
 More than 15000000
db.world.mapReduce(
  function () {
    emit(null, null);
  },
  function (k, v) {
    return null;
  },
  {out: {inline: 1}}
);
db.world.mapReduce(function(){var p=this.population;switch(!0){case p<1000000:emit("0 TO 1000000",1);break;case p<2000000:emit("1000000 TO 2000000",1);break;case p<3000000:emit("2000000 TO 3000000",1);break;case p<5000000:emit("3000000 TO 5000000",1);break;case p<10000000:emit("5000000 TO 10000000",1);break;case p<15000000:emit("10000000 TO 15000000",1);break
case p>15000000:emit("MORE THAN 15000000",1);break}},function(_,v){return Array.sum(v)},{out:{inline:1}});

Survey
Do you have thoughts about this website that you would like to share? Help improve NoSQLZoo!