Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "MAPREDUCE Tutorial"

From NoSQLZoo
Jump to: navigation, search
(Created page with "==MapReduce the basics== <p>This tutorial introduces the <code>MapReduce</code> command.</p> MapReduce examples are available. <div>The MapReduce function allows to func...")
 
(Convert answers to mongo shell and tidy.)
Line 1: Line 1:
 
==MapReduce the basics==
 
==MapReduce the basics==
  <p>This tutorial introduces the <code>MapReduce</code> command.</p>
+
<p>This tutorial introduces the <code>MapReduce</code> command.</p>
 
[[MapReduce]] examples are available.
 
[[MapReduce]] examples are available.
 
<div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
 
<div>The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
*Together they can solve a huge range of common database queries
+
*Together they can solve a huge range of common database queries.
*They can be distributed effectively; you can share the work load among a number of processors and get your answer faster
+
*They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.
<div class='extra_space' style='width:1em; height:6em;'></div>
 
  
 +
It is recommended to have some familiarity with JavaScript before attempting the following questions: [https://developer.mozilla.org/en-US/docs/Web/JavaScript MDN JavaScript documentation]
 +
<div class='extra_space' style='width:1em; height:6em;'></div></div>
 
==Map==
 
==Map==
 
The map function can be used to filter and transform the data:
 
The map function can be used to filter and transform the data:
<div class=q data-lang="mongo">
+
<div class="q" data-lang="mongo">
<p class=strong>Use a map function to get the countries of Europe</p>
+
<p class="strong">Use a map function to get the names of countries in Europe.</p>
<pre class=def>
+
<pre class="def">
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){if (this.continent=="Europe") emit(this.name,null);},
+
   function(){
 +
    if (this.continent === "Europe"){
 +
      emit(this.name, null);
 +
    }
 +
  },
 +
  function(k, v){
 +
    return v;
 +
  },
 
   {out:{inline:1}}   
 
   {out:{inline:1}}   
 
)
 
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
 
db.world.mapReduce(
 
db.world.mapReduce(
   function(){if (this.continent=="Europe") emit(this.name,null);},
+
   function(){if (this.continent=="Europe"){emit(this.name,null);}},
 +
  function(k, v){ return v; },
 
   {out:{inline:1}}   
 
   {out:{inline:1}}   
 
)
 
)
</div>
+
</pre>
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Use the previous answer to find the population of the world to the nearest million</p>
+
<p class="strong">Use the previous answer to find the population of the world to the nearest million.</p>
<div class=hint title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div>
+
<div class="hint" title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div>
<pre class=def>
+
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
     function(){emit('World Population in Millions', this.population)},
     map=Code("function(){emit('World Population in Millions', this.population)}"),
+
     function(key, values){
     reduce=Code("""function(key, values){
 
 
       return Math.round(Array.sum(values)/1000000)*1000000;
 
       return Math.round(Array.sum(values)/1000000)*1000000;
     }"""),
+
     },
     out={"inline":1})
+
     {out: {inline :1}}
pp.pprint(temp["results"])
+
)
</div>
+
</pre>
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Count number of countries by first letter</p>
+
<p class="strong">For each letter, determine how many country names begin with that letter.</p>
<pre class=def>
+
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
     function(){ emit((this.name).substring(0,1), 1)},
     map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""),
+
     function(key, values){
     reduce=Code("""function(key, values){
 
 
       return Array.sum(values);
 
       return Array.sum(values);
     }"""),
+
     },
     out={"inline":1})
+
     {out: {inline:1}}
pp.pprint(temp["results"])
+
)
</div>
+
</pre>
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Show the number of countries on each continent</p>
+
<p class="strong">Show the number of countries in each continent.</p>
<pre class=def>
+
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
     function(){emit(this.continent, 1)},
     map=Code("function(){emit(this.continent, 1)}"),
+
     function(key, values){
     reduce=Code("""function(key, values){
 
 
       return Array.sum(values);
 
       return Array.sum(values);
     }"""),
+
     },
     out={"inline":1})
+
     {out: {inline :1}}
pp.pprint(temp["results"])
+
)
</div>
+
</pre>
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Show the smallest 3 countries name and area (ignore areas of 0 or None)</p>
+
<p class="strong">Show the name and area of the smallest 3 countries by area. Ignore records where the area is 0 or null.</p>
<pre class=def>
+
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
         function(){emit(this.name, this.area);},
         query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]},
+
        function(k, v){ return v;},
        sort={"area":1},
+
        {
        limit=3,
+
            limit: 3,
         map=Code("function(){emit(this.name, this.area)}"),
+
            out: {inline:1},
        reduce=Code("function(key, values){}"),
+
            query: {"$and": [{"area": {"$ne": null}}, {"area": {"$ne": 0}}]},
        out={"inline":1},
+
            sort: {"area": 1}
 +
         }
 
)
 
)
 +
</pre>
 +
</div>
  
pp.pprint(
+
<div class="q" data-lang="mongo">
  temp["results"]
+
<p class="strong">For each continent, find the first and last country alphabetically.</p>
 +
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 
)
 
)
</div>
 
</div>
 
 
<div class=q data-lang="py3">
 
<p class=strong>Return the first and last country based on name order for each continent</p>
 
<pre class=def>
 
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
     function(){emit(this.continent, {first:this.name, last:this.name});},
     map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"),
+
     function(key, values){
     reduce=Code("""function(key, values){
+
       let ret = {first:'ZZZ', last:'AAA'};
       var ret = {first:'ZZZ',last:'AAA'};
+
       for(let i=0; i < values.length; i++){
       for(var i=0;i<values.length;i++){
+
         if (ret.first > values[i].first) { ret.first = values[i].first; }
         if (ret.first>values[i].first) ret.first=values[i].first;
+
         if (ret.last < values[i].last) { ret.last = values[i].last; }
         if (ret.last<values[i].last) ret.last=values[i].last;
 
 
       }
 
       }
 
       return ret;
 
       return ret;
     }"""),
+
     },
     out={"inline":1})
+
     {out: {inline:1}}
pp.pprint(temp["results"])
+
)
 +
</pre>
 
</div>
 
</div>
</div>
+
<div class="q" data-lang="mongo">
 
+
<p class="strong">Return country names or capital cities that start with a letter 'M' as keys, use null as the value.</p>
<div class=q data-lang="py3">
+
<pre class="def">
<p class=strong>Return country name or capital city that starts with a letter 'M'</p>
+
db.world.mapReduce(
<pre class=def>
+
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="ans">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
     function(){  
     map=Code("""function(){  
+
        if((this.name).startsWith('M')) {emit(this.name,null); }
                if((this.name).startsWith('M'))  
+
        if((this.capital).startsWith('M')) {emit(this.capital,null); }
                      emit(this.name,null);
+
    },
                if((this.capital).startsWith('M'))  
+
     function(key, values){
                  emit(this.capital,null);
 
            }"""),
 
     reduce=Code("""function(key, values){
 
 
       return values;
 
       return values;
     }"""),
+
     },  
      
+
     {out: {inline :1}}
    out={"inline":1})
+
)
pp.pprint(temp["results"])
+
</pre>
</div>
 
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Show the first and last city for each letter and the count of cities</p>
+
<p class="strong">For each letter in the alphabet, find the amount of cities and the cities that come first and last alphabetically. e.g.</p>
<pre class=def>
+
<pre>
 +
{
 +
"_id" : "B",
 +
"value" : {
 +
"first" : "Baghdad",
 +
"last" : "Bujumbura",
 +
"count" : 25
 +
}
 +
},
 
</pre>
 
</pre>
<div class=ans>
+
<pre class="def">
from bson.code import Code
+
db.world.mapReduce(
temp = db.world.map_reduce(
+
  function(){
     map=Code("""function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}"""),
+
    emit(null, null);
     reduce=Code("""function(key, values){
+
  },
      var ret = {first:'ZZZ',last:'AAA', count:0};
+
  function(k, v){
       for(var i=0;i<values.length;i++){
+
    return null;
         if (ret.first>values[i].first) ret.first=values[i].first;
+
  },
         if (ret.last<values[i].last) ret.last=values[i].last;
+
  {out:{inline:1}} 
 +
)
 +
</pre>
 +
<pre class="ans">
 +
db.world.mapReduce(
 +
     function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})},
 +
     function(key, values){
 +
      let ret = {first:'ZZZ',last:'AAA', count:0};
 +
       for(let i=0; i < values.length; i++){
 +
         if (ret.first>values[i].first) { ret.first = values[i].first };
 +
         if (ret.last<values[i].last) { ret.last = values[i].last };
 
         ret.count += 1;
 
         ret.count += 1;
 
       }
 
       }
 
 
       return ret;
 
       return ret;
     }"""),
+
     },
     finalize=Code("""function(key, val){
+
     {
      if(!val.count){
+
        finalize: function(key, val){
        val.count = 1;
+
            if(!val.count){
        return val;
+
              val.count = 1;
      }else
+
            }
        return val;
+
            return val;
    }"""),
+
        },
    out={"inline":1})
+
        out: {inline: 1}
pp.pprint(temp["results"])
+
    }
</div>
+
)
 +
</pre>
 
</div>
 
</div>
  
<div class=q data-lang="py3">
+
<div class="q" data-lang="mongo">
<p class=strong>Show country count for countries in the ranges</p>
+
<p class="strong">Show country count for countries in the following ranges:</p>
 
   0 to 1000000
 
   0 to 1000000
 
   1000000 to 2000000
 
   1000000 to 2000000
Line 177: Line 246:
 
   10000000 to 15000000
 
   10000000 to 15000000
 
   More than 15000000   
 
   More than 15000000   
<pre class=def>
+
<pre class="def">
 +
db.world.mapReduce(
 +
  function(){
 +
    emit(null, null);
 +
  },
 +
  function(k, v){
 +
    return null;
 +
  },
 +
  {out:{inline:1}} 
 +
)
 +
</pre>
 +
<pre class="ans">
 +
db.world.mapReduce(
 +
    function(){
 +
        var pop = this.population;
 +
        switch(true){
 +
            case pop<1000000:
 +
              emit("0 TO 1000000", 1);
 +
              break;
 +
            case pop<2000000:
 +
              emit("1000000 TO 2000000", 1);
 +
              break;
 +
            case pop<3000000:
 +
              emit("2000000 TO 3000000", 1);
 +
              break;
 +
            case pop<5000000:
 +
              emit("3000000 TO 5000000", 1);
 +
              break;
 +
            case pop<10000000:
 +
              emit("5000000 TO 10000000", 1);
 +
              break;
 +
            case pop<15000000:
 +
              emit("10000000 TO 15000000", 1);
 +
              break
 +
            case pop>15000000:
 +
              emit("MORE THAN 15000000", 1);
 +
              break;
 +
        }
 +
    },
 +
    function(key, values){ return Array.sum(values); },
 +
    {out: {inline: 1}}
 +
)
 
</pre>
 
</pre>
<div class=ans>
 
from bson.code import Code
 
temp = db.world.map_reduce(
 
    map=Code("""function(){
 
                  var pop = this.population;
 
                  switch(true){
 
                    case pop<1000000:
 
                      emit("0 TO 1000000", 1);
 
                      break;
 
                    case pop<2000000:
 
                      emit("1000000 TO 2000000", 1);
 
                      break;
 
                    case pop<3000000:
 
                      emit("2000000 TO 3000000", 1);
 
                      break;
 
                    case pop<5000000:
 
                      emit("3000000 TO 5000000", 1);
 
                      break;
 
                    case pop<10000000:
 
                      emit("5000000 TO 10000000", 1);
 
                      break;
 
                    case pop<15000000:
 
                      emit("10000000 TO 15000000", 1);
 
                      break
 
                    case pop>15000000:
 
                      emit("MORE THAN 15000000", 1);
 
                      break;
 
                  }
 
}"""),
 
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),     
 
    out={"inline":1})
 
pp.pprint(temp["results"])
 
</div>
 
 
</div>
 
</div>

Revision as of 02:25, 6 April 2018

MapReduce the basics

This tutorial introduces the MapReduce command.

MapReduce examples are available.

The MapReduce function allows two functions to be applied over a collection. The two functions map and reduce are important because:
  • Together they can solve a huge range of common database queries.
  • They can be distributed effectively; you can share the work load among a number of processors and get your answer faster.

It is recommended to have some familiarity with JavaScript before attempting the following questions; see the MDN JavaScript documentation.

Map

The map function can be used to filter and transform the data:

Use a map function to get the names of countries in Europe.

// Worked example: list the names of the countries whose continent is "Europe".
// map emits one (name, null) pair per matching document; the value carries no
// information, so reduce simply hands back null.
db.world.mapReduce(
  function(){
    if (this.continent === "Europe"){
      emit(this.name, null);
    }
  },
  function(k, v){
    // v is the ARRAY of values emitted for key k. A reduce function must return
    // a single value of the same type that map emitted (null here), never the
    // array itself.
    return null;
  },
  {out:{inline:1}}
)
// The same query written compactly on one line per argument.
db.world.mapReduce(
  function(){if (this.continent === "Europe"){emit(this.name, null);}},
  function(k, v){ return null; },
  {out:{inline:1}}
)

Use the previous answer to find the population of the world to the nearest million.

Use the JavaScript round function : Math.round(population/1000000)*1000000
// Starter template shown to the learner: emits a single null key/value and
// reduces to null. It is meant to be edited into the answer below.
db.world.mapReduce(
  function(){
    emit(null, null);
  },
  function(k, v){
    return null;
  },
  {out:{inline:1}}
)
// Answer: total world population, rounded to the nearest million.
db.world.mapReduce(
    // map: every document contributes its population under one shared key.
    function(){ emit('World Population in Millions', this.population); },
    // reduce: plain sum only. MongoDB may call reduce more than once for the
    // same key, feeding earlier results back in, so rounding here would round
    // partial sums and could skew the final total.
    function(key, values){
      return Array.sum(values);
    },
    {
        // finalize runs once per key after all reducing is done, which makes it
        // the safe place to round.
        finalize: function(key, total){
            return Math.round(total/1000000)*1000000;
        },
        out: {inline: 1}
    }
)

For each letter, determine how many country names begin with that letter.

// Starter template: a do-nothing map/reduce pair for the learner to fill in.
db.world.mapReduce(
  function () { emit(null, null); },
  function (key, values) { return null; },
  { out: { inline: 1 } }
);
// Answer: count how many country names start with each letter.
db.world.mapReduce(
  // map: emit the first character of the name with a count of 1.
  function () {
    const initial = (this.name).substring(0, 1);
    emit(initial, 1);
  },
  // reduce: add up the 1s (or earlier partial sums) for this letter.
  function (letter, counts) {
    const total = Array.sum(counts);
    return total;
  },
  { out: { inline: 1 } }
);

Show the number of countries in each continent.

// Starter template: a do-nothing map/reduce pair for the learner to fill in.
db.world.mapReduce(
  function () { emit(null, null); },
  function (key, values) { return null; },
  { out: { inline: 1 } }
);
// Answer: number of countries in each continent.
db.world.mapReduce(
  // map: one (continent, 1) pair per document.
  function () {
    const continentName = this.continent;
    emit(continentName, 1);
  },
  // reduce: sum the emitted counts for the continent.
  function (continentName, counts) {
    const total = Array.sum(counts);
    return total;
  },
  { out: { inline: 1 } }
);

Show the name and area of the smallest 3 countries by area. Ignore records where the area is 0 or null.

// Starter template: a do-nothing map/reduce pair for the learner to fill in.
db.world.mapReduce(
  function () { emit(null, null); },
  function (key, values) { return null; },
  { out: { inline: 1 } }
);
// Answer: name and area of the three smallest countries. The filtering,
// ordering and truncation are all done by the options object, on the input
// documents, before map runs.
db.world.mapReduce(
  // map: pass each surviving document through as (name, area).
  function () {
    emit(this.name, this.area);
  },
  // reduce: returns its values argument unchanged.
  // NOTE(review): presumably country names are unique, so this is never
  // invoked (reduce is skipped for single-value keys) — confirm.
  function (countryName, areas) {
    return areas;
  },
  {
    // Drop records whose area is null/missing or zero.
    query: {"$and": [{"area": {"$ne": null}}, {"area": {"$ne": 0}}]},
    // Smallest area first, then keep only the first three documents.
    sort: {"area": 1},
    limit: 3,
    out: {inline: 1}
  }
);

For each continent, find the first and last country alphabetically.

// Starter template shown to the learner: emits a single null key/value and
// reduces to null. It is meant to be edited into the answer below.
db.world.mapReduce(
  function(){
    emit(null, null);
  },
  function(k, v){
    return null;
  },
  {out:{inline:1}}
)
// Answer: for each continent, the alphabetically first and last country name.
db.world.mapReduce(
    // map: each country starts out as both the first and the last of its
    // continent, so a single emitted value already has the final output shape.
    function(){emit(this.continent, {first:this.name, last:this.name});},
    // reduce: fold the {first, last} pairs into one pair holding the overall
    // minimum and maximum. Inputs and output share the same shape, so the
    // result can safely be fed back into a later reduce call.
    function(key, values){
      // Seed from the first value instead of fixed 'ZZZ'/'AAA' sentinels:
      // names that sort after 'ZZZ' (e.g. "Zimbabwe", or a non-ASCII initial)
      // would otherwise never replace the sentinel.
      let ret = {first: values[0].first, last: values[0].last};
      for(let i=1; i < values.length; i++){
        if (ret.first > values[i].first) { ret.first = values[i].first; }
        if (ret.last < values[i].last) { ret.last = values[i].last; }
      }
      return ret;
    },
    {out: {inline:1}}
)

Return country names or capital cities that start with the letter 'M' as keys; use null as the value.

// Starter template shown to the learner: emits a single null key/value and
// reduces to null. It is meant to be edited into the answer below.
db.world.mapReduce(
  function(){
    emit(null, null);
  },
  function(k, v){
    return null;
  },
  {out:{inline:1}}
)
// Answer: every country name or capital city beginning with 'M', as keys with
// null values.
db.world.mapReduce(
    function(){
        if((this.name).startsWith('M')) {emit(this.name,null); }
        // Guard against records without a capital: calling startsWith on
        // null/undefined would throw and abort the whole job.
        if(this.capital && (this.capital).startsWith('M')) {emit(this.capital,null); }
    },
    // reduce runs when the same key is emitted more than once (for instance a
    // country whose capital has the same name). It must return a single value
    // of the emitted type (null), not the values array.
    function(key, values){
       return null;
    },
    {out: {inline :1}}
)

For each letter of the alphabet, find the number of cities and the cities that come first and last alphabetically, e.g.

{
	"_id" : "B",
	"value" : {
		"first" : "Baghdad",
		"last" : "Bujumbura",
		"count" : 25
	}
},
// Starter template shown to the learner: emits a single null key/value and
// reduces to null. It is meant to be edited into the answer below.
db.world.mapReduce(
  function(){
    emit(null, null);
  },
  function(k, v){
    return null;
  },
  {out:{inline:1}}
)
// Answer: for each initial letter, the alphabetically first and last capital
// city and how many capitals start with that letter.
db.world.mapReduce(
    // map: skip records without a capital; every emitted value already carries
    // count: 1, so it has the same shape as a reduce result.
    function(){
      if (this.capital) {
        emit((this.capital).substring(0,1), {first:this.capital, last:this.capital, count:1});
      }
    },
    // reduce: combine partial results. Counts are SUMMED rather than
    // incremented per input, so a value that is itself the output of an
    // earlier reduce call contributes its full count and the result does not
    // depend on how MongoDB batches the values.
    function(key, values){
      let ret = {first: values[0].first, last: values[0].last, count: 0};
      for(let i=0; i < values.length; i++){
        if (ret.first > values[i].first) { ret.first = values[i].first; }
        if (ret.last < values[i].last) { ret.last = values[i].last; }
        ret.count += values[i].count;
      }
      return ret;
    },
    // No finalize step is needed: keys with a single city skip reduce but
    // already have count: 1 from map.
    {out: {inline: 1}}
)

Show country count for countries in the following ranges:

 0 to 1000000
 1000000 to 2000000
 2000000 to 3000000
 3000000 to 5000000
 5000000 to 10000000
 10000000 to 15000000
 More than 15000000  
// Starter template shown to the learner: emits a single null key/value and
// reduces to null. It is meant to be edited into the answer below.
db.world.mapReduce(
  function(){
    emit(null, null);
  },
  function(k, v){
    return null;
  },
  {out:{inline:1}}
)
// Answer: number of countries falling in each population band.
db.world.mapReduce(
    // map: classify the document's population and emit (band label, 1).
    // Bands are checked in ascending order, so each test only needs the upper
    // bound; the lower bound is implied by the earlier tests having failed.
    function(){
        var pop = this.population;
        if (pop < 1000000) {
            emit("0 TO 1000000", 1);
        } else if (pop < 2000000) {
            emit("1000000 TO 2000000", 1);
        } else if (pop < 3000000) {
            emit("2000000 TO 3000000", 1);
        } else if (pop < 5000000) {
            emit("3000000 TO 5000000", 1);
        } else if (pop < 10000000) {
            emit("5000000 TO 10000000", 1);
        } else if (pop < 15000000) {
            emit("10000000 TO 15000000", 1);
        } else if (pop >= 15000000) {
            // >= rather than >: a population of exactly 15000000 must land in
            // the top band instead of being silently dropped. The explicit
            // test (not a bare else) keeps non-numeric populations uncounted.
            emit("MORE THAN 15000000", 1);
        }
    },
    // reduce: sum the 1s (or earlier partial sums) for the band.
    function(key, values){ return Array.sum(values); },
    {out: {inline: 1}}
)