Cookies help us deliver our services. By using our services, you agree to our use of cookies. More information

Difference between revisions of "MAPREDUCE basics"

From NoSQLZoo
Jump to: navigation, search
m (Reverted edits by Andr3w (talk) to last revision by 40082593)
 
(9 intermediate revisions by 2 users not shown)
Line 11: Line 11:
 
#PRETTY
 
#PRETTY
 
import pprint
 
import pprint
pp = pprint.PrettyPrinter(indent=4)
+
pp = pprint.PrettyPrinter(indent=4, width=160)
 
</pre>
 
</pre>
 +
 +
==MapReduce the basics==
 +
  <p>This tutorial introduces the <code>MapReduce</code> command.</p>
 +
[[MapReduce]] examples are available.
 +
<div class='extra_space' style='width:1em; height:6em;'></div>
 +
 +
 +
<div class=q data-lang="py3">
 +
<p class=strong>Find the total population of the each continent</p>
 +
<pre class=def>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("function(){emit(this.continent, this.population)}"),
 +
    reduce=Code("""function(key, values){
 +
      return Array.sum(values);
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("function(){emit(this.continent, this.population)}"),
 +
    reduce=Code("""function(key, values){
 +
      return Array.sum(values);
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
  
 
<div class=q data-lang="py3">
 
<div class=q data-lang="py3">
<p class=strong>Show a list of countries found in North America</p>
+
<p class=strong>Use the previous answer to find the population of the world to the nearest million</p>
 +
<div class=hint title="How to round to the nearest million">Use the JavaScript round function : Math.round(population/1000000)*1000000 </div>
 
<pre class=def>
 
<pre class=def>
 +
</pre>
 +
<div class=ans>
 
from bson.code import Code
 
from bson.code import Code
 
temp = db.world.map_reduce(
 
temp = db.world.map_reduce(
         query={"continent":"Africa"},
+
    map=Code("function(){emit('World Population in Millions', this.population)}"),
         map=Code("function(){emit(this.continent, this.name)}"),  
+
    reduce=Code("""function(key, values){
         reduce=Code("function(key, values){return values.toString()}"),
+
      return Math.round(Array.sum(values)/1000000)*1000000;
         out={"inline":1}
+
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
<p class=strong>Count number of countries by first letter</p>
 +
<pre class=def>
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""),
 +
    reduce=Code("""function(key, values){
 +
      return Array.sum(values);
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
<p class=strong>Show the number of countries on each continent</p>
 +
<pre class=def>
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("function(){emit(this.continent, 1)}"),
 +
    reduce=Code("""function(key, values){
 +
      return Array.sum(values);
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
<p class=strong>Show the smallest 3 countries name and area (ignore areas of 0 or None)</p>
 +
<pre class=def>
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
         query={"$and":[{"area":{"$ne":None}}, {"area":{"$ne":0}}]},
 +
        sort={"area":1},
 +
        limit=3,
 +
         map=Code("function(){emit(this.name, this.area)}"),  
 +
         reduce=Code("function(key, values){}"),
 +
         out={"inline":1},
 
)
 
)
  
Line 28: Line 111:
 
   temp["results"]
 
   temp["results"]
 
)
 
)
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
<p class=strong>Return the first and last country based on name order for each continent</p>
 +
<pre class=def>
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"),
 +
    reduce=Code("""function(key, values){
 +
      var ret = {first:'ZZZ',last:'AAA'};
 +
      for(var i=0;i<values.length;i++){
 +
        if (ret.first>values[i].first) ret.first=values[i].first;
 +
        if (ret.last<values[i].last) ret.last=values[i].last;
 +
      }
 +
      return ret;
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
 +
 +
<div class=q data-lang="py3">
 +
<p class=strong>Return country name or capital city that starts with a letter 'M'</p>
 +
<pre class=def>
 
</pre>
 
</pre>
 
<div class=ans>
 
<div class=ans>
 
from bson.code import Code
 
from bson.code import Code
temp = db.world.map_reduce(query={"continent":"North America"},map=Code("function(){emit(this.continent, this.name)}"), reduce=Code("function(key, values){return values.toString()}"),out={"inline":1},)
+
temp = db.world.map_reduce(
 +
    map=Code("""function(){  
 +
                if((this.name).startsWith('M'))
 +
                      emit(this.name,null);
 +
                if((this.capital).startsWith('M'))
 +
                  emit(this.capital,null);
 +
            }"""),
 +
    reduce=Code("""function(key, values){
 +
      return values;
 +
    }"""),
 +
   
 +
    out={"inline":1})
 
pp.pprint(temp["results"])
 
pp.pprint(temp["results"])
 
</div>
 
</div>
Line 37: Line 158:
  
 
<div class=q data-lang="py3">
 
<div class=q data-lang="py3">
<p class=strong>Show the number of countries on each continent</p>
+
<p class=strong>Show the first and last city for each letter and the count of cities</p>
 +
<pre class=def>
 +
</pre>
 +
<div class=ans>
 +
from bson.code import Code
 +
temp = db.world.map_reduce(
 +
    map=Code("""function(){ if(this.capital)emit((this.capital).substring(0,1), {first:this.capital, last:this.capital})}"""),
 +
    reduce=Code("""function(key, values){
 +
      var ret = {first:'ZZZ',last:'AAA', count:0};
 +
      for(var i=0;i<values.length;i++){
 +
        if (ret.first>values[i].first) ret.first=values[i].first;
 +
        if (ret.last<values[i].last) ret.last=values[i].last;
 +
        ret.count += 1;
 +
      }
 +
 
 +
      return ret;
 +
    }"""),
 +
    finalize=Code("""function(key, val){
 +
      if(!val.count){
 +
        val.count = 1;
 +
        return val;
 +
      }else
 +
        return val;
 +
    }"""),
 +
    out={"inline":1})
 +
pp.pprint(temp["results"])
 +
</div>
 +
</div>
 +
 
 +
<div class=q data-lang="py3">
 +
<p class=strong>Show country count for countries in the ranges</p>
 +
  0 to 1000000
 +
  1000000 to 2000000
 +
  2000000 to 3000000
 +
  3000000 to 5000000
 +
  5000000 to 10000000
 +
  10000000 to 15000000
 +
  More than 15000000 
 
<pre class=def>
 
<pre class=def>
 
</pre>
 
</pre>
 
<div class=ans>
 
<div class=ans>
 
from bson.code import Code
 
from bson.code import Code
temp = db.world.map_reduce(map=Code("function(){emit(this.continent, this.name)}"), reduce=Code("function(key, values){return values.length}"),out={"inline":1})
+
temp = db.world.map_reduce(
 +
    map=Code("""function(){
 +
                  var pop = this.population;
 +
                  switch(true){
 +
                    case pop<1000000:
 +
                      emit("0 TO 1000000", 1);
 +
                      break;
 +
                    case pop<2000000:
 +
                      emit("1000000 TO 2000000", 1);
 +
                      break;
 +
                    case pop<3000000:
 +
                      emit("2000000 TO 3000000", 1);
 +
                      break;
 +
                    case pop<5000000:
 +
                      emit("3000000 TO 5000000", 1);
 +
                      break;
 +
                    case pop<10000000:
 +
                      emit("5000000 TO 10000000", 1);
 +
                      break;
 +
                    case pop<15000000:
 +
                      emit("10000000 TO 15000000", 1);
 +
                      break
 +
                    case pop>15000000:
 +
                      emit("MORE THAN 15000000", 1);
 +
                      break;
 +
                  }
 +
}"""),
 +
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),    
 +
    out={"inline":1})
 
pp.pprint(temp["results"])
 
pp.pprint(temp["results"])
 
</div>
 
</div>
 
</div>
 
</div>

Latest revision as of 11:18, 27 June 2016

#ENCODING
# Re-wrap the binary buffer behind stdout so that everything printed by the
# snippets below is encoded as UTF-16.
import io
import sys
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-16')
#MONGO
# Open a client with PyMongo's default connection settings, authenticate
# against the "progzoo" database, and expose that database as `db`; every
# exercise below queries `db.world`.
# NOTE(review): the credentials are hard-coded, and Database.authenticate()
# is believed to be unavailable in PyMongo 4+ -- confirm the installed version.
from pymongo import MongoClient
client = MongoClient()
client.progzoo.authenticate('scott','tiger')
db = client['progzoo']
#PRETTY
# Shared pretty-printer used by every exercise: 4-space indent, 160 columns.
import pprint
pp = pprint.PrettyPrinter(indent=4, width=160)

MapReduce the basics

This tutorial introduces the MapReduce command.

MapReduce examples are available.


Find the total population of each continent

from bson.code import Code

# Map step: one (continent, population) pair per country document.
map_js = Code("function(){emit(this.continent, this.population)}")

# Reduce step: add up every population emitted for the same continent.
reduce_js = Code("""function(key, values){
      return Array.sum(values);
    }""")

# Ask for the output inline; the rows are read from the "results" entry of
# the returned document.
temp = db.world.map_reduce(map=map_js, reduce=reduce_js, out={"inline":1})
pp.pprint(temp["results"])

# Answer: total population of each continent.
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit one (continent, population) pair per country document
    map=Code("function(){emit(this.continent, this.population)}"),
    # reduce: sum the populations emitted for one continent
    reduce=Code("""function(key, values){
      return Array.sum(values);
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Use the previous answer to find the population of the world to the nearest million

Use the JavaScript round function: Math.round(population/1000000)*1000000

# Answer: population of the whole world, rounded to the nearest million.
from bson.code import Code

temp = db.world.map_reduce(
    # map: every country emits under the same constant key, so all
    # populations end up in a single group
    map=Code("function(){emit('World Population in Millions', this.population)}"),
    # reduce: sum the group, then round the total to the nearest 1,000,000
    reduce=Code("""function(key, values){
      return Math.round(Array.sum(values)/1000000)*1000000;
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Count the number of countries by first letter


# Answer: number of countries for each initial letter of the country name.
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit (first character of the name, 1) for every country
    map=Code("""function(){ emit((this.name).substring(0,1), 1)}"""),
    # reduce: the sum of the emitted 1s is the number of countries
    reduce=Code("""function(key, values){
      return Array.sum(values);
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Show the number of countries on each continent


# Answer: number of countries on each continent.
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit (continent, 1) for every country
    map=Code("function(){emit(this.continent, 1)}"),
    # reduce: the sum of the emitted 1s is the number of countries
    reduce=Code("""function(key, values){
      return Array.sum(values);
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Show the name and area of the 3 smallest countries (ignore areas of 0 or None)


# Answer: name and area of the three smallest countries.
from bson.code import Code

temp = db.world.map_reduce(
    # only feed countries whose area is neither None nor 0 into the map step
    query={"$and": [{"area": {"$ne": None}}, {"area": {"$ne": 0}}]},
    # ascending by area, then keep the first three input documents
    sort={"area": 1},
    limit=3,
    # map: emit (country name, area)
    map=Code("function(){emit(this.name, this.area)}"),
    # reduce is left empty on purpose.
    # NOTE(review): this relies on each country name being emitted only once,
    # so that reduce is never invoked -- confirm names are unique in db.world.
    reduce=Code("function(key, values){}"),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1},
)
pp.pprint(temp["results"])

Return the first and last country based on name order for each continent


# Answer: alphabetically first and last country name on each continent.
from bson.code import Code

temp = db.world.map_reduce(
    # map: each country starts out as both the first and the last candidate
    map=Code("function(){emit(this.continent, {first:this.name,last:this.name})}"),
    # reduce: keep the smallest `first` and the largest `last`.
    # The result is seeded from values[0] instead of the literals
    # 'ZZZ'/'AAA': names such as "Zambia" compare greater than 'ZZZ'
    # (lower-case letters sort after upper-case), so fixed sentinels could
    # leak into the output.
    reduce=Code("""function(key, values){
      var ret = {first:values[0].first, last:values[0].last};
      for(var i=1;i<values.length;i++){
        if (ret.first>values[i].first) ret.first=values[i].first;
        if (ret.last<values[i].last) ret.last=values[i].last;
      }
      return ret;
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Return every country name or capital city that starts with the letter 'M'


# Answer: every country name or capital city that starts with 'M'.
from bson.code import Code

temp = db.world.map_reduce(
    # map: emit the matching name and/or capital as the key; the value is
    # unused, so null is emitted.  The truthiness checks skip documents with
    # a missing or null name/capital, on which startsWith() would throw.
    map=Code("""function(){
      if (this.name && this.name.startsWith('M'))
        emit(this.name, null);
      if (this.capital && this.capital.startsWith('M'))
        emit(this.capital, null);
    }"""),
    # reduce: only runs when a key was emitted more than once (e.g. a country
    # whose capital has the same name).  It must hand back the same kind of
    # value that map emits, so it returns null rather than the values array.
    reduce=Code("""function(key, values){
      return null;
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Show the first and last city for each letter and the count of cities


# Answer: for each initial letter, the alphabetically first and last capital
# city and the number of capitals starting with that letter.
from bson.code import Code

temp = db.world.map_reduce(
    # map: skip countries without a capital; every emitted value already
    # carries count:1, so keys that never reach reduce are complete and no
    # finalize step is needed to patch the count in afterwards.
    map=Code("""function(){
      if (this.capital)
        emit(this.capital.substring(0,1), {first:this.capital, last:this.capital, count:1});
    }"""),
    # reduce: combine partial results.  Counts are summed (not taken from
    # values.length) so that re-reducing already reduced values stays
    # correct, and first/last are seeded from values[0] rather than the
    # literals 'ZZZ'/'AAA', which are wrong for names like "Zagreb".
    reduce=Code("""function(key, values){
      var ret = {first:values[0].first, last:values[0].last, count:0};
      for(var i=0;i<values.length;i++){
        if (ret.first>values[i].first) ret.first=values[i].first;
        if (ret.last<values[i].last) ret.last=values[i].last;
        ret.count += values[i].count;
      }
      return ret;
    }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])

Show the number of countries whose population falls in each of the following ranges

 0 to 1000000
 1000000 to 2000000
 2000000 to 3000000
 3000000 to 5000000
 5000000 to 10000000
 10000000 to 15000000
 More than 15000000  

# Answer: number of countries in each population range.
from bson.code import Code

temp = db.world.map_reduce(
    # map: switch(true) runs the first case whose comparison is true, so each
    # country emits exactly one range label.  Upper bounds are exclusive.
    # The last case uses >= so that a population of exactly 15000000 is
    # counted instead of falling through every case.
    map=Code("""function(){
      var pop = this.population;
      switch(true){
        case pop<1000000:
          emit("0 TO 1000000", 1);
          break;
        case pop<2000000:
          emit("1000000 TO 2000000", 1);
          break;
        case pop<3000000:
          emit("2000000 TO 3000000", 1);
          break;
        case pop<5000000:
          emit("3000000 TO 5000000", 1);
          break;
        case pop<10000000:
          emit("5000000 TO 10000000", 1);
          break;
        case pop<15000000:
          emit("10000000 TO 15000000", 1);
          break;
        case pop>=15000000:
          emit("MORE THAN 15000000", 1);
          break;
      }
    }"""),
    # reduce: the sum of the emitted 1s is the number of countries in a range
    reduce=Code("""function(key, values){ return Array.sum(values); }"""),
    # inline output: rows are read from temp["results"] below
    out={"inline": 1})
pp.pprint(temp["results"])