|
|
Line 26: |
Line 26: |
| The reduce function has two inputs: for every distinct key emitted by map, the reduce function is called with that key and a list of the corresponding values. | | The reduce function has two inputs: for every distinct key emitted by map, the reduce function is called with that key and a list of the corresponding values. |
|
| |
|
| ==How many countries in each continent== | | ==emit all continents== |
| <div class=q data-lang="mongo"> | | <div class=q data-lang="mongo"> |
| This example returns the number of countries in each continent. | | This example returns the number of countries in each continent. |
| | |
| <pre class=def> | | <pre class=def> |
| db.world.mapReduce( | | db.world.mapReduce( |
| function(){emit(this.continent, 1);}, | | function(){emit(this.continent, this.name);}, |
| function(k, v){ return v.length; }, | | function(k, v){ return v.length; }, |
| out={"inline":1} | | {out:{"inline":1}} |
| ) | | ) |
| </pre> | | </pre> |
| </div> | | </div> |
|
| |
|
| <div class=q data-lang="py3"><code>query</code> can be used to filter the <b>input</b> documents to map.<br/> | | ==emit only some continents== |
| <p class="strong">Find the GDP for each continent, but only include data from countries that start with the letter A or B.</p>
| | <div class=q data-lang="mongo"> |
| <pre class=def>
| | The map function may emit only sometimes. |
| from bson.code import Code
| |
| temp = db.world.map_reduce(
| |
| query={"name": {"$regex":"^(A|B)"}},
| |
| map=Code("function(){emit(this.continent, this.gdp)}"),
| |
| reduce=Code("""function(key, values){
| |
| return Array.sum(values)
| |
| }
| |
| """),
| |
| out={"inline":1},
| |
| )
| |
|
| |
|
| pp.pprint(
| | In the example we are only counting the countries that have a large population |
| temp["results"]
| |
| )
| |
| </pre>
| |
| <div class="ans">
| |
| from bson.code import Code;temp = db.world.map_reduce(query={"name": {"$regex":"^(A|B)"}},map=Code("function(){emit(this.continent, this.gdp)}"),reduce=Code("function(key, values){return Array.sum(values)}"),out={"inline":1});import operator;pp.pprint(temp["results"])
| |
| </div>
| |
| </div>
| |
| | |
| | |
| <div class=q data-lang="py3"><code>scope</code> takes in a <b>document</b>:<code>{}</code> and lets you create global variables.<br/>
| |
| Its syntax is: <code>scope={}</code>.<br/>
| |
| <p class="strong">Using <code>scope</code>, list all the countries with a higher population than Mexico.</p>
| |
| <pre class=def> | | <pre class=def> |
| mexico_data = db.world.find_one({"name":"Mexico"})
| | db.world.mapReduce( |
| pp.pprint(mexico_data)
| | function(){ |
| | | if (this.population>100000000) |
| from bson.code import Code
| | emit(this.continent, this.name);}, |
| temp = db.world.map_reduce(
| | function(k, v){ return v.length; }, |
| scope = {"MEXICO":mexico_data},
| | {out:{"inline":1}} |
| map = Code("""function(){
| |
| if (this.population > MEXICO.population) emit(this.name, this.population)
| |
| }
| |
| """),
| |
| reduce=Code("function(key, values){return values}"),
| |
| out={"inline":1},
| |
| )
| |
| pp.pprint(
| |
| temp["results"]
| |
| )
| |
| </pre>
| |
| <div class="ans">
| |
| mexico_data = db.world.find_one({"name":"Mexico"}); pp.pprint(mexico_data); from bson.code import Code; temp = db.world.map_reduce( scope={"MEXICO":mexico_data}, map=Code("function(){if (this.population > MEXICO.population) emit(this.name, this.population)}"), reduce=Code("function(key, values){return values}"), out={"inline":1});pp.pprint(temp['results'])
| |
| </div>
| |
| </div>
| |
| | |
| <div class=q data-lang="py3"><code>sort</code> and <code>limit</code><br/>
| |
| Sort allows us to sort the <b>input</b> documents that are passed to <b>map</b>.<br/>Limit is self-explanatory and also applies to the <b>input</b> documents that are passed to <b>map</b>.
| |
| <p class="strong">Get the five countries with the highest GDPs</p>
| |
| <pre class=def>
| |
| from bson.code import Code
| |
| temp = db.world.map_reduce(
| |
| query={"gdp":{"$ne":None}},
| |
| sort={"gdp":-1},
| |
| limit=5,
| |
| map=Code("function(){emit(this.name, this.gdp)}"),
| |
| reduce=Code("function(key, values){return values}"),
| |
| out={"inline":1},
| |
| )
| |
| | |
| pp.pprint(
| |
| temp["results"]
| |
| ) | | ) |
| </pre> | | </pre> |
| <div class="ans">
| |
| from bson.code import Code; temp = db.world.map_reduce( query={"gdp":{"$ne":None}}, sort={"gdp":-1}, limit=5, map=Code("function(){emit(this.name, this.gdp)}"), reduce=Code("function(key, values){return values}"), out={"inline":1}, );pp.pprint(temp["results"])
| |
| </div>
| |
| </div>
| |
|
| |
| <div class=q data-lang="py3"><code>finalize</code> is an optional additional step that allows you to modify the data produced by <code>reduce</code><br/>
| |
| <p class="strong">Show the top 15 countries by population, then show their population as a percentage of Mexico's population.</p>
| |
| <pre class=def>
| |
| mexico_data = db.world.find_one({"name":"Mexico"})
| |
|
| |
| from bson.code import Code
| |
| temp = db.world.map_reduce(
| |
| scope = {"MEXICO":mexico_data},
| |
| query={"population":{"$ne":None}},
| |
| sort={"population":-1},
| |
| limit=15,
| |
| map=Code("function(){emit(this.name, this.population)}"),
| |
| reduce=Code("function(key, values){return values}"),
| |
| out={"inline":1},
| |
| finalize=Code("""function(key, values){
| |
| return 100*(values/MEXICO.population)+"%"
| |
| }
| |
| """)
| |
| )
| |
|
| |
| pp.pprint(
| |
| temp["results"]
| |
| )
| |
| </pre>
| |
| <div class="ans">
| |
| mexico_data = db.world.find_one({"name":"Mexico"});from bson.code import Code; temp = db.world.map_reduce( scope = {"MEXICO":mexico_data}, query={"population":{"$ne":None}}, sort={"population":-1}, limit=15, map=Code("function(){emit(this.name, this.population)}"), reduce=Code("function(key, values){return values}"), out={"inline":1}, finalize=Code("""function(key, values){return 100*(values/MEXICO.population)+"%"} """) );pp.pprint(temp["results"] );
| |
| </div>
| |
| </div>
| |
|
| |
| <div class=q data-lang="py3">Rounding can also be performed by using JavaScript.<br/>
| |
| <p class="strong">Show the top 15 countries by population, then show their population as a whole number percentage of Mexico's population.</p>
| |
| <pre class=def>
| |
| mexico_data = db.world.find_one({"name":"Mexico"})
| |
|
| |
| from bson.code import Code
| |
| temp = db.world.map_reduce(
| |
| scope = {"MEXICO":mexico_data},
| |
| query={"population":{"$ne":None}},
| |
| sort={"population":-1},
| |
| limit=15,
| |
| map=Code("function(){emit(this.name, this.population)}"),
| |
| reduce=Code("function(key, values){return values}"),
| |
| out={"inline":1},
| |
| finalize=Code("""function(key, values){
| |
| return Math.round(100*(values/MEXICO.population))+"%"
| |
| }
| |
| """)
| |
| )
| |
|
| |
| pp.pprint(
| |
| temp["results"]
| |
| )
| |
| </pre>
| |
| <div class="ans">
| |
| mexico_data = db.world.find_one({"name":"Mexico"});from bson.code import Code;temp=db.world.map_reduce(scope ={"MEXICO":mexico_data},query={"population":{"$ne":None}},sort={"population":-1},limit=15,map=Code("function(){emit(this.name,this.population)}"),reduce=Code("function(key, values){return values}"), out={"inline":1},finalize=Code("function(key,values){return Math.round(100*(values/MEXICO.population))+'%'}"));pp.pprint(temp["results"])
| |
| </div>
| |
| </div>
| |