|
|
Line 26: |
Line 26: |
| The reduce function has two inputs: for every distinct key emitted by map, the reduce function is called with that key and a list of the corresponding values. | | The reduce function has two inputs: for every distinct key emitted by map, the reduce function is called with that key and a list of the corresponding values. |
| | | |
− | ==How many countries in each continent== | + | ==emit all continents== |
| <div class=q data-lang="mongo"> | | <div class=q data-lang="mongo"> |
| This example returns the number of countries in each continent. | | This example returns the number of countries in each continent. |
| + | |
| <pre class=def> | | <pre class=def> |
| db.world.mapReduce( | | db.world.mapReduce( |
− | function(){emit(this.continent, 1);}, | + | function(){emit(this.continent, this.name);}, |
| function(k, v){ return v.length; } | | function(k, v){ return v.length; } |
− | out={"inline":1} | + | {out:{"inline":1}} |
| ) | | ) |
| </pre> | | </pre> |
| </div> | | </div> |
| | | |
− | <div class=q data-lang="py3"><code>query</code> can be used to filter the <b>input</b> documents to map.<br/> | + | ==emit only some continents== |
− | <p class="strong">Find the GDP for each continent, but only include data from countries that start with the letter A or B.</p>
| + | <div class=q data-lang="mongo"> |
− | <pre class=def>
| + | The map function does not have to emit a value for every document. |
− | from bson.code import Code
| |
− | temp = db.world.map_reduce(
| |
− | query={"name": {"$regex":"^(A|B)"}},
| |
− | map=Code("function(){emit(this.continent, this.gdp)}"),
| |
− | reduce=Code("""function(key, values){
| |
− | return Array.sum(values)
| |
− | }
| |
− | """),
| |
− | out={"inline":1},
| |
− | )
| |
| | | |
− | pp.pprint(
| + | In this example we count only the countries that have a large population. |
− | temp["results"]
| |
− | )
| |
− | </pre>
| |
− | <div class="ans">
| |
− | from bson.code import Code;temp = db.world.map_reduce(query={"name": {"$regex":"^(A|B)"}},map=Code("function(){emit(this.continent, this.gdp)}"),reduce=Code("function(key, values){return Array.sum(values)}"),out={"inline":1});import operator;pp.pprint(temp["results"])
| |
− | </div>
| |
− | </div>
| |
− | | |
− | | |
− | <div class=q data-lang="py3"><code>scope</code> takes in a <b>document</b>:<code>{}</code> and lets you create global variables.<br/>
| |
− | Its syntax is: <code>scope={}</code>.<br/>
| |
− | <p class="strong">Using <code>scope</code>, list all the countries with a higher population than Mexico.</p>
| |
| <pre class=def> | | <pre class=def> |
− | mexico_data = db.world.find_one({"name":"Mexico"})
| + | db.world.mapReduce( |
− | pp.pprint(mexico_data)
| + | function(){ |
− | | + | if (this.population>100000000) |
− | from bson.code import Code
| + | emit(this.continent, this.name);}, |
− | temp = db.world.map_reduce(
| + | function(k, v){ return v.length; } |
− | scope = {"MEXICO":mexico_data},
| + | {out:{"inline":1}} |
− | map = Code("""function(){
| |
− | if (this.population > MEXICO.population) emit(this.name, this.population)
| |
− | }
| |
− | """),
| |
− | reduce=Code("function(key, values){return values}"),
| |
− | out={"inline":1},
| |
− | )
| |
− | pp.pprint(
| |
− | temp["results"]
| |
− | )
| |
− | </pre>
| |
− | <div class="ans">
| |
− | mexico_data = db.world.find_one({"name":"Mexico"}); pp.pprint(mexico_data); from bson.code import Code; temp = db.world.map_reduce( scope={"MEXICO":mexico_data}, map=Code("function(){if (this.population > MEXICO.population) emit(this.name, this.population)}"), reduce=Code("function(key, values){return values}"), out={"inline":1});pp.pprint(temp['results'])
| |
− | </div>
| |
− | </div>
| |
− | | |
− | <div class=q data-lang="py3"><code>sort</code> and <code>limit</code><br/>
| |
− | Sort allows us to sort the <b>input</b> documents that are passed to <b>map</b>.<br/>Limit is self-explanatory and also applies to the <b>input</b> documents that are passed to <b>map</b>.
| |
− | <p class="strong">Get the five countries with the highest GDPs</p>
| |
− | <pre class=def>
| |
− | from bson.code import Code
| |
− | temp = db.world.map_reduce(
| |
− | query={"gdp":{"$ne":None}},
| |
− | sort={"gdp":-1},
| |
− | limit=5,
| |
− | map=Code("function(){emit(this.name, this.gdp)}"),
| |
− | reduce=Code("function(key, values){return values}"),
| |
− | out={"inline":1},
| |
− | )
| |
− | | |
− | pp.pprint(
| |
− | temp["results"]
| |
| ) | | ) |
| </pre> | | </pre> |
− | <div class="ans">
| |
− | from bson.code import Code; temp = db.world.map_reduce( query={"gdp":{"$ne":None}}, sort={"gdp":-1}, limit=5, map=Code("function(){emit(this.name, this.gdp)}"), reduce=Code("function(key, values){return values}"), out={"inline":1}, );pp.pprint(temp["results"])
| |
− | </div>
| |
− | </div>
| |
− |
| |
− | <div class=q data-lang="py3"><code>finalize</code> is an optional additional step that allows you to modify the data produced by <code>reduce</code><br/>
| |
− | <p class="strong">Show the top 15 countries by population, then show their population as a percentage of Mexico's population.</p>
| |
− | <pre class=def>
| |
− | mexico_data = db.world.find_one({"name":"Mexico"})
| |
− |
| |
− | from bson.code import Code
| |
− | temp = db.world.map_reduce(
| |
− | scope = {"MEXICO":mexico_data},
| |
− | query={"population":{"$ne":None}},
| |
− | sort={"population":-1},
| |
− | limit=15,
| |
− | map=Code("function(){emit(this.name, this.population)}"),
| |
− | reduce=Code("function(key, values){return values}"),
| |
− | out={"inline":1},
| |
− | finalize=Code("""function(key, values){
| |
− | return 100*(values/MEXICO.population)+"%"
| |
− | }
| |
− | """)
| |
− | )
| |
− |
| |
− | pp.pprint(
| |
− | temp["results"]
| |
− | )
| |
− | </pre>
| |
− | <div class="ans">
| |
− | mexico_data = db.world.find_one({"name":"Mexico"});from bson.code import Code; temp = db.world.map_reduce( scope = {"MEXICO":mexico_data}, query={"population":{"$ne":None}}, sort={"population":-1}, limit=15, map=Code("function(){emit(this.name, this.population)}"), reduce=Code("function(key, values){return values}"), out={"inline":1}, finalize=Code("""function(key, values){return 100*(values/MEXICO.population)+"%"} """) );pp.pprint(temp["results"] );
| |
− | </div>
| |
− | </div>
| |
− |
| |
− | <div class=q data-lang="py3">Rounding can also be performed by using JavaScript.<br/>
| |
− | <p class="strong">Show the top 15 countries by population, then show their population as a whole number percentage of Mexico's population.</p>
| |
− | <pre class=def>
| |
− | mexico_data = db.world.find_one({"name":"Mexico"})
| |
− |
| |
− | from bson.code import Code
| |
− | temp = db.world.map_reduce(
| |
− | scope = {"MEXICO":mexico_data},
| |
− | query={"population":{"$ne":None}},
| |
− | sort={"population":-1},
| |
− | limit=15,
| |
− | map=Code("function(){emit(this.name, this.population)}"),
| |
− | reduce=Code("function(key, values){return values}"),
| |
− | out={"inline":1},
| |
− | finalize=Code("""function(key, values){
| |
− | return Math.round(100*(values/MEXICO.population))+"%"
| |
− | }
| |
− | """)
| |
− | )
| |
− |
| |
− | pp.pprint(
| |
− | temp["results"]
| |
− | )
| |
− | </pre>
| |
− | <div class="ans">
| |
− | mexico_data = db.world.find_one({"name":"Mexico"});from bson.code import Code;temp=db.world.map_reduce(scope ={"MEXICO":mexico_data},query={"population":{"$ne":None}},sort={"population":-1},limit=15,map=Code("function(){emit(this.name,this.population)}"),reduce=Code("function(key, values){return values}"), out={"inline":1},finalize=Code("function(key,values){return Math.round(100*(values/MEXICO.population))+'%'}"));pp.pprint(temp["results"])
| |
− | </div>
| |
− | </div>
| |