日期:2014-05-16  浏览次数:20591 次

mongo 使用mapreduce统计
原始数据:
> db.sdk_counter.find();
{ "_id" : ObjectId("53395c10cb8d5d3a6b878f01"), "stat_date" : "2014-03-25", "show_count" : 4, "click_count" : 0 }
{ "_id" : ObjectId("53395c14cb8d5d3a6b878f02"), "stat_date" : "2014-03-23", "show_count" : 4, "click_count" : 0 }
{ "_id" : ObjectId("53395c1ccb8d5d3a6b878f03"), "stat_date" : "2014-03-24", "show_count" : 4, "click_count" : 10 }
{ "_id" : ObjectId("53395c23cb8d5d3a6b878f04"), "stat_date" : "2014-03-24", "show_count" : 3, "click_count" : 1 }

字段含义:
stat_date:日期
show_count:展示量
click_count:点击量

一、实现功能:分年、分月、分日期、分年和月查看展示量和点击量
==================================
var m = function(){
var year = this.stat_date.substr(0,4);
var month = this.stat_date.substr(5,2);
var day = this.stat_date.substr(8,2);
var yearAndMonth = this.stat_date.substr(0,7);
//此处填写year/month/day/yearAndMonth即可以实现功能
emit(yearAndMonth,{show_count:this.show_count,click_count:this.click_count});
};
var r = function(key,values){
    var show = 0;
    var click = 0;
    for(var i in values){
      show += values[i].show_count;
      click += values[i].click_count;
    }
    return {"show_count":show,"click_count":click};
};
db.sdk_counter.mapReduce(m,r,"sdk_counter_tmp");
db.sdk_counter_tmp.find();

按月份> db.sdk_counter_tmp.find();
{ "_id" : "03", "value" : { "show_count" : 15, "click_count" : 11 } }
按年份> db.sdk_counter_tmp.find();
{ "_id" : "2014", "value" : { "show_count" : 15, "click_count" : 11 } }
按日期> db.sdk_counter_tmp.find();
{ "_id" : "23", "value" : { "show_count" : 4, "click_count" : 0 } }
{ "_id" : "24", "value" : { "show_count" : 7, "click_count" : 11 } }
{ "_id" : "25", "value" : { "show_count" : 4, "click_count" : 0 } }


二、算总量
var m = function(){emit(1,{show_count:this.show_count,click_count:this.click_count})};
var r = function(key,values){
    var show = 0;
    var click = 0;
    for(var i in values){
      show += values[i].show_count;
      click += values[i].click_count;
    }
    return {"show_count":show,"click_count":click};
};
db.sdk_counter.mapReduce(m,r,"sdk_counter_tmp");
db.sdk_counter_tmp.find();
> db.imacha_sdk_counter_tmp.find();
{ "_id" : 1, "value" : { "show_count" : 15, "click_count" : 11 } }

总结:mongo使用mapreduce实现类似mysql中分组统计的需求,关键是找对分组key,而其map中的emit函数的key参数至关重要,它是字段值而不是字段!