日期:2014-05-16  浏览次数:20425 次

MongoDB 聚合

1.count

// Count the documents in the user collection through a cursor returned by find().
db.user.find().count();
// Same count, using the collection-level count() helper directly.
db.user.count();
// Count only the documents whose "name" field equals the given value.
db.user.count({"name": "路人甲0"});

2.distinct

// Syntax: `key` is the name of the field whose distinct values are wanted.
db.collection.distinct( key )
// e.g. distinct values of the "name" field in the user collection
db.user.distinct("name");

3.group

//语法
db.collection.group( { [key|$keyf] : ...[, [cond/condition]: ...], initial: ..., reduce : ...[, finalize: ...] } );
db.runCommand({"group": { "ns": ..., [key|$keyf] : ...[, [cond/condition]: ...], initial: ..., $reduce : ...[, finalize: ...] }});
key: 分组依据的键
$keyf: 分组函数
cond/condition: 过滤条件
initial: 每一分组的初始化状态,将传递给reduce函数
reduce/$reduce: 累加器,系统会传递两个参数,当前文档和当前文档所属分组的累加结果
finalize: 对分组结果进行调整,参数为每个分组的累加结果
e.g.
// Group the users whose age is greater than 3 by (name, age) and count the
// documents that fall into each group.
db.user.group({
	key: {name: 1, age: 1},
	cond: {age: {$gt: 3}},
	initial: {count: 0},
	// Receives the current document and the accumulator of the group it belongs to.
	reduce: function(currentDoc, groupTotals) {
		groupTotals.count += 1;
	}
});

// Count users whose age is greater than 3, grouped by a computed key:
// the name with its first two characters removed.
db.user.group({
	$keyf: function(userDoc) {
		return {name: userDoc.name.substr(2)};
	},
	condition: {age: {$gt: 3}},
	initial: {count: 0},
	// Receives the current document and the accumulator of the group it belongs to.
	reduce: function(userDoc, groupTotals) {
		groupTotals.count += 1;
	}
});

// Count users whose age is greater than 3, grouped by name, then post-process
// every group so that its name loses the first two characters.
db.user.group({
	key: {name: 1},
	condition: {age: {$gt: 3}},
	initial: {count: 0},
	// Receives the current document and the accumulator of the group it belongs to.
	reduce: function(userDoc, groupTotals) {
		groupTotals.count += 1;
	},
	// Receives the accumulated result of one group and adjusts it in place.
	finalize: function(groupResult) {
		groupResult.name = groupResult.name.substr(2);
	}
});

4.MapReduce

语法

db.collection.mapReduce( mapFunction , reduceFunction , <optional params> );
mapFunction: map函数
reduceFunction: reduce函数,一定要能被反复调用
<optional params>:
--out: 存放结果的集合名
--query: 过滤条件

db.runCommand({"mapreduce": ..., "map": ..., "reduce": ..., <optional params> });
mapreduce:集合名
map:map函数
reduce:reduce函数
<optional params>:
--finalize
--keeptemp:连接关闭时结果集合是否保存?
--out:存放结果的集合名(隐含着keeptemp: true)
--query:过滤条件
--sort:在发送map前是否先给文档排序
--limit:发往map函数的文档数量的上限
--scope:javascript中要用到的变量
--verbose:是否产生更加详尽的服务器日志

e.g.

// Count how often each field name occurs across the user documents whose age
// is greater than 3, writing the totals to the "key_count_temp" collection.
db.user.mapReduce(
function(){
	// map: `this` is the document being processed; emit one {count: 1} per field name.
	for (var key in this) {
		emit(key, {"count": 1});
	}
},
function(key, values){
	// reduce: add up the partial counts instead of counting the array entries.
	// The output of an earlier reduce call for the same key can be passed back
	// in as one element of `values`, so an element may already stand for many
	// emits; summing keeps the function safe to call repeatedly.
	var count = 0;
	for (var i = 0; i < values.length; i++) {
		count += values[i].count;
	}
	return {"count": count};
},
{
	"out": "key_count_temp",
	"query": {"age": {"$gt": 3}}
});

结果:

{
        "result" : "key_count_temp",
        "timeMillis" : 162,
        "counts" : {
                "input" : 50,
                "emit" : 250,
                "reduce" : 5,
                "output" : 5
        },
        "ok" : 1,
}
result:存放结果的集合名
timeMillis:操作花费的时间
counts:
--input:发送到map函数的文档个数
--emit:在map函数中emit被调用的次数
--reduce:reduce被调用的次数
--output:结果集合中创建的文档数量