1
MongoDB的新手問題:在「日誌」收集使用MapReduce的生成HTTP流
我有很多HTTP日誌的存儲與數據結構如下集合:
{
'client': {
'ip_address': '1.2.3.4',
'referrer':"http://....",
'user_agent':'Mozilla..."
},
'request':{
"stream": "stream1",
"method": "GET",
"fragment_id": 97,
"date": 13482181,
'response':{
'status':200,
'size': 654
}
}
的每個文檔介紹一個HTTP請求(從客戶端到內容流)。由於每個流分割成更小的碎片,我想用我的收藏「MapReduce的」,然後創建一個「通用流請求」的文件,如下圖所示:
{
'client_ip': '1.2.3.4',
'user_agent': 'Mozilla',
'streams':[
{
'stream':"stream1",
'referrer':'http://...',
'requests':[
{
'fragment_id':97,
'status':200,
'date': 13482181,
'size': 654
...
},
{
'fragment_id':98,
'status':200,
'date': 13482192,
'size': 624
...
}, [...]
]
}, [...]
]
這裏是我的嘗試:
map = function(){
emit({client_ip:this.client.ip,user_agent:this.client.user_agent},{
stream:this.request.stream,
referrer:this.client.referer,
status:this.response.status,
date:this.request.date,
size:this.response.total_size,
fragment_id:this.request.fragment_infos[1]
});
}
reduce = function(key,values){
r = {'count':0,'request':[]};
values.forEach(function(v){
r.count += 1;
r.request.push(v);
});
return r;
}
但這裏是我得到的結果:
"_id" : {
"client_ip" : "1.2.3.4",
"user_agent" : "Mozilla\/4.0"
},
"value" : {
"client_ip" : "1.2.3.4",
"user_agent" : "Mozilla\/4.0",
"count" : 17,
"request" : {
"0" : {
"client_ip" : "1.2.3.4",
"user_agent" : "Mozilla\/4.0",
"count" : 2,
"request" : {
"0" : {
"stream" : "stream1.isml",
"referrer" : null,
"status" : 200,
"date" : 1341706566,
"size" : 456,
"fragment_id" : null,
"count" : 1
},
"1" : {
"stream" : "stream1.isml",
"referrer" : null,
"status" : 200,
"date" : 1341706566,
"size" : null,
"fragment_id" : null,
"count" : 1
}
}
},
"1" : {
"client_ip" : "1.2.3.4",
"user_agent" : "Mozilla\/4.0",
"count" : 3,
"request" : {
"0" : {
"client_ip" : "1.2.3.4",
"user_agent" : "Mozilla\/4.0",
"count" : 2,
"request" : {
"0" : {
"stream" : "stream1.isml",
"referrer" : null,
"status" : 200,
"date" : 1341706568,
"size" : null,
"fragment_id" : null,
"count" : 1
.........
我在哪裏錯了?