2014-11-22 39 views
1

我在 Node.js 中用 http 模塊寫了這個簡單的 HTMLParser(問題:Node.js 的 HTMLParser 迭代不止一次):

// Sends a GET request to www.google.com:80/ and logs the text found between
// "<title>" and "</title>" in the response.
var http = require('http'); 
var options = { 
    hostname: 'www.google.com', 
    port: 80, 
    path: '/', 
    method: 'GET' 
}; 

var req = http.request(options, function(res) { 
    // With an encoding set, each chunk is delivered as a string.
    res.setEncoding('utf8'); 
    // NOTE(review): this handler runs once per received chunk and the title
    // search below is applied to each chunk on its own, so one "Title is"
    // line is logged for every chunk of the response body.
    res.on('data', function (chunk) { 
    var title1 = chunk.indexOf("<title>"); 
    // NOTE(review): title2 is computed but never read.
    var title2 = chunk.indexOf("</title>"); 
    // 7 is the length of "<title>". If the tag is not in this chunk, title1
    // is -1 and the slice starts at index 6 instead.
    var titl = chunk.substring(title1 + 7); 
    // If "</title>" is missing, indexOf returns -1 and substring(0, -1)
    // yields an empty string.
    var result = titl.substring(0, titl.indexOf("</title>")); 
    console.log("Title is : " + result); 
    }); 
// NOTE(review): req.end() is also called at the bottom of the script; this
// call runs later, inside the response callback.
req.end(); 
}); 

// Log request-level errors reported on the request object.
req.on('error', function(e) { 
    console.log('problem with request: ' + e.message); 
}); 

// Finish sending the request.
req.end(); 

執行時,它會迭代不止一次,所以我在命令行中得到如下輸出;行數會變化,但總是迭代不止一次。

Title is: Google 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 
Title is: 

任何幫助?提前致謝!

+0

FWIW,如果你的最終目標是有一個實際可用的解析器,你可能會更好用像['cheerio'](https://github.com/cheeriojs/cheerio)。 – mscdex 2014-11-22 00:47:00

+0

請參閱http://stackoverflow.com/a/7373003/1481489關於解析HTML的更好方法的信息 – zamnuts 2014-11-22 00:47:44

回答

0

當信息傳入時,`data` 事件可能觸發不止一次。您需要將傳入的 Buffer(即 `chunk`)存儲到您自己的緩衝區中,並在響應完成時再進行分析。這就是爲什麼它被稱爲 `chunk`(塊)——它只是部分數據。

// Issues the request and, once the whole response body has arrived, logs the
// text between the first "<title>" and the "</title>" that follows it.
// Logs an empty title when either tag is missing.
var req = http.request(options, function(res) {
    res.setEncoding('utf8');
    var content = '';
    // 'data' may fire several times; each chunk is only part of the body,
    // so accumulate the chunks and parse nothing until 'end'.
    res.on('data', function (chunk) {
        content += chunk;
    });
    res.once('end', function() { // the response is complete
        var openTag = "<title>";
        var closeTag = "</title>";
        var result = '';
        var start = content.indexOf(openTag);
        // Only slice when the opening tag exists; otherwise -1 + length
        // would point into unrelated text.
        if (start !== -1) {
            var textStart = start + openTag.length;
            // Search for the closing tag after the opening tag only.
            var textEnd = content.indexOf(closeTag, textStart);
            if (textEnd !== -1) {
                result = content.substring(textStart, textEnd);
            }
        }
        console.log("Title is : " + result);
    });
});

您可能還需要在 `end` 事件觸發時清理響應的 `data` 事件處理程序:

// Appends one incoming piece of the response body to `content`, which the
// surrounding code is expected to declare.
function storeChunk(chunk) {
    content = content + chunk;
}

res.on('data', storeChunk);
res.once('end', function onEnd() {
    // Detach the handler by passing the same function reference that was
    // registered above.
    res.removeListener('data', storeChunk);
    // ...
});
+0

「將多次觸發」這個說法有點過於絕對。它*可能*會觸發不止一次。 – mscdex 2014-11-22 00:47:43

+0

@mscdex正確,更新了措辭 – zamnuts 2014-11-22 00:48:44