2016-07-08 119 views
0

我需要把房地產廣告的數據保存到nidax.json文件中。我先訪問列出所有廣告的頁面,再通過指向各個廣告的鏈接去獲取所需的數據。我使用的是NodeJS的x-ray爬蟲,但由於某種原因它不起作用。(問題標題:NodeJS Xray無法跟隨多個頁面的鏈接來抓取數據)

有時它不會返回任何內容,有時它只返回單個廣告的鏈接。

// Scrape the listing page: for every "read more" span, record the ad link and
// follow it to the individual ad page to read the ad's location. The collected
// records are written to nidax.json.
var Xray = require('x-ray');
var x = Xray();
x('http://nidax-nekretnine.rs/nekretnine/', 'div.kutija-veca_dno > div.read-more > span', [{
    // Link to the individual ad, taken from the anchor inside the current span.
    url: 'a@href',
    // NOTE(review): this selector repeats the outer scope; if x-ray resolves it
    // relative to the current span, plain 'a@href' may be what is needed -- confirm.
    items: x('div.kutija-veca_dno > div.read-more > span > a@href', {
        location: 'body > div.contentarea-novo > div > div.info-part > div.one-third div.osnovni-podaci > p:nth-child(2) > span.orange-text',
    }), // follow the link to the individual ad page
}]).write('nidax.json');

回答

0

修復此問題的pull request(x-ray的 #111,分支 bugfix/nested-crawling)仍在等待批准,您可以訂閱它以便在合併時收到通知。

在此期間,我建議您把這個修復直接應用到您已下載的x-ray模塊中。它只有一行代碼,我已在兩個項目中測試過,非常簡單。請查看index.js文件第237行附近,留意長註釋後面的那個「return」語句:

/**
 * Build a walker that resolves a selector tree against an already loaded
 * document.
 *
 * @param {Function} xray - factory called as `xray(scope, selector)`; it must
 *   return a `node(source, callback)` function used for nested object selectors.
 * @param {*} selector - selector tree handed to `walk`; its leaves may be
 *   strings, functions or arrays.
 * @param {*} scope - scope passed to `root`/`resolve` and used to look up the
 *   elements iterated for array-of-object selectors.
 * @param {*} filters - forwarded unchanged to `resolve`.
 * @returns {Function} `walkHTML($, fn)`; `fn` is called as `fn(err)` on failure
 *   or `fn(null, result, $)` on success.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    walk(selector, function (v, k, next) {
      // Plain selector string: resolve it directly.
      if (typeof v === 'string') {
        return next(null, resolve($, root(scope), v, filters))
      }

      // Function leaf: let it compute its own value from the document.
      if (typeof v === 'function') {
        return v($, function (err, obj) {
          if (err) return next(err)
          return next(null, obj)
        })
      }

      if (!isArray(v)) return next()

      // Array of selector strings: resolved in one go.
      if (typeof v[0] === 'string') {
        return next(null, resolve($, root(scope), v, filters))
      }

      if (typeof v[0] !== 'object') return next()

      // Array of objects: run the nested selector once per scoped element.
      var $scope = $.find ? $.find(scope) : $(scope)
      var pending = $scope.length
      var out = []
      var failed = false

      // Handle the empty result set (thanks @jenbennings!)
      if (!pending) return next(null, out)

      $scope.each(function (i) {
        var $innerscope = $scope.eq(i)
        var node = xray(scope, v[0])
        node($innerscope, function (err, obj) {
          // Report only the first failure so `next` is never called twice.
          if (failed) return
          if (err) {
            failed = true
            return next(err)
          }
          out[i] = obj
          if (!--pending) {
            return next(null, compact(out))
          }
        })
      })

      // Nested crawling fix (see 'bugfix/nested-crawling', #111): leave without
      // calling next(). Falling through to next() here reached the "finished"
      // callback before all pending requests had been retrieved; completion
      // must come from next(null, compact(out)) above.
    }, function (err, obj) {
      if (err) return fn(err)
      fn(null, obj, $)
    })
  }
}