2016-11-30 41 views
9

我在這裏看到過很多類似的問題,但不知道爲什麼都沒有人回答。使用 PhantomJS 登錄後,如何獲取下一個頁面?

我想用下面這段代碼(來源:source)在登錄後抓取網頁:

// Shared state for the step runner.
// `steps` holds the step functions in execution order; it is filled in further down.
var steps=[];
// Index into `steps` of the next step to execute.
var testindex = 0;
var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
var webPage = require('webpage');
var page = webPage.create();
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36';
page.settings.javascriptEnabled = true;
page.settings.loadImages = false;//Script is much faster with this field set to false
// Keep cookies between requests so the session created by the login survives
// into the following page loads.
phantom.cookiesEnabled = true;
// Note: JavaScript execution is controlled per page through
// page.settings.javascriptEnabled (set above). The `phantom` object has no
// `javascriptEnabled` property, so the former assignment to it did nothing
// and has been dropped.
/*********SETTINGS END*****************/

console.log('All settings loaded, start with execution');
// Forward everything the page itself writes to its console (including
// console.log calls made inside page.evaluate) to the PhantomJS output.
page.onConsoleMessage = function(msg) {
    console.log(msg);
};
/**********DEFINE STEPS THAT PHANTOM SHOULD DO***********************/
// Each entry is one step; the step runner calls them in order, one per timer
// tick, and only while no page load is in progress.
steps = [

    //Step 1 - Open the login page
    function(){
     console.log('Step 1 - Abrindo página de login');
     page.open("http://parceriascury.housecrm.com.br", function(status){
      // Surface a failed load instead of silently continuing.
      if (status !== 'success') {
       console.log('Step 1 - failed to load the login page (status: ' + status + ')');
      }
     });
    },
    //Step 2 - Populate and submit the login form
    function(){
     console.log('Step 2 - Preenchendo o form');
     // The callback runs inside the page; only its return value comes back.
     var submitted = page.evaluate(function(){
      var loginField = document.getElementById("login");
      var passwordField = document.getElementById("senha");
      var loginForm = document.getElementById("frmlandingpage");
      // Bail out cleanly when the expected form is not on the page
      // (e.g. the login page did not load) rather than throwing in the page.
      if (!loginField || !passwordField || !loginForm) {
       return false;
      }
      loginField.value="xxxxx";
      passwordField.value="xxxxx";
      loginForm.submit();
      return true;
     });
     if (!submitted) {
      console.log('Step 2 - login form not found, nothing was submitted');
     }
    },
    //Step 3 - Open the page that requires the logged-in session.
    //The response to the login submit itself is only a bare "ok" document, so
    //the protected page has to be requested explicitly; the session cookie set
    //by the login is expected to be sent along with this request.
    function(){
     console.log('Step 3 - Abrindo página de busca');
     page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
      if (status !== 'success') {
       console.log('Step 3 - failed to load the target page (status: ' + status + ')');
      }
     });
    },
    //Step 4 - Save the HTML of the currently loaded page to disk so it can be
    //opened in a browser to check whether the login worked.
    function(){
     console.log("Step 4 - Saving the content of the current page to C:\\phantomjs\\logado_cury_10.html. Open this file in a browser to check that you are logged in.");
     var fs = require('fs');
     var result = page.evaluate(function() {
      return document.documentElement.outerHTML;
     });
     fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
    }
];
/**********END STEPS THAT PHANTOM SHOULD DO***********************/

//Execute steps one by one
// Declared with `var` so the timer handle is not an implicit global.
var interval = setInterval(executeRequestsStepByStep,5000);

/**
 * Timer callback that drives the script: performs at most one action per tick.
 *
 * - While a page load is in progress, nothing happens; the tick is skipped.
 * - Otherwise, if there is a step left, it is executed and the index advances.
 * - Otherwise (no steps left and nothing loading) the timer is stopped and
 *   PhantomJS exits.
 *
 * The exit check deliberately happens on a later tick than the last step, and
 * only when no load is in progress, so a navigation started by the final step
 * is not cut off by an immediate phantom.exit().
 */
function executeRequestsStepByStep(){
    // A page is still loading: wait for the next tick.
    if (loadInProgress) {
     return;
    }
    if (typeof steps[testindex] === "function") {
     //console.log("step " + (testindex + 1));
     steps[testindex]();
     testindex++;
     return;
    }
    // All steps have run and nothing is loading any more.
    clearInterval(interval);
    console.log("test complete!");
    phantom.exit();
}

/**
 * Page listeners. onLoadStarted/onLoadFinished maintain the loadInProgress
 * flag, which the step runner checks before starting the next step; without
 * them a step could read the content of a page that has not finished loading.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function(status) {
    loadInProgress = false;
    console.log('Loading finished');
    // PhantomJS passes 'success' or 'fail'; make failed loads visible.
    if (status !== 'success') {
     console.log('Page load reported status: ' + status);
    }
};
// Report JavaScript errors raised inside the page, so a broken page script
// does not go unnoticed. `trace` is an array of stack frames.
page.onError = function(msg, trace) {
    var lines = ['PAGE ERROR: ' + msg];
    if (trace && trace.length) {
     trace.forEach(function(frame) {
      lines.push('  -> ' + frame.file + ': ' + frame.line +
       (frame['function'] ? ' (in function "' + frame['function'] + '")' : ''));
     });
    }
    console.log(lines.join('\n'));
};
// page.onConsoleMessage is already installed right after the settings section
// of this script, so it is not assigned a second time here.

但響應僅此:

<html><head></head><body>ok</body></html> 

我需要獲取的是登錄後下一個頁面的內容,其 URL 爲:

http://parceriascury.housecrm.com.br/parceiro_busca 

我可以直接訪問這個頁面,但得到的內容不完整,因爲它需要登錄。

沒有錯誤,我不知道我在哪裏犯了一個錯誤。

編輯:也歡迎其他解決方案。我想過也許可以用 curl……但內容是在 JS 加載之後才出現的……

對不起我的英語不好。

+0

看起來你拿到的是認證之後的空白頁面,上面只寫着「OK」。添加一個計時器,或者等待某個選擇器發生變化,以確保等待的時間足夠長,讓重定向發生 – xShirase

+0

是的,看起來是這樣。但當我手動登錄時,登錄後會重定向到另一個頁面 `http://parceriascury.housecrm.com.br/parceiro_busca`,我看不到「ok」。你能給我一點提示嗎?謝謝…… – MagicHat

+0

您可以添加 [phantom.onError](http://phantomjs.org/api/phantom/handler/on-error.html) 回調和 [page.onError](http://phantomjs.org/api/webpage/handler/on-error.html) 回調 – 2016-11-30 14:07:28

回答

6

此代碼可能會更好:

// Set to true while a page load is running. In the visible part of this script
// the flag is only written by the listeners below; nothing reads it.
var loadInProgress = false;//This is set to true when a page is still loading

/*********SETTINGS*********************/
// Page settings are passed as real booleans. The previous string values
// ('true' / 'false') only work if the runtime happens to coerce them, and a
// non-empty string such as 'false' is truthy in JavaScript.
var page = require('webpage').create({
    viewportSize: {width: 1600, height: 900},
    settings: {
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36',
        javascriptEnabled: true,
        loadImages: false
    }
});
var fs = require('fs');
/*********SETTINGS END*****************/
console.log('All settings loaded, start with execution');

/**
 * Page listeners. onLoadStarted/onLoadFinished keep the loadInProgress flag up
 * to date and log the load lifecycle; onConsoleMessage forwards the page's own
 * console output to the PhantomJS output.
 */
page.onLoadStarted = function() {
    loadInProgress = true;
    console.log('Loading started');
};
page.onLoadFinished = function(status) {
    loadInProgress = false;
    console.log('Loading finished');
    // PhantomJS passes 'success' or 'fail'; make failed loads visible.
    if (status !== 'success') {
        console.log('Page load reported status: ' + status);
    }
};
page.onConsoleMessage = function(msg) {
    console.log(msg);
};

// Idea: log in to the account once in a normal browser, copy the cookies the
// site handed out, and preload them here so the site recognises the session
// without going through the login form.
//
// NOTE(review): the cookies below are the example for freebitco.in and are
// scoped to that domain, while the script later opens a different host.
// Presumably they have to be replaced with the cookies of the site actually
// being scraped - confirm before relying on this.
// NOTE(review): the values look like account credentials; consider not keeping
// real ones in the script.

/**
 * Builds one cookie record in the shape phantom.cookies expects.
 * Every cookie shares the same domain, path and flags and expires
 * 43800 hours (about 5 years) after the moment it is built.
 */
function makeSiteCookie(cookieName, cookieValue) {
    var lifetimeMs = 1000 * 60 * 60 * 43800; //5 years
    return {
        'name'  : cookieName,
        'value' : cookieValue,
        'domain' : 'freebitco.in',
        'path'  : '/',
        'httponly' : false,
        'secure' : true,
        'expires' : (new Date()).getTime() + lifetimeMs
    };
}

//for freebitco.in auth
phantom.cookies = [// an array of objects
    makeSiteCookie('btc_address', '1AuMxR6sPtB2Z6TkahSnpmm1H4KpYPBKqe'),
    makeSiteCookie('password', 'f574ca68a8650d1264d38da4b7687ca3bf631e6dfc59a98c89dd2564c7601f84')
];

//Execute steps one by one
// Step 1: open the target page. Step 2 (5 seconds later): save its HTML to
// disk and exit. If the page cannot be loaded, nothing is written and
// PhantomJS exits with a non-zero code.
page.open("http://parceriascury.housecrm.com.br/parceiro_busca", function(status){
    if (status !== 'success') {
        console.log('Step 1 failed - could not load the target page (status: ' + status + ')');
        phantom.exit(1);
        return;
    }
    console.log('Step 1 has been completed - we are on the target page!');
    // Give scripts on the page some time to finish before taking the snapshot.
    setTimeout(step2,5000);// Maybe we don't need to wait here, we can execute step2 immediately.

    // Writes the current DOM of the page, serialised as HTML, to the output file.
    function step2(){
        console.log("Step 2 - Content of the page is saved to C:\\phantomjs\\logado_cury_10.html. You can open this file using Chrome to ensure that you are logged in.");
        var result = page.evaluate(function(){ return document.documentElement.outerHTML; });
        fs.write('C:\\phantomjs\\logado_cury_10.html',result,'w');
        phantom.exit();
    }
});
+2

你能解釋爲什麼上面的代碼比OP發佈的更好嗎? – Sid

+2

謝謝你的關注,但這段代碼無法運行…… – MagicHat

+0

@MagicHat 哦,對不起。錯誤在這裏:`console.log('Step 1 has been completed - we're on the target page!');` —— `we're` 裏不應該有撇號,或者至少應該轉義成 `we\'re` – 2016-11-30 07:32:42