2017-08-13 74 views
0

目標是遍歷約 10,000 條鏈接,確定哪些頁面的頁數 > 3,並突出顯示第一列。這些我都已經完成了,但問題是 UrlFetch 耗時過長,導致出現超過最大執行時間的錯誤。有沒有辦法加快這段代碼,讓我可以處理完 10,000 行?(如何加速 Google Apps Script 的 UrlFetch?)

/**
 * Walks every data row of the active sheet (skipping the first row), fetches
 * the URL stored in the third column and highlights the first column of the
 * row in yellow when the row needs attention.
 *
 * A row is highlighted when:
 *  - the fetch (or reading its body) throws, or
 *  - the URL does not end in "pdf", the seventh column is empty and the page
 *    count found after the "Pages" marker is not at least 3 (an unknown page
 *    count is treated as "not at least 3").
 */
function readColumns() {
    var sheet = SpreadsheetApp.getActiveSheet();
    var values = sheet.getDataRange().getValues();

    // values is 0-based, so the last valid index is values.length - 1.
    for (var i = 1; i < values.length; i++) {
        var url = String(values[i][2]);
        // Reset per row so one failed fetch does not flag every later row.
        var error = false;
        var pageNumber = NaN;

        try {
            var html = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
            var htmlString = html.getContentText();
            var marker = htmlString.indexOf("Pages");
            if (marker !== -1) {
                // Skip "Pages" plus one separator character, then read all
                // leading digits so counts of 10 or more are not truncated.
                pageNumber = parseInt(htmlString.slice(marker + 6, marker + 16), 10);
            }
        } catch (e) {
            Logger.log("Error at line " + i);
            error = true;
        }

        var isPdf = url.slice(-3) === "pdf";
        // NOTE(review): the original condition was `!pageNumber >= 3`, which is
        // always false; it is read here as "not at least 3 pages" — confirm.
        if (error || (!isPdf && values[i][6] === "" && !(pageNumber >= 3))) {
            // values[i] is sheet row i + 1 because sheet rows are 1-based.
            sheet.getRange(i + 1, 1).setBackground("yellow");
        }
    }
}

編輯——較短的腳本,改成這樣:

// Builds a three-character suffix of columnLogger by concatenating three
// separate charAt() lookups (length-3, length-2, length-1).
// NOTE(review): columnLogger is not defined in this fragment — presumably the
// URL string read from the sheet; confirm against the surrounding script.
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1); 

/**
 * Processes the sheet in batches by calling readColumns(start) repeatedly,
 * beginning at row index 1480 and advancing 100 rows per call.
 *
 * The loop stops once the start index moves past the last row of the active
 * sheet's data range; the previous `for(;;)` form had no exit condition and
 * could only end through an exception or the script's execution-time limit.
 */
function foreverCall(){
    // Must match the number of rows readColumns(start) handles per call.
    var BATCH_SIZE = 100;
    var rowCount = SpreadsheetApp.getActiveSheet().getDataRange().getNumRows();

    // start is a 0-based row index, so valid values are 0 .. rowCount - 1.
    for (var start = 1480; start < rowCount; start += BATCH_SIZE) {
        readColumns(start);
    }
}


/**
 * Processes one batch of up to 100 rows of the active sheet, starting at the
 * 0-based row index `start`, and highlights the first column of every row for
 * which loop(...) returns true.
 *
 * @param {number} start 0-based index into the sheet's data rows at which the
 *     batch begins. Indexes beyond the last row are ignored.
 */
function readColumns(start) {
    var BATCH_SIZE = 100;
    var sheet = SpreadsheetApp.getActiveSheet();
    var columns = sheet.getDataRange();
    var rowNum = columns.getNumRows();
    var values = columns.getValues();
    // Placeholders kept only because loop() accepts them as parameters.
    var html;
    var htmlString;
    var error;

    // Clamp the batch to the rows that actually exist so values[i] is defined.
    var first = Math.max(start, 0);
    var end = Math.min(start + BATCH_SIZE, values.length);

    for (var i = first; i < end; i++) {
        if (loop(values, error, html, htmlString, rowNum, sheet, columns, i)) {
            // values[i] is sheet row i + 1: sheet rows are 1-based while the
            // values array is 0-based.
            sheet.getRange(i + 1, 1).setBackground("yellow");
        }
    }
}

/**
 * Decides whether the row at index `i` should be highlighted.
 *
 * Returns true when:
 *  - the URL in the third column does not end in ".pdf" and the seventh
 *    column is empty (no fetch is made in this case), or
 *  - fetching the URL throws or yields no response, or
 *  - the page count after the "Pages" marker is missing or not at least 3.
 *
 * @param {Array<Array>} values Sheet values; values[i][2] is the URL and
 *     values[i][6] is the column checked for emptiness.
 * @param {*} error Unused; kept for call compatibility.
 * @param {*} html Unused; kept for call compatibility.
 * @param {*} htmlString Unused; kept for call compatibility.
 * @param {*} rowNum Unused; kept for call compatibility.
 * @param {*} sheet Unused; kept for call compatibility.
 * @param {*} columns Unused; kept for call compatibility.
 * @param {number} i 0-based index of the row to evaluate.
 * @return {boolean} true if the row should be highlighted.
 */
function loop(values, error, html, htmlString, rowNum, sheet, columns, i){
    var url = String(values[i][2]);
    var isPdf = url.slice(-4) === ".pdf";

    // Strict comparison replaces the non-standard String.equals(), which is
    // not available outside the legacy Rhino runtime.
    if (!isPdf && values[i][6] === "") {
        return true;
    }

    var response;
    try {
        response = UrlFetchApp.fetch(url);
    } catch (e) {
        Logger.log("Error at line " + i);
        return true;
    }
    if (response == null) {
        return true;
    }

    var text = response.getContentText();
    var marker = text.indexOf("Pages");
    if (marker === -1) {
        // No page count on the page: treat as unknown and flag the row.
        return true;
    }

    // Skip "Pages" plus one separator character, then read all leading digits
    // so counts of 10 or more are not truncated to their first digit.
    var pageNumber = parseInt(text.slice(marker + 6, marker + 16), 10);

    // NOTE(review): the original `!pageNumber >= 3` parsed as
    // `(!pageNumber) >= 3` and was always false; it is read here as
    // "not at least 3 pages" — confirm against the intended rule.
    return !(pageNumber >= 3);
}

回答

2

您可以將該行替換為:

// Takes the trailing three characters of columnLogger with a single slice() call
// (assumes columnLogger is a string — it is not defined in this fragment).
var lastChars = columnLogger.slice(-3); 

您也可以從 HTML 側邊欄或對話框啟動抓取腳本,每次只運行一小批,然後返回到成功處理程序(success handler),再根據返回值啟動下一批。返回值也可以用來指定下一批從哪一行開始。這樣總運行時間實際上會更長,但通過保持每批很小,您可以保持在腳本的執行時間限制之內。

enter image description here

+0

感謝您的回答。我其實只需要 UrlFetchApp 取回內容的前 50 個字符,但它每次都必須加載整個頁面。有沒有辦法只加載少量 HTML 以縮短執行時間?也許設置某種超時(例如 20 毫秒)可以讓它提前中止這個請求?希望這樣能截斷 HTML。 –

+0

我不太確定,但我在想您是否可以在抓取時禁用 JavaScript 和/或 cookie。您得到的頁面可能會明顯不同,但也許仍能取得所需的文字。說實話,這完全是我的猜測。 – Cooper

+0

好的,沒關係。這就是您建議的讓它保持在腳本限制之內的做法嗎?我已經編輯了問題 –

0

可以用下面這一行替換:

var lastChars = columnLogger.slice(-3);

+0

您介意在每行代碼前面加四個空格來格式化代碼,讓它在瀏覽器中更易讀,並且解釋一下為什麼這樣做可行嗎? – PaSTE