目標是貫穿約10,000條鏈接。確定哪些頁面編號> 3並突出顯示第一列。我已經完成了所有這些工作,但問題是它需要Url Fetch時間過長,我遇到了最大運行時錯誤。無論如何,我可以加快這個代碼,所以我可以通過10,000行?加速UrlFetch Google App腳本?
function readColumns() {
//program is going to run through column 3 by going through the amount of rows, truncating last three characters to see if pdf, then highlighting first column
var sheet = SpreadsheetApp.getActiveSheet();
var columns = sheet.getDataRange();
var rowNum = columns.getNumRows();
var values = columns.getValues();
var html;
var htmlString;
for(var i = 1; i <= rowNum; i++){
var columnLogger = values[i][2];
try{
html = UrlFetchApp.fetch(values[i][2],
{
muteHttpExceptions: true,
}
);
}catch(e){
Logger.log("Error at line " + i);
var error = true;
}
htmlString = html.getContentText();
var index = htmlString.indexOf("Pages") + 6;
var pageNumber = parseInt(htmlString.charAt(index),10);
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1);
if((error) || (!lastChars.equals("pdf") && values[i][6].equals("") && !pageNumber >= 3)){
//goes back to first column and highlights yellow
var cellRange = sheet.getRange(1, 1, rowNum, 3)
var cell = cellRange.getCell(i+1, 1)
cell.setBackground("yellow");
}
}
}
編輯 - 短腳本:有了這個
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1);
:
function foreverCall(){
var start = 1480;
for(;;){
readColumns(start);
start = start + 100;
}
}
function readColumns(start) {
//program is going to run through column 3 by going through the amount of rows, truncating last three characters to see if pdf, then highlighting first column
var sheet = SpreadsheetApp.getActiveSheet();
var columns = sheet.getDataRange();
var rowNum = columns.getNumRows();
var values = columns.getValues();
var html;
var htmlString;
var error;
for(var i = start; i < start+100; i++){
if(loop(values, error, html, htmlString, rowNum, sheet, columns, i)){
var cellRange = sheet.getRange(1, 1, rowNum, 3)
var cell = cellRange.getCell(i, 1)
cell.setBackground("yellow");
}
}
}
function loop(values, error, html, htmlString, rowNum, sheet, columns, i){
var columnLogger = values[i][2];
var lastChars = columnLogger.slice(-4);
if(!lastChars.equals(".pdf") && values[i][6].equals("")){
return true;
}else{
try{
error = false
html = UrlFetchApp.fetch(values[i][2].toString());
if(html == null){
error = true;
}
}catch(e){
Logger.log("Error at line " + i);
error = true;
}
if(!error){
htmlString = html.getContentText();
var index = htmlString.indexOf("Pages") + 6;
var pageNumber = parseInt(htmlString.charAt(index),10);
}
//goes back to first column and highlights yellow
if(error || !pageNumber >= 3){
return true;
}
}
return false;
}
感謝您的回答。所以,我真的只需要這個URL抓取應用程序的前50個字符,但它必須每次加載整個頁面。無論如何,爲了加快執行時間,使它只加載少量的html?也許某種超時(即20毫秒)會讓它跳過這個命令?這將希望截斷html。 –
我真的不知道,但我想知道如果你也許可以禁用JavaScript和或cookie的抓取。您可能會得到明顯不同的頁面,但您可能能夠獲取所需的文本。真的,這是我的總猜測。 – Cooper
好吧,沒關係。這是你建議將它保持在腳本限制之下嗎?我編輯了問題 –