0
我有一個函式,會先登入某個網站,然後在下一個頁面中搜尋字串。這個流程目前需要 10 秒,我想看看能做些什麼來加快它。我想知道是否可以讓 cURL 的登入狀態在客戶端會話中持續保留,或者是否有更好的方式來搜尋文件內容。標題:加速 cURL 頁面登入與抓取
/**
 * Create a cURL handle, send a form POST to the given URL and return the handle.
 *
 * The response of this request is discarded; only the handle is handed back so
 * the caller can keep issuing requests on it.
 *
 * @param string $url         URL the POST is sent to.
 * @param mixed  $post_values Passed unchanged to CURLOPT_POSTFIELDS.
 *                            NOTE(review): presumably a urlencoded string, since the
 *                            Content-type header is forced to urlencoded - confirm with callers.
 * @param string $cookieJar   Path used for both CURLOPT_COOKIEJAR and CURLOPT_COOKIEFILE.
 *
 * @return mixed Whatever curl_init() returned, after curl_exec() has been run on it.
 */
public function curlLogin($url, $post_values, $cookieJar) {
    $handle = curl_init();

    // Options are applied one by one in exactly this order
    // (CURLOPT_POST is set before CURLOPT_POSTFIELDS).
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        CURLOPT_COOKIEJAR      => $cookieJar,
        CURLOPT_COOKIEFILE     => $cookieJar,
        CURLOPT_COOKIESESSION  => 0,
        // Response headers are included in the output.
        CURLOPT_HEADER         => 1,
        CURLOPT_RETURNTRANSFER => 1,
        // Peer certificate verification is switched off.
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => $post_values,
        CURLOPT_HTTPHEADER     => array("Content-type: application/x-www-form-urlencoded"),
    );

    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    // The login response itself is not needed by the caller.
    curl_exec($handle);

    return $handle;
}
/**
 * Send a form POST on an existing cURL handle and return the result of curl_exec().
 *
 * All request options are (re)applied to the handle before the request is executed.
 *
 * @param mixed  $curl_connection Existing cURL handle to reuse.
 * @param string $url             URL the POST is sent to.
 * @param mixed  $post_values     Passed unchanged to CURLOPT_POSTFIELDS.
 * @param string $cookieJar       Path used for both CURLOPT_COOKIEJAR and CURLOPT_COOKIEFILE.
 *
 * @return mixed The value returned by curl_exec(); with CURLOPT_RETURNTRANSFER and
 *               CURLOPT_HEADER enabled this is the response including its headers,
 *               or false when the transfer fails.
 */
public function curlPost($curl_connection, $url, $post_values, $cookieJar) {
    // Options are applied one by one in exactly this order
    // (CURLOPT_POST is set before CURLOPT_POSTFIELDS).
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_TIMEOUT        => 30,
        CURLOPT_USERAGENT      => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        CURLOPT_COOKIEJAR      => $cookieJar,
        CURLOPT_COOKIEFILE     => $cookieJar,
        CURLOPT_COOKIESESSION  => 0,
        // Response headers are included in the returned string.
        CURLOPT_HEADER         => 1,
        CURLOPT_RETURNTRANSFER => 1,
        // Peer certificate verification is switched off.
        CURLOPT_SSL_VERIFYPEER => 0,
        CURLOPT_POST           => 1,
        CURLOPT_POSTFIELDS     => $post_values,
        CURLOPT_HTTPHEADER     => array("Content-type: application/x-www-form-urlencoded"),
    );

    foreach ($options as $option => $value) {
        curl_setopt($curl_connection, $option, $value);
    }

    return curl_exec($curl_connection);
}
// Log in, request the follow-up page on the same cURL handle, then classify the
// response by which of the marker strings it contains. Sets $success and $message.
$cookieJar = tempnam("/tmp", "CURLCOOKIE");
$curl_connection = $this->curlLogin($login_url, $post_values, $cookieJar);
$result = $this->curlPost($curl_connection, $next_url, $params, $cookieJar);

// curlPost() hands back the curl_exec() result, which is false when the transfer
// fails; search an empty string in that case so the "none present" branch is taken.
$haystack = is_string($result) ? $result : '';

// strpos() returns 0 for a match at the very start and false for no match, so the
// result must be compared with !== false; "> 0" would miss a match at offset 0.
if (strpos($haystack, 'string 1') !== false) {
    $success = true;
    $message = 'string 1 is present';
} elseif (strpos($haystack, 'string 2') !== false) {
    $success = false;
    $message = 'string 2 is present';
} elseif (strpos($haystack, 'string 3') !== false) {
    $success = false;
    $message = 'string 3 is present';
} else {
    $success = false;
    $message = 'None of the above strings are present.';
}

// Release the handle first, then remove the temporary cookie file.
curl_close($curl_connection);
unlink($cookieJar);
可能與以下問題重複:[php - 檢查大量網域(1000 個以上)中是否存在某段文字的最快方法](http://stackoverflow.com/questions/12891689/php-fastest-way-to-check-presence-of-text-in-many-domains-above-1000)、[如何在 PHP 的 curl 請求期間防止伺服器過載](http://stackoverflow.com/questions/13461194/how-to-prevent-server-from-overload-curl-requests-in-php/13461652)、[php 更快地從 url 取得所有寬度和高度 >= 200 的圖像](http://stackoverflow.com/a/10036599/1226894) – Baba 2013-05-02 18:07:16
當你在 Firefox 中用 Firebug 載入這些頁面時,顯示的頁面載入時間是多少? – Zak 2013-05-02 18:07:32
與其他操作相比,搜尋子字串非常快,從這個方向著手不會有任何收穫。 – mzedeler 2013-05-02 18:07:43