2011-12-16 97 views
6

我正在嘗試編碼重定向檢查器,以檢查URL是否對搜索引擎友好。它必須檢查URL是否被重定向,如果被重定向,則必須告知它是否適用於搜索引擎優化(301狀態碼)或不是(302/304)。使用HTTP標頭和curl查找URL重定向?

下面是類似的東西,我發現:http://www.webconfs.com/redirect-check.php

還應該能夠按照多重定向(例如,從A到B到C),並告訴我,一個重定向到C.

這是我得到了什麼,到目前爲止,但它不工作很正確(例如:鍵入www.example.com,而它不找到重定向到www.example.com/page1)

<?php 
// You can edit the messages of the respective code over here 
$httpcode = array(); 
$httpcode["200"] = "Ok"; 
$httpcode["201"] = "Created"; 
$httpcode["302"] = "Found"; 
$httpcode["301"] = "Moved Permanently"; 
$httpcode["304"] = "Not Modified"; 
$httpcode["400"] = "Bad Request"; 


if(count($_POST)>0) 
{ 
    $url = $_POST["url"]; 
    $curlurl = "http://".$url."/"; 
    $ch = curl_init(); 
    // Set URL to download 
    curl_setopt($ch, CURLOPT_URL, $curlurl); 

    // User agent 
    curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER["HTTP_USER_AGENT"]); 
    // Include header in result? (0 = yes, 1 = no) 
    curl_setopt($ch, CURLOPT_HEADER, 0); 

    // Should cURL return or print out the data? (true = return, false = print) 
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 

    // Timeout in seconds 
    curl_setopt($ch, CURLOPT_TIMEOUT, 15); 

    // Download the given URL, and return output 
    $output = curl_exec($ch); 

    $curlinfo = curl_getinfo($ch); 

    if(($curlinfo["http_code"]=="301") || ($curlinfo["http_code"]=="302")) 
    { 
     $ch = curl_init(); 
     // Set URL to download 
     curl_setopt($ch, CURLOPT_URL, $curlurl); 

     // User agent 
     curl_setopt($ch, CURLOPT_USERAGENT, $_SERVER["HTTP_USER_AGENT"]); 
     // Include header in result? (0 = yes, 1 = no) 
     curl_setopt($ch, CURLOPT_HEADER, 0); 

     // Should cURL return or print out the data? (true = return, false = print) 
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 

     // Timeout in seconds 
     curl_setopt($ch, CURLOPT_TIMEOUT, 15); 


     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); 
     // Download the given URL, and return output 
     $output = curl_exec($ch); 

     $curlinfo = curl_getinfo($ch); 
     echo $url." is redirected to ".$curlinfo["url"]; 
    } 
    else 
    { 
     echo $url." is not getting redirected"; 
    } 

    // Close the cURL resource, and free system resources 
    curl_close($ch); 
} 
?> 
<form action="" method="post"> 
http://<input type="text" name="url" size="30" />/ <b>e.g. www.google.com</b><br/> 
<input type="submit" value="Submit" /> 
</form> 
+0

有趣的是,是造成問題的一個`curl_setopt()`行是沒有註釋的一個。 – 2011-12-16 14:28:54

回答

7

好吧,如果你想記錄您必須自行實施並轉向的每個重定向關閉自動「以下位置」:

function curl_trace_redirects($url, $timeout = 15) { 

    $result = array(); 
    $ch = curl_init(); 

    $trace = true; 
    $currentUrl = $url; 

    $urlHist = array(); 
    while($trace && $timeout > 0 && !isset($urlHist[$currentUrl])) { 
     $urlHist[$currentUrl] = true; 

     curl_setopt($ch, CURLOPT_URL, $currentUrl); 
     curl_setopt($ch, CURLOPT_HEADER, true); 
     curl_setopt($ch, CURLOPT_NOBODY, true); 
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 
     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); 
     curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); 

     $output = curl_exec($ch); 

     if($output === false) { 
      $traceItem = array(
       'errorno' => curl_errno($ch), 
       'error' => curl_error($ch), 
      ); 

      $trace = false; 
     } else { 
      $curlinfo = curl_getinfo($ch); 

      if(isset($curlinfo['total_time'])) { 
       $timeout -= $curlinfo['total_time']; 
      } 

      if(!isset($curlinfo['redirect_url'])) { 
       $curlinfo['redirect_url'] = get_redirect_url($output); 
      } 

      if(!empty($curlinfo['redirect_url'])) { 
       $currentUrl = $curlinfo['redirect_url']; 
      } else { 
       $trace = false; 
      } 

      $traceItem = $curlinfo; 
     } 

     $result[] = $traceItem; 
    } 

    if($timeout < 0) { 
     $result[] = array('timeout' => $timeout); 
    } 

    curl_close($ch); 

    return $result; 
} 

// apparently 'redirect_url' is not available on all curl-versions 
// so we fetch the location header ourselves 
function get_redirect_url($header) { 
    if(preg_match('/^Location:\s+(.*)$/mi', $header, $m)) { 
     return trim($m[1]); 
    } 

    return ""; 
} 

並且你使用這樣的:

$res = curl_trace_redirects("http://www.example.com"); 
foreach($res as $item) { 
    if(isset($item['timeout'])) { 
     echo "Timeout reached!\n"; 
    } else if(isset($item['error'])) { 
     echo "error: ", $item['error'], "\n"; 
    } else { 
     echo $item['url']; 
     if(!empty($item['redirect_url'])) { 
      // redirection 
      echo " -> (", $item['http_code'], ")"; 
     } 

     echo "\n"; 
    } 
} 

這有可能是我的代碼沒有完全想出來的,但我想這是一個良好的開端。

編輯

下面是一些示例輸出:

http://midas/~stefan/test/redirect/fritzli.html -> (302) 
http://midas/~stefan/test/redirect/hansli.html -> (301) 
http://midas/~stefan/test/redirect/heiri.html