好吧,我試圖建立我的第一個適當的cUrl作者:功能多捲曲PHP鏈接檢查,我使用NETTUTS捲曲(http://net.tutsplus.com/tutorials/php/techniques-and-resources-for-mastering-curl/)WordPress的鏈接檢查爲基礎,然後重做出於安全原因,對數據庫的訪問。我不知道爲什麼它不起作用,因爲我只重寫了數據庫訪問部分,並在第32行中做了一些更改。我還會發布Nettuts的原始代碼,我希望這可以幫助。該代碼假設檢查文檔(.PDF和.doc)的鏈接是否仍然存在,或者是否需要更新。與PDO數據庫訪問沒有發回任何迴應
任何幫助,將不勝感激!
原始代碼
// CONFIG
$db_host = 'localhost';
$db_user = 'root';
$db_pass = '';
$db_name = 'wordpress';
$excluded_domains = array(
'localhost', 'www.mydomain.com');
$max_connections = 10;
// initialize some variables
$url_list = array();
$working_urls = array();
$dead_urls = array();
$not_found_urls = array();
$active = null;
// connect to MySQL
if (!mysql_connect($db_host, $db_user, $db_pass)) {
die('Could not connect: ' . mysql_error());
}
if (!mysql_select_db($db_name)) {
die('Could not select db: ' . mysql_error());
}
// get all published posts that have links
$q = "SELECT post_content FROM wp_posts
WHERE post_content LIKE '%href=%'
AND post_status = 'publish'
AND post_type = 'post'";
$r = mysql_query($q) or die(mysql_error());
while ($d = mysql_fetch_assoc($r)) {
// get all links via regex
if (preg_match_all("!href=\"(.*?)\"!", $d['post_content'], $matches)) {
foreach ($matches[1] as $url) {
// exclude some domains
$tmp = parse_url($url);
if (in_array($tmp['host'], $excluded_domains)) {
continue;
}
// store the url
$url_list []= $url;
}
}
}
// remove duplicates
$url_list = array_values(array_unique($url_list));
if (!$url_list) {
die('No URL to check');
}
// 1. multi handle
$mh = curl_multi_init();
// 2. add multiple URLs to the multi handle
for ($i = 0; $i < $max_connections; $i++) {
add_url_to_multi_handle($mh, $url_list);
}
// 3. initial execution
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
// 4. main loop
while ($active && $mrc == CURLM_OK) {
// 5. there is activity
if (curl_multi_select($mh) != -1) {
// 6. do work
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
// 7. is there info?
if ($mhinfo = curl_multi_info_read($mh)) {
// this means one of the requests were finished
// 8. get the info on the curl handle
$chinfo = curl_getinfo($mhinfo['handle']);
// 9. dead link?
if (!$chinfo['http_code']) {
$dead_urls []= $chinfo['url'];
// 10. 404?
} else if ($chinfo['http_code'] == 404) {
$not_found_urls []= $chinfo['url'];
// 11. working
} else {
$working_urls []= $chinfo['url'];
}
// 12. remove the handle
curl_multi_remove_handle($mh, $mhinfo['handle']);
curl_close($mhinfo['handle']);
// 13. add a new url and do work
if (add_url_to_multi_handle($mh, $url_list)) {
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
}
}
}
}
// 14. finished
curl_multi_close($mh);
echo "==Dead URLs==\n";
echo implode("\n",$dead_urls) . "\n\n";
echo "==404 URLs==\n";
echo implode("\n",$not_found_urls) . "\n\n";
echo "==Working URLs==\n";
echo implode("\n",$working_urls);
// 15. adds a url to the multi handle
function add_url_to_multi_handle($mh, $url_list) {
static $index = 0;
// if we have another url to get
if ($url_list[$index]) {
// new curl handle
$ch = curl_init();
// set the url
curl_setopt($ch, CURLOPT_URL, $url_list[$index]);
// to prevent the response from being outputted
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// follow redirections
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// do not need the body. this saves bandwidth and time
curl_setopt($ch, CURLOPT_NOBODY, 1);
// add it to the multi handle
curl_multi_add_handle($mh, $ch);
// increment so next url is used next time
$index++;
return true;
} else {
// we are done adding new URLs
return false;
}
}
我的代碼
<?php
/*Config*/
/*** mysql hostname ***/
$hostname = 'localhost';
/*** mysql username ***/
$username = 'root';
/*** mysql password ***/
$password = 'root';
/*curl setup of varibles*/
$excluded_domains = array(
'localhost', 'rollnstroll.se');
$max_connections = 10;
$url_list = array();
$working_urls = array();
$dead_urls = array();
$not_found_urls = array();
$active = null;
try {
$dbh = new PDO("mysql:host=$hostname;dbname=blankett", $username, $password);
$dbh->exec('SET CHARACTER SET utf8');
/*** echo a message saying we have connected ***/
/*** fetch into an PDOStatement object ***/
$sql = "SELECT link_forms FROM forms2 WHERE id = ?";
$stmt = $dbh->query($sql);
$result = $stmt->fetch(PDO::FETCH_ASSOC);
// get all links via regex
if (preg_match_all("!href=\"(.*?)\"!", $d['link_forms'], $matches)) {
foreach ($matches[1] as $url) {
// exclude some domains
$tmp = parse_url($url);
if (in_array($tmp['host'], $excluded_domains)) {
continue;
}
// store the url
$url_list []= $url;
}
}
// remove duplicates
$url_list = array_values(array_unique($url_list));
if (!$url_list) {
die('No URL to check');
}
}
catch(PDOException $e)
{
echo $e->getMessage();
}
// 1. multi handle
$mh = curl_multi_init();
// 2. add multiple URLs to the multi handle
for ($i = 0; $i < $max_connections; $i++) {
add_url_to_multi_handle($mh, $url_list);
}
// 3. initial execution
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
// 4. main loop
while ($active && $mrc == CURLM_OK) {
// 5. there is activity
if (curl_multi_select($mh) != -1) {
// 6. do work
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
// 7. is there info?
if ($mhinfo = curl_multi_info_read($mh)) {
// this means one of the requests were finished
// 8. get the info on the curl handle
$chinfo = curl_getinfo($mhinfo['handle']);
// 9. dead link?
if (!$chinfo['http_code']) {
$dead_urls []= $chinfo['url'];
// 10. 404?
} else if ($chinfo['http_code'] == 404) {
$not_found_urls []= $chinfo['url'];
// 11. working
} else {
$working_urls []= $chinfo['url'];
}
// 12. remove the handle
curl_multi_remove_handle($mh, $mhinfo['handle']);
curl_close($mhinfo['handle']);
// 13. add a new url and do work
if (add_url_to_multi_handle($mh, $url_list)) {
do {
$mrc = curl_multi_exec($mh, $active);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
}
}
}
}
// 14. finished
curl_multi_close($mh);
echo "==Dead URLs==\n";
echo implode("\n",$dead_urls) . "\n\n";
echo "==404 URLs==\n";
echo implode("\n",$not_found_urls) . "\n\n";
echo "==Working URLs==\n";
echo implode("\n",$working_urls);
// 15. adds a url to the multi handle
function add_url_to_multi_handle($mh, $url_list) {
static $index = 0;
// if we have another url to get
if ($url_list[$index]) {
// new curl handle
$ch = curl_init();
// set the url
curl_setopt($ch, CURLOPT_URL, $url_list[$index]);
// to prevent the response from being outputted
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// follow redirections
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
// do not need the body. this saves bandwidth and time
curl_setopt($ch, CURLOPT_NOBODY, 1);
// add it to the multi handle
curl_multi_add_handle($mh, $ch);
// increment so next url is used next time
$index++;
return true;
} else {
// we are done adding new URLs
return false;
}
}
?>
我從原來的改寫什麼是數據庫連接,這意味着我有,因爲我使用的PDO縮進我的代碼。我也重寫了:
if (preg_match_all("!href=\"(.*?)\"!", $d['link_forms'], $matches)) {
from
if (preg_match_all("!href=\"(.*?)\"!", $d['post_content'], $matches)) {
我認爲問題在這裏,但我缺乏技能不讓我找到答案。 如果有一個更好的腳本來檢查死鏈接,重定向和功能鏈接,請讓我知道。
什麼不起作用?您是否收到錯誤消息或者沒有輸出? – 2013-05-03 15:16:23
準確地說,文件沒有輸出,沒有打印。我似乎無法找到任何語法錯誤。我認爲問題出現在***如果(preg_match_all(「!href = \」(。*?)\「)!「,$ d ['link_forms'],$ matches)){*** – StenW 2013-05-03 15:18:38