2014-09-19 55 views
0

我有一堆網站存儲爲3個MySQL表中的字符串。我的腳本將它們放入數組中,解析它,提取所有鏈接並將它們分成2個表格。它在3個相同的模塊中進行分類。雖然PHP運營商與MySQL截斷不工作

整件事情每隔30秒鐘就會執行一次。

出於某種原因,只有第一次按預期工作,以後什麼都不會發生。

在我到我的代碼之前,我爲折舊的mysql道歉,這個腳本將只在本地機器上使用,我會在時間正確時更新它。

這裏是我的代碼:

$i=1; 

$domain1 = 'example1.com'; 
$domain2 = 'example2.com'; 
$domain3 = 'example3.com'; 

$robots1 = array("url1", 
"url2", 
"url3"); 


$robots2 = array("url1", 
"url2", 
"url3"); 


$robots3 = array("url1", 
"url2", 
"url3"); 

require_once 'Normalizer.php'; 

$conn = mysql_connect('localhost:3306','user', 'pass', true); 
mysql_select_db('t1000', $conn); 

while ($i<=50000) { 


$query = 'SELECT * FROM dump1'; 
$result1=mysql_query($query, $conn); 
$strings1=array(); 
while ($row = mysql_fetch_assoc($result1)) { 
array_push($strings1, $row["link"]); 
} 
$query = 'TRUNCATE TABLE dump1'; 
$delete=mysql_query($query, $conn); 




$query = 'SELECT * FROM dump2'; 
$result1=mysql_query($query, $conn); 
$strings2=array(); 
while ($row = mysql_fetch_assoc($result1)) { 
array_push($strings2, $row["link"]); 
} 
$query = 'TRUNCATE TABLE dump2'; 
$delete=mysql_query($query, $conn); 




$query = 'SELECT * FROM dump3'; 
$result1=mysql_query($query, $conn); 
$strings3=array(); 
while ($row = mysql_fetch_assoc($result1)) { 
array_push($strings3, $row["link"]); 
} 
$query = 'TRUNCATE TABLE dump3'; 
$delete=mysql_query($query, $conn); 


// Module 1 start 

$ii=0; 
$links = array(); 
$edofollow = array(); 
$enofollow = array(); 
$internal = array(); 

foreach ($strings1 as $value) 
{ 
$input=$strings1[$ii]; 
$htm=stripcslashes($input); 

$doc = new DOMDocument(); 
@$doc->loadHTML($htm); 

    $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object 
    foreach($arr as $item) { // DOMElement Object 
    $href = $item->getAttribute("href"); 
    $rel = $item->getAttribute("rel"); 
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue)); 
    $links[] = array(
     'href' => $href, 
     'rel' => $rel, 
     'text' => $text 
    ); 
if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND $rel!=='nofollow') 
    { 
    $un = new URL\Normalizer(); 
    $un->setUrl($href); 
    $href= parse_url($un->normalize(), PHP_URL_HOST); 
    array_push($edofollow, $href); 
} 

else if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND $rel=='nofollow') 

    { 
    $un1 = new URL\Normalizer(); 
    $un1->setUrl($href); 
    array_push($enofollow, $un1->normalize()); 

} 

else if (strpos($href,'://')==false or strpos($href,$domain1)!==false) 

    { 
    $un2 = new URL\Normalizer(); 
    $un2->setUrl($href); 
     $href1=$un2->normalize(); 
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)  
    { 
    if($href1=='' or $href1=='/') 
    {} 
    else{ 

     if (strpos($href1, '://')==false) 
     {$href1='http://'.$domain1.$href1;} 


if (in_array($href1, $robots1)) { } 
else { 
array_push($internal, $href1); 
}  
} 
    } 
} 
} 

$uedofollow = array_values(array_unique($edofollow)); 
foreach ($uedofollow as $value) { 
$query=mysql_query("select * from dofollow where link='".$value."' "); 
$duplicate=0; 
if($query){ 
$duplicate=mysql_num_rows($query); 
} 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")'; 
    mysql_query($sql, $conn); 
    } 
} 

$uinternal = array_values(array_unique($internal)); 
foreach ($uinternal as $value2) { 
    $query=mysql_query("select * from joblist1 where link='".$value2."' "); 
if ($query) { 
$duplicate=0; 
$duplicate=mysql_num_rows($query); 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO joblist1 (link) VALUES ("'.$value2.'")'; 
    mysql_query($sql, $conn); 
    } 
} 
} 

$ii=$ii+1; 
} 

// Module 1 ends 


// Module 2 start 

$links = array(); 
$edofollow = array(); 
$enofollow = array(); 
$internal = array(); 

$ii=0; 

foreach ($strings2 as $value) 
{ 
$input=$strings2[$ii]; 
$htm=stripcslashes($input); 

$doc = new DOMDocument(); 
@$doc->loadHTML($htm); 

    $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object 
    foreach($arr as $item) { // DOMElement Object 
    $href = $item->getAttribute("href"); 
    $rel = $item->getAttribute("rel"); 
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue)); 
    $links[] = array(
     'href' => $href, 
     'rel' => $rel, 
     'text' => $text 
    ); 
if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND $rel!=='nofollow') 
    { 
    $un = new URL\Normalizer(); 
    $un->setUrl($href); 
    $href= parse_url($un->normalize(), PHP_URL_HOST); 
    array_push($edofollow, $href); 
} 

else if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND $rel=='nofollow') 

    { 
    $un1 = new URL\Normalizer(); 
    $un1->setUrl($href); 
    array_push($enofollow, $un1->normalize()); 

} 

else if (strpos($href,'://')==false or strpos($href,$domain2)!==false) 

    { 
    $un2 = new URL\Normalizer(); 
    $un2->setUrl($href); 
     $href1=$un2->normalize(); 
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)  
    { 
    if($href1=='' or $href1=='/') 
    {} 
    else{ 

     if (strpos($href1, '://')==false) 
     {$href1='http://'.$domain2.$href1;} 


if (in_array($href1, $robots2)) { } 
else { 
array_push($internal, $href1); 
}  
} 
    } 
} 
} 

$uedofollow = array_values(array_unique($edofollow)); 
foreach ($uedofollow as $value) { 
$query=mysql_query("select * from dofollow where link='".$value."' "); 
$duplicate=0; 
if($query){ 
$duplicate=mysql_num_rows($query); 
} 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")'; 
    mysql_query($sql, $conn); 
    } 
} 

$uinternal = array_values(array_unique($internal)); 
foreach ($uinternal as $value2) { 
    $query=mysql_query("select * from joblist2 where link='".$value2."' "); 
if ($query) { 
$duplicate=0; 
$duplicate=mysql_num_rows($query); 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO joblist2 (link) VALUES ("'.$value2.'")'; 
    mysql_query($sql, $conn); 
    } 
} 
} 

$ii=$ii+1; 
} 

// Module 2 Ends 


// Module 3 start 

$links = array(); 
$edofollow = array(); 
$enofollow = array(); 
$internal = array(); 

$ii=0; 

foreach ($strings3 as $value) 
{ 
$input=$strings3[$ii]; 
$htm=stripcslashes($input); 

$doc = new DOMDocument(); 
@$doc->loadHTML($htm); 

    $arr = $doc->getElementsByTagName("a"); // DOMNodeList Object 
    foreach($arr as $item) { // DOMElement Object 
    $href = $item->getAttribute("href"); 
    $rel = $item->getAttribute("rel"); 
    $text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue)); 
    $links[] = array(
     'href' => $href, 
     'rel' => $rel, 
     'text' => $text 
    ); 
if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND $rel!=='nofollow') 
    { 
    $un = new URL\Normalizer(); 
    $un->setUrl($href); 
    $href= parse_url($un->normalize(), PHP_URL_HOST); 
    array_push($edofollow, $href); 
} 

else if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND $rel=='nofollow') 

    { 
    $un1 = new URL\Normalizer(); 
    $un1->setUrl($href); 
    array_push($enofollow, $un1->normalize()); 

} 

else if (strpos($href,'://')==false or strpos($href,$domain3)!==false) 

    { 
    $un2 = new URL\Normalizer(); 
    $un2->setUrl($href); 
     $href1=$un2->normalize(); 
    if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)  
    { 
    if($href1=='' or $href1=='/') 
    {} 
    else{ 

     if (strpos($href1, '://')==false) 
     {$href1='http://'.$domain3.$href1;} 


if (in_array($href1, $robots3)) { } 
else { 
array_push($internal, $href1); 
}  
} 
    } 
} 
} 

$uedofollow = array_values(array_unique($edofollow)); 
foreach ($uedofollow as $value) { 
$query=mysql_query("select * from dofollow where link='".$value."' "); 
$duplicate=0; 
if($query){ 
$duplicate=mysql_num_rows($query); 
} 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")'; 
    mysql_query($sql, $conn); 
    } 
} 

$uinternal = array_values(array_unique($internal)); 
foreach ($uinternal as $value2) { 
    $query=mysql_query("select * from joblist3 where link='".$value2."' "); 
if ($query) { 
$duplicate=0; 
$duplicate=mysql_num_rows($query); 
    if($duplicate==0) 
    { 
    $sql='INSERT INTO joblist3 (link) VALUES ("'.$value2.'")'; 
    mysql_query($sql, $conn); 
    } 
} 
} 

$ii=$ii+1; 
} 

// Module 3 ends 

sleep(30); 
$i=$i++; 
} 

我試圖找到附近解決好幾天,混合的東西,但沒有運氣...

+0

是你的mysql用戶允許使用truncate命令檢查您的權限? – Dave 2014-09-19 14:53:32

+0

是的,第一次按照預期清空表格 – Acidon 2014-09-19 14:55:00

+0

腳本超時?檢查Apache/PHP的錯誤日誌?它可以是包含數字的表名不知道如果mysql有問題,可能需要圍繞表名與'等 – Dave 2014-09-19 14:56:33

回答

1

嘗試在一個do {},而圍繞它包裝( );

即:

$i = 1; 
do { 
    echo "some crap $i<br>\n"; 
} while($i<=50000); 
+0

其實,這沒有竅門,速度不是責怪:)我不知道你可以扭動while運算符以'做'的方式。謝謝,邁克爾! – Acidon 2014-09-19 15:16:24

+0

不用擔心男人,是的,while()和do ... while()很漂亮。他們兩人之間的主要區別是do-while在塊運行之後檢查條件,而不是之前。 – Michael 2014-09-19 18:53:13