我有一堆網站存儲爲3個MySQL表中的字符串。我的腳本將它們放入數組中,解析它,提取所有鏈接並將它們分成2個表格。它在3個相同的模塊中進行分類。雖然PHP運營商與MySQL截斷不工作
整件事情每隔30秒鐘就會執行一次。
出於某種原因,只有第一次按預期工作,以後什麼都不會發生。
在我到我的代碼之前,我爲折舊的mysql道歉,這個腳本將只在本地機器上使用,我會在時間正確時更新它。
這裏是我的代碼:
$i=1;
$domain1 = 'example1.com';
$domain2 = 'example2.com';
$domain3 = 'example3.com';
$robots1 = array("url1",
"url2",
"url3");
$robots2 = array("url1",
"url2",
"url3");
$robots3 = array("url1",
"url2",
"url3");
require_once 'Normalizer.php';
$conn = mysql_connect('localhost:3306','user', 'pass', true);
mysql_select_db('t1000', $conn);
while ($i<=50000) {
$query = 'SELECT * FROM dump1';
$result1=mysql_query($query, $conn);
$strings1=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings1, $row["link"]);
}
$query = 'TRUNCATE TABLE dump1';
$delete=mysql_query($query, $conn);
$query = 'SELECT * FROM dump2';
$result1=mysql_query($query, $conn);
$strings2=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings2, $row["link"]);
}
$query = 'TRUNCATE TABLE dump2';
$delete=mysql_query($query, $conn);
$query = 'SELECT * FROM dump3';
$result1=mysql_query($query, $conn);
$strings3=array();
while ($row = mysql_fetch_assoc($result1)) {
array_push($strings3, $row["link"]);
}
$query = 'TRUNCATE TABLE dump3';
$delete=mysql_query($query, $conn);
// Module 1 start
$ii=0;
$links = array();
$edofollow = array();
$enofollow = array();
$internal = array();
foreach ($strings1 as $value)
{
$input=$strings1[$ii];
$htm=stripcslashes($input);
$doc = new DOMDocument();
@$doc->loadHTML($htm);
$arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
foreach($arr as $item) { // DOMElement Object
$href = $item->getAttribute("href");
$rel = $item->getAttribute("rel");
$text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
$links[] = array(
'href' => $href,
'rel' => $rel,
'text' => $text
);
if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND $rel!=='nofollow')
{
$un = new URL\Normalizer();
$un->setUrl($href);
$href= parse_url($un->normalize(), PHP_URL_HOST);
array_push($edofollow, $href);
}
else if (strpos($href, '://')!==false AND strpos($href, $domain1)==false AND $rel=='nofollow')
{
$un1 = new URL\Normalizer();
$un1->setUrl($href);
array_push($enofollow, $un1->normalize());
}
else if (strpos($href,'://')==false or strpos($href,$domain1)!==false)
{
$un2 = new URL\Normalizer();
$un2->setUrl($href);
$href1=$un2->normalize();
if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)
{
if($href1=='' or $href1=='/')
{}
else{
if (strpos($href1, '://')==false)
{$href1='http://'.$domain1.$href1;}
if (in_array($href1, $robots1)) { }
else {
array_push($internal, $href1);
}
}
}
}
}
$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
if($duplicate==0)
{
$sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
mysql_query($sql, $conn);
}
}
$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
$query=mysql_query("select * from joblist1 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
if($duplicate==0)
{
$sql='INSERT INTO joblist1 (link) VALUES ("'.$value2.'")';
mysql_query($sql, $conn);
}
}
}
$ii=$ii+1;
}
// Module 1 ends
// Module 2 start
$links = array();
$edofollow = array();
$enofollow = array();
$internal = array();
$ii=0;
foreach ($strings2 as $value)
{
$input=$strings2[$ii];
$htm=stripcslashes($input);
$doc = new DOMDocument();
@$doc->loadHTML($htm);
$arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
foreach($arr as $item) { // DOMElement Object
$href = $item->getAttribute("href");
$rel = $item->getAttribute("rel");
$text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
$links[] = array(
'href' => $href,
'rel' => $rel,
'text' => $text
);
if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND $rel!=='nofollow')
{
$un = new URL\Normalizer();
$un->setUrl($href);
$href= parse_url($un->normalize(), PHP_URL_HOST);
array_push($edofollow, $href);
}
else if (strpos($href, '://')!==false AND strpos($href, $domain2)==false AND $rel=='nofollow')
{
$un1 = new URL\Normalizer();
$un1->setUrl($href);
array_push($enofollow, $un1->normalize());
}
else if (strpos($href,'://')==false or strpos($href,$domain2)!==false)
{
$un2 = new URL\Normalizer();
$un2->setUrl($href);
$href1=$un2->normalize();
if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)
{
if($href1=='' or $href1=='/')
{}
else{
if (strpos($href1, '://')==false)
{$href1='http://'.$domain2.$href1;}
if (in_array($href1, $robots2)) { }
else {
array_push($internal, $href1);
}
}
}
}
}
$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
if($duplicate==0)
{
$sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
mysql_query($sql, $conn);
}
}
$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
$query=mysql_query("select * from joblist2 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
if($duplicate==0)
{
$sql='INSERT INTO joblist2 (link) VALUES ("'.$value2.'")';
mysql_query($sql, $conn);
}
}
}
$ii=$ii+1;
}
// Module 2 Ends
// Module 3 start
$links = array();
$edofollow = array();
$enofollow = array();
$internal = array();
$ii=0;
foreach ($strings3 as $value)
{
$input=$strings3[$ii];
$htm=stripcslashes($input);
$doc = new DOMDocument();
@$doc->loadHTML($htm);
$arr = $doc->getElementsByTagName("a"); // DOMNodeList Object
foreach($arr as $item) { // DOMElement Object
$href = $item->getAttribute("href");
$rel = $item->getAttribute("rel");
$text = trim(preg_replace("/[\r\n]+/", " ", $item->nodeValue));
$links[] = array(
'href' => $href,
'rel' => $rel,
'text' => $text
);
if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND $rel!=='nofollow')
{
$un = new URL\Normalizer();
$un->setUrl($href);
$href= parse_url($un->normalize(), PHP_URL_HOST);
array_push($edofollow, $href);
}
else if (strpos($href, '://')!==false AND strpos($href, $domain3)==false AND $rel=='nofollow')
{
$un1 = new URL\Normalizer();
$un1->setUrl($href);
array_push($enofollow, $un1->normalize());
}
else if (strpos($href,'://')==false or strpos($href,$domain3)!==false)
{
$un2 = new URL\Normalizer();
$un2->setUrl($href);
$href1=$un2->normalize();
if (strpos($href1, 'TRANSCRIPTS')==false AND strpos($href1, '(')==false AND strpos($href1, ')')==false AND strpos($href1, '#')==false AND strpos($href1, 'javascript')==false AND strpos($href1, '?')==false AND strpos($href1, 'void')==false)
{
if($href1=='' or $href1=='/')
{}
else{
if (strpos($href1, '://')==false)
{$href1='http://'.$domain3.$href1;}
if (in_array($href1, $robots3)) { }
else {
array_push($internal, $href1);
}
}
}
}
}
$uedofollow = array_values(array_unique($edofollow));
foreach ($uedofollow as $value) {
$query=mysql_query("select * from dofollow where link='".$value."' ");
$duplicate=0;
if($query){
$duplicate=mysql_num_rows($query);
}
if($duplicate==0)
{
$sql='INSERT INTO dofollow (link) VALUES ("'.$value.'")';
mysql_query($sql, $conn);
}
}
$uinternal = array_values(array_unique($internal));
foreach ($uinternal as $value2) {
$query=mysql_query("select * from joblist3 where link='".$value2."' ");
if ($query) {
$duplicate=0;
$duplicate=mysql_num_rows($query);
if($duplicate==0)
{
$sql='INSERT INTO joblist3 (link) VALUES ("'.$value2.'")';
mysql_query($sql, $conn);
}
}
}
$ii=$ii+1;
}
// Module 3 ends
sleep(30);
$i=$i++;
}
我試圖找到附近解決好幾天,混合的東西,但沒有運氣...
是你的mysql用戶允許使用truncate命令檢查您的權限? – Dave 2014-09-19 14:53:32
是的,第一次按照預期清空表格 – Acidon 2014-09-19 14:55:00
腳本超時?檢查Apache/PHP的錯誤日誌?它可以是包含數字的表名不知道如果mysql有問題,可能需要圍繞表名與'等 – Dave 2014-09-19 14:56:33