2011-01-08 60 views
0

。我的腳本在我的數據庫中輸入信息時遇到了更多問題。 下面的腳本抓取頁面,剝離必要的信息,然後下載相關的圖像文件。之後,它應該將從URL中收集的信息輸入到數據庫中。出於某種原因,腳本似乎遍歷了URL,因爲我爲每個URL下載了圖像,但是每個URL的產品都沒有輸入到數據庫中。該腳本將插入第一個產品的類別和產品信息,然後僅停止並繼續下載圖像。抓取圖像的webscraper,但不能將信息輸入數據庫

有什麼建議嗎?

<?php 

define('IN_PHPBB', true); 
$phpbb_root_path = (defined('PHPBB_ROOT_PATH')) ? PHPBB_ROOT_PATH : './'; 
$phpEx = substr(strrchr(__FILE__, '.'), 1); 
include($phpbb_root_path . 'common.' . $phpEx); 
include($phpbb_root_path . 'includes/simple_html_dom.' . $phpEx); 

// Start session management 
$user->session_begin(); 
$auth->acl($user->data); 
$user->setup(); 

set_time_limit(259200); 

function save($in, $out) 
{ 
    $ch = curl_init ($in); 
    curl_setopt($ch, CURLOPT_HEADER, 0); 
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 
    curl_setopt($ch, CURLOPT_BINARYTRANSFER,1); 
    $rawdata=curl_exec($ch); 
    curl_close ($ch); 
    if(file_exists($out)) 
    { 
     unlink($out); 
    } 
    $fp = fopen($out,'x'); 
    fwrite($fp, $rawdata); 
    fclose($fp); 
} 

function scrape($i) 
{ 
    $url = 'http:/xxxxxxxx/index.php?main_page=product_info&products_id='.$i.'&zenid=e4b7dde8de02e1df005d4549e2e3e529'; 
    echo "$url -- "; 
    $exists = file_get_contents($url); 
    if ($exists != false) 
    { 
     $html = file_get_html($url); 

     foreach($html->find('body') as $html) 
     { 
      $test = $html->find('#productName', 0); 
      if ($test) 
      { 
       $item['title'] = trim($html->find('#productName', 0)->plaintext); 
       $item['price'] = trim($html->find('#productPrices', 0)->plaintext); 
       $item['cat'] = $html->find('#navBreadCrumb', 0)->plaintext; 
       list($home, $item['cat'], $item['subcat'], $title) = explode("::", $item['cat']); 
       $item['cat'] = str_replace("&nbsp;", "", $item['cat']); 
       $item['subcat'] = str_replace("\n", "", str_replace("&nbsp;", "", $item['subcat'])); 
       $item['desc'] = trim($html->find('#productDescription', 0)->plaintext); 
       $item['model'] = $html->find('ul#productDetailsList', 0)->find('li', 0)->plaintext; 
       $item['model'] = explode(":", $item['model']); 
       $item['model'] = trim($item['model'][1]); 
       $item['manufacturer'] = $html->find('ul#productDetailsList', 0)->find('li', 1)->plaintext; 
       $item['manufacturer'] = explode(":", $item['manufacturer']); 
       $item['manufacturer'] = trim($item['manufacturer'][1]); 
       foreach($html->find('img') as $img) 
       { 
        if($img->alt == $item['title']) 
        { 
         $item['img_sm'] = $img->src; 
        } 
       } 

       $ret[] = $item; 

      } 
     } 
     $html->clear(); 
     unset($html); 
     unset($item); 
     return $ret; 
    } 
    else 
    { 
     echo "Could not find page<br />"; 
    } 
    unset($exists); 
} 

$i = 1; 
$end = 9999999; 

while($i < $end) 
{ 
    $ret = scrape($i); 

    if(isset($ret)) 
    { 
     foreach($ret as $v) 
     { 
      $item['title'] = $v['title']; 
      $item['price'] = $v['price']; 
      $item['desc'] = $v['desc']; 
      $item['model'] = $v['model']; 
      $item['manufacturer'] = $v['manufacturer']; 
      $item['image'] = $v['image']; 
      $item['cat'] = $v['cat']; 
      $item['subcat'] = $v['subcat']; 
      $item['img_sm'] = $v['img_sm']; 
     } 
     unset($ret); 
     unset($v); 

     $sm_img_src = "http://xxxxxx/".$item['img_sm']; 
     $ext = strrchr($item['img_sm'], '.'); 

     $filename = $item['model'] . $ext; 

     $lg_img_src = "http://xxxxx/images/STC/".$filename; 
     $new_sm = "./rip_images/small/{$filename}"; 
     $new_lg = "./rip_images/large/{$filename}"; 

     $item['image'] = $filename; 

     save($lg_img_src,$new_lg); 
     save($sm_img_src,$new_sm); 

     //see if parent cat exists 
     $sql = 'SELECT cat_id FROM ' . SHOP_CAT_TABLE . ' WHERE cat_name = "'.$db->sql_escape($item['cat']).'"'; 
     $result = $db->sql_query($sql); 
     $parent = $db->sql_fetchrow($result); 
     $db->sql_freeresult($result); 
     // if not exists 
     if($parent['cat_id'] == '') 
     { 
      //add the parent cat to the db 
      $sql_ary = array(
       'cat_name' => $item['cat'], 
       'cat_parent' => 0 
      ); 
      $sql = 'INSERT INTO '.SHOP_CAT_TABLE.' '.$db->sql_build_array('INSERT', $sql_ary); 
      $db->sql_query($sql); 
      $cat_id = $db->sql_nextid(); 

      //see if subcat exists 
      $sql = 'SELECT cat_id FROM ' . SHOP_CAT_TABLE . ' WHERE cat_name = "'.$db->sql_escape($item['subcat']).'"'; 
      $result = $db->sql_query($sql); 
      $row = $db->sql_fetchrow($result); 
      $db->sql_freeresult($result); 
      // if not exists 
      if($row['cat_id'] == '') 
      { 
       //add subcat to db 
       $sql_ary = array(
        'cat_name' => $db->sql_escape($item['subcat']), 
        'cat_parent' => $cat_id 
       ); 
       $sql = 'INSERT INTO '.SHOP_CAT_TABLE.' '.$db->sql_build_array('INSERT', $sql_ary); 
       $db->sql_query($sql); 
       $item_cat = $db->sql_nextid(); 
      } 
      else //if exists 
      { 
       $item_cat = $row['cat_id']; 
      } 
     } 
     else //if parent cat exists 
     { 
      //see if subcat exists 
      $sql = 'SELECT cat_id FROM ' . SHOP_CAT_TABLE . ' WHERE cat_name = "'.$db->sql_escape($item['subcat']).'"'; 
      $result = $db->sql_query($sql); 
      $row = $db->sql_fetchrow($result); 
      $db->sql_freeresult($result); 
      // if not exists 
      if($row['cat_id'] == '') 
      { 
       //add the subcat to the db 
       $sql_ary = array(
        'cat_name' => $db->sql_escape($item['subcat']), 
        'cat_parent' => $parent['cat_id'] 
       ); 
       $sql = 'INSERT INTO '.SHOP_CAT_TABLE.' '.$db->sql_build_array('INSERT', $sql_ary); 
       $db->sql_query($sql); 
       $item_cat = $db->sql_nextid(); 
      } 
      else //if exists 
      { 
       $item_cat = $row['cat_id']; 
      } 
     } 

     $sql_ary = array(
      'item_title'  => $db->sql_escape($item['title']), 
      'item_price'  => $db->sql_escape($item['price']), 
      'item_desc'  => $db->sql_escape($item['desc']), 
      'item_model' => $db->sql_escape($item['model']), 
      'item_manufacturer' => $db->sql_escape($item['manufacturer']), 
      'item_image' => $db->sql_escape($item['image']), 
      'item_cat'  => $db->sql_escape($item_cat) 
     ); 

     $sql = 'INSERT INTO ' . SHOP_ITEM_TABLE . ' ' . $db->sql_build_array('INSERT', $sql_ary); 
     $db->sql_query($sql); 
     garbage_collection(); 
     echo 'Done<br />'; 
    } 
    $i++; 
    unset($item); 
} 

?> 

回答

0
  1. 有你看着你的DB庫生成的實際查詢字符串?
  2. 您是否添加了任何調試以查看查詢是否成功?查詢調用失敗時,大多數PHP數據庫庫會返回布爾值FALSE。假設查詢成功,你會失明。
+0

我甚至沒有想過要添加調試。大聲笑。添加幾個失敗後,看起來腳本在檢查父類別是否存在時失敗。 – chaoskreator 2011-01-08 06:46:36