
I want to use MySQL with Scrapy, so I created this pipeline class:

from datetime import datetime
from hashlib import md5

from twisted.enterprise import adbapi
from scrapy import log


class MySQLStorePipeline(object):

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)

    def process_item(self, item, spider):
        # run db query in the thread pool
        d = self.dbpool.runInteraction(self._do_upsert, item, spider)
        d.addErrback(self._handle_error, item, spider)
        # at the end return the item in case of success or failure
        d.addBoth(lambda _: item)
        # return the deferred instead of the item. This makes the engine
        # process the next item (according to the CONCURRENT_ITEMS setting)
        # after this operation (deferred) has finished.
        return d

    def _do_upsert(self, conn, item, spider):
        """Perform an insert or update."""
        guid = self._get_guid(item)
        now = datetime.utcnow().replace(microsecond=0).isoformat(' ')

        conn.execute("""SELECT EXISTS(
            SELECT 1 FROM table WHERE guid = %s
        )""", (guid,))
        ret = conn.fetchone()[0]
        if ret:
            conn.execute("""
                UPDATE table
                SET topicName=%s, authorName=%s, content=%s, updated=%s
                WHERE guid=%s
            """, (item['topicName'], item['authorName'], item['content'], now, guid))
            spider.log("Item updated in db: %s %r" % (guid, item))
        else:
            conn.execute("""
                INSERT INTO table (guid, topicName, authorName, content, updated)
                VALUES (%s, %s, %s, %s, %s)
            """, (guid, item['topicName'], item['authorName'], item['content'], now))
            spider.log("Item stored in db: %s %r" % (guid, item))

    def _handle_error(self, failure, item, spider):
        """Handle an error raised during a db interaction."""
        # do nothing, just log
        log.err(failure)

    def _get_guid(self, item):
        """Generate a unique identifier for a given item."""
        # hash based solely on the content field
        return md5(item['content']).hexdigest()
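
For reference, the from_settings classmethod above reads its connection parameters from the project settings, so settings.py needs entries like the following. This is a minimal sketch: the host/db/user/password values are placeholders, and the dotted path assumes the project layout visible in the traceback below. Declaring ITEM_PIPELINES as a dict also addresses the ScrapyDeprecationWarning shown in the log.

# settings.py (sketch; replace the placeholder values with your own)
ITEM_PIPELINES = {
    'myspider.pipelines.MySQLStorePipeline': 300,
}

MYSQL_HOST = 'localhost'
MYSQL_DBNAME = 'mydb'
MYSQL_USER = 'root'
MYSQL_PASSWD = 'secret'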

But when I run the spider I get this error:

2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled downloader middlewares: HttpAuthMiddleware, DownloadTimeoutMiddleware, UserAgentMiddleware, RetryMiddleware, DefaultHeadersMiddleware, MetaRefreshMiddleware, HttpCompressionMiddleware, RedirectMiddleware, CookiesMiddleware, ChunkedTransferMiddleware, DownloaderStats 
2014-12-03 10:02:08+0800 [scrapy] INFO: Enabled spider middlewares: HttpErrorMiddleware, OffsiteMiddleware, RefererMiddleware, UrlLengthMiddleware, DepthMiddleware 
/Library/Python/2.7/site-packages/scrapy/contrib/pipeline/__init__.py:21: ScrapyDeprecationWarning: ITEM_PIPELINES defined as a list or a set is deprecated, switch to a dict 
    category=ScrapyDeprecationWarning, stacklevel=1) 
Traceback (most recent call last): 
    File "/usr/local/bin/scrapy", line 11, in <module> 
    sys.exit(execute()) 
    File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 143, in execute 
    _run_print_help(parser, _run_command, cmd, args, opts) 
    File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 89, in _run_print_help 
    func(*a, **kw) 
    File "/Library/Python/2.7/site-packages/scrapy/cmdline.py", line 150, in _run_command 
    cmd.run(args, opts) 
    File "/Library/Python/2.7/site-packages/scrapy/commands/crawl.py", line 60, in run 
    self.crawler_process.start() 
    File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 92, in start 
    if self.start_crawling(): 
    File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 124, in start_crawling 
    return self._start_crawler() is not None 
    File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 139, in _start_crawler 
    crawler.configure() 
    File "/Library/Python/2.7/site-packages/scrapy/crawler.py", line 47, in configure 
    self.engine = ExecutionEngine(self, self._spider_closed) 
    File "/Library/Python/2.7/site-packages/scrapy/core/engine.py", line 65, in __init__ 
    self.scraper = Scraper(crawler) 
    File "/Library/Python/2.7/site-packages/scrapy/core/scraper.py", line 66, in __init__ 
    self.itemproc = itemproc_cls.from_crawler(crawler) 
    File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 50, in from_crawler 
    return cls.from_settings(crawler.settings, crawler) 
    File "/Library/Python/2.7/site-packages/scrapy/middleware.py", line 33, in from_settings 
    mw = mwcls.from_settings(settings) 
    File "/Users/tony/Development/@Scrapy/myspider/myspider/pipelines.py", line 42, in from_settings 
    dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/enterprise/adbapi.py", line 203, in __init__ 
    self.dbapi = reflect.namedModule(dbapiName) 
    File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/python/_reflectpy3.py", line 137, in namedModule 
    topLevel = __import__(name) 
    File "/Library/Python/2.7/site-packages/MySQLdb/__init__.py", line 19, in <module> 
    import _mysql 
ImportError: dlopen(/Library/Python/2.7/site-packages/_mysql.so, 2): no suitable image found. Did find: 
    /Library/Python/2.7/site-packages/_mysql.so: mach-o, but wrong architecture 

Is MySQL-python installed incorrectly? How do I fix this?

Answer


I'm on a Mac. The error means the compiled _mysql.so does not match the architecture of the Python interpreter that is loading it. I solved the problem as follows.
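
Before applying the fix, you can check which architecture your interpreter runs as (a small sketch using the standard platform module; the library itself can be inspected with the file command on the .so path from the traceback):

import platform
print(platform.architecture())   # e.g. ('64bit', '')

With that confirmed, set the MySQL paths and the preferred Python architecture in the shell profile: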

nano ~/.bash_profile 

Add these lines:

export PATH=/usr/local/mysql/bin:${PATH} 
export DYLD_LIBRARY_PATH=/usr/local/mysql/lib/ 
export VERSIONER_PYTHON_PREFER_64_BIT=yes 
export VERSIONER_PYTHON_PREFER_32_BIT=yes 

Then run source ~/.bash_profile and rebuild MySQL-python from its source directory:

python setup.py build 
python setup.py install
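
After reinstalling, a quick way to confirm the module now loads is a two-line check (a sketch; run it with the same Python that Scrapy uses):

import MySQLdb                 # should no longer raise ImportError
print(MySQLdb.__version__)     # e.g. 1.2.5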