2016-03-03 29 views
0

我想寫入一個csv,每個數據觀察存儲在一行中。但是,我的所有觀察都駐留在一個單元格中。我試着用yield item替換item.append(item)return items,但它也沒有工作。Scrapy CSV導出駐留在1個單元中

import scrapy 
from selenium import webdriver 
import time 
from scrapy.selector import Selector 
from scrapy.selector import HtmlXPathSelector 
from gdp.items import gdpItem 
import unicodecsv as csv 

class gdp_spider2(scrapy.Spider): 
name = 'gdp_spider2' 
allowed_domains = ['statdb.dgbas.gov.tw/'] 
start_urls = ['http://statdb.dgbas.gov.tw/pxweb/Dialog/varval.asp?ma=NA8101A1Q&ti=Principal%20Figures%282008SNA%29-Quarterly&path=../PXfileE/NationalIncome/&lang=1&strList=L'] 

def __init__(self): 
    self.driver = webdriver.Firefox() 

def parse(self, response): 
    items = [] 

    driver = self.driver 
    driver.get(response.url) 
    driver.find_element_by_partial_link_text('Select all').click() 
    driver.find_element_by_xpath('//option[contains(text(),"GDP (Million N.T.$,at Current Prices)")]').click() 
    driver.find_element_by_xpath('//option[contains(text(),"Data")]').click() 
    driver.find_element_by_xpath('//input[@type="SUBMIT"]').click() 

    hxs = HtmlXPathSelector(text=self.driver.page_source) 
    data = hxs.xpath("//table[@class='pxtable']//tbody//tr") 

    for datum in data: 
     item = gdpItem() 
     item ["date"] = datum.xpath('//td[1]/text()').extract() 
     item ["data"] = datum.xpath('//td[2]/text()').extract() 

     items.append(item) 
     return items 

CSV image

回答

0

試試這個:

def parse(self, response): 
    items = [] 
    item = gdpItem() 
    driver = self.driver 
    driver.get(response.url) 
    driver.find_element_by_partial_link_text('Select all').click() 
    driver.find_element_by_xpath('//option[contains(text(),"GDP (Million N.T.$,at Current Prices)")]').click() 
    driver.find_element_by_xpath('//option[contains(text(),"Data")]').click() 
    driver.find_element_by_xpath('//input[@type="SUBMIT"]').click() 

    hxs = HtmlXPathSelector(text=self.driver.page_source) 
    data = hxs.xpath("//table[@class='pxtable']//tbody//tr") 

    for datum in data: 
     item ["date"] = datum.xpath('td[1]/text()').extract() 
     item ["data"] = datum.xpath('td[2]/text()').extract() 

     yield item 
+0

它的工作!謝謝!我可以問你爲什麼'item = gdpItem()'應該向上移動嗎? –

+0

項目作爲管道工作。您不需要一次又一次調用gdpItem()。 – Rahul