0
試圖使數據挖掘,我在陣列中的所有網址,但一旦我嘗試採取在刮板給了我這個錯誤:類型錯誤:列表索引必須是整數,而不是標籤 - 蟒蛇
$TypeError: list indices must be integers, not Tag -- python
這是我刮板全碼:
s = sched.scheduler(time.time, time.sleep)
def myScraper(sc):
csv_f = csv.reader(f)
quote_page = []
for row in csv_f:
quote_page.append(url+row[0])
i=1
for var in quote_page:
num_dat = []
txt_dat = []
num_dat2 = []
txt_dat2 = []
s.enter(5,1,myScraper, (sc,))
sleep(5)
print(quote_page[i])
page = urlopen(quote_page[i])
i = i+1
soup = BeautifulSoup(page, 'html.parser')
data_store = []
for tr in soup.find_all('tr'): # find table rows
tds = tr.find_all('td', attrs={'class': 'fieldData'}) # find all table cells
for i in tds: # returns all cells from html rows
if i != []: # pops out empty cells from returned data
data_store.append(i.text)
#print(i.text)
#print("\n")
data_store2 = []
for tr in soup.find_all('tr'):
tds2 = tr.find_all('td', attrs={'class': 'improvementsFieldData'})
for i in tds2:
if i != []:
data_store2.append(i.text)
for j in data_store:
if ',' in j and ' ' not in j:
lft_dec = j[:j.index(',')].replace('$', '')
rght_dec = j[j.index(','):].replace(',', '') # drop the decimal
num_dat.append(float(lft_dec+rght_dec)) # convert to numerical data
else:
txt_dat.append(j)
for j in data_store2:
if ',' in j and ' ' not in j:
lft_dec = j[:j.index(',')].replace('$', '')
rght_dec = j[j.index(','):].replace(',', '').replace('Sq. Ft', '') # drop the decimal and Sq
num_dat2.append(float(lft_dec+rght_dec)) # convert to numerical data
elif ('Sq. Ft' and ',') in j:
sqft_dat_befcm = j[:j.index(',')].replace(',', '')
sqft_dat_afcm = j[j.index(','):].replace(' ', '').replace('Sq.Ft', '').replace(',', '')
num_dat2.append(float(sqft_dat_befcm+sqft_dat_afcm))
else:
txt_dat2.append(j)
print(num_dat)
print(txt_dat)
print(num_dat2)
print(txt_dat2)
s.enter(5, 1, myScraper, (s,))
s.run()
f.close
基本上我對這一計劃的目標是給定的URL,我可以打開一個瀏覽器颳去第一個數組,接着等待的時間和重複間隔量直到數組完成。
編輯***對不起,第一次發佈在這個。下面是完整的堆棧跟蹤
Traceback (most recent call last):
File "C:\Users\Ahmad\Desktop\HouseProject\AhmadsScraper.py", line 85, in
<module>
s.run()
File "C:\Users\Ahmad\Anaconda2\lib\sched.py", line 117, in run
action(*argument)
File "C:\Users\Ahmad\Desktop\HouseProject\AhmadsScraper.py", line 32, in
myScraper
print(quote_page[i])
TypeError: list indices must be integers, not Tag
您能否提供完整的回溯?所以人們可以理解哪一行會拋出錯誤? –
是的!感謝您的迴應! – Matherz