-1
我正在循環查看csv文件以追加到DataFrame表中,但似乎每次我循環並追加時,都會在表中添加索引列。非常混亂,我很困難,任何幫助都會很棒。Python Pandas DataFrame在追加時添加索引列
我的代碼:
import sqlite3 as sql
import pandas as pd
import hashlib
import os
import csv
from pandas import ExcelWriter
def obtain_data(filename, connect, type):
writer =
ExcelWriter('path\\new_excel_sheets\\'+filename+'.xlsx')
table =
ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
if type == True:
print(filename)
df = pd.DataFrame.from_csv('path'+filename,
index_col=None)
else:
workbook = pd.ExcelFile('path' + filename)
df = workbook.parse('Sheet1')
df = df.rename(columns={'INDEX': 'INDX'})
df = df.rename(columns={'Index': 'INDXS'})
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
c = connect.cursor()
print(filename)
print(hash_t)
if hash_t == 'd22db04a2f009f222da57e91acdce21b':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[1:])
hashing = {str(new_hash_t): str(filename)}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
next_open = df['DATE'][1]
next_open_value = df['DATE'][2]
df.insert(3, next_open, next_open_value)
headers = df.dtypes.index
header_list = str(headers.tolist())
header_list = ''.join(header_list)
new_hash_t = str(hashlib.md5(header_list.encode('utf-
8')).hexdigest())
df = df.drop(df.index[2])
df['DATE'][1] = df['DATE'][0]
hashing = {new_hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append')
df.to_sql(name=new_hash_t, con=connect, if_exists='append')
except:
raise IndexError('Could not transform ' + str(filename) + ' into
database.')
else:
hashing = {hash_t: filename}
df2 = pd.DataFrame.from_dict(hashing, orient='index')
try:
df2.to_sql(name='Hash Table', con=connect, if_exists='append',
index=False)
df.to_sql(name=hash_t, con=connect, if_exists='append',
index=True)
except:
raise IndexError('Could not transform ' + str(filename) + '
into database.')
df.to_excel(writer)
print(filename + ' has been completed succesfully.')
final_results = {'df': df, 'hash_t': hash_t}
return final_results
csv_files = []
usable_files = []
for filename in os.listdir(filepath):
if filename.endswith(".xlsx"):
print('Found an XLSX file ' + str(filename))
usable_files.append(filename)
elif filename.endswith('.CSV'):
print('Found a CSV File ' + filename)
csv_files.append(filename)
else:
print('Found an unusable file ' + str(filename))
for file in usable_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, False)
for file in csv_files:
connect = sql.connect(SQLite3 connection)
obtain_data(file, connect, True)
print('All files have been made into Tables')
的sqlite3的數據庫做的一切權利,但是當我追加到它,它增加了一個索引列。我不知道如何把索引欄放在這裏(隨意教我),所以請在這裏忍受。該表從這樣看
rowid, 0 , 1, 2, etc
0, value, value, value, etc
1, value, value, value, etc
去,但是當我遍歷(比如4倍),它改變這個
rowid, index, 0, 1, 2, etc
0, 0, 0, 0, 0, value
0, 0, 0, 0, 0, value
這是一個很奇怪的問題,所以任何幫助,將不勝感激,謝謝!
考慮發佈下一更少的代碼用一個經過驗證的例子來隔離你的問題。 -1 –