2017-08-11 1972 views
-1

我正在循環讀取多個 CSV 文件並把它們追加到 DataFrame 表中,但似乎每次循環並追加時,表中都會多出一個索引列。這讓我非常困惑,我被卡住了,任何幫助都將不勝感激。(標題:Python Pandas DataFrame 在追加時添加索引列)

我的代碼:

import sqlite3 as sql 
import pandas as pd 
import hashlib 
import os 
import csv 
from pandas import ExcelWriter 


def obtain_data(filename, connect, type):
    """Load one file into a DataFrame and append it to SQLite tables.

    The file is read as CSV when ``type`` is ``True`` and otherwise as an
    Excel workbook (sheet ``'Sheet1'``). The column headers are hashed with
    MD5 and the hash is used as the name of the SQL table the data is
    appended to; a ``hash -> filename`` row is appended to the
    ``'Hash Table'`` table as well. Two known header hashes get extra
    reshaping before they are written.

    Args:
        filename: Name of the file, appended to the ``'path'`` prefix.
        connect: Open database connection handed to ``DataFrame.to_sql``.
        type: ``True`` to read ``filename`` as CSV, anything else to read
            it as an Excel workbook.

    Returns:
        dict: ``{'df': <final DataFrame>, 'hash_t': <hash of the original
        headers>}``. ``hash_t`` is the hash computed before any reshaping,
        not the re-computed ``new_hash_t``.

    Raises:
        IndexError: If either ``to_sql`` call fails for any reason.
    """
    writer = ExcelWriter('path\\new_excel_sheets\\'+filename+'.xlsx')
    # NOTE(review): `table` is never used after creation in this function.
    table = ExcelWriter('path\\new_excel_sheets\\hash_table.xlsx')
    if type == True:
        print(filename)
        df = pd.DataFrame.from_csv('path'+filename, index_col=None)
    else:
        workbook = pd.ExcelFile('path' + filename)
        df = workbook.parse('Sheet1')
    # Rename columns called INDEX / Index before they are written to SQL.
    df = df.rename(columns={'INDEX': 'INDX'})
    df = df.rename(columns={'Index': 'INDXS'})
    # Fingerprint the file layout: MD5 of the stringified header list.
    headers = df.dtypes.index
    header_list = str(headers.tolist())
    header_list = ''.join(header_list)
    hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
    c = connect.cursor()
    print(filename)
    print(hash_t)
    if hash_t == 'd22db04a2f009f222da57e91acdce21b':
        # Insert a column at position 3, named after the value in DATE row 1
        # and filled with the value in DATE row 2, then re-hash the headers.
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
        # Keep only the first row.
        df = df.drop(df.index[1:])
        hashing = {str(new_hash_t): str(filename)}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            # `index` is not passed here, so to_sql uses its default.
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    elif hash_t == '484fbe4de83acb41480dd935d82d7fbe':
        # Same column insertion and re-hash as the branch above.
        next_open = df['DATE'][1]
        next_open_value = df['DATE'][2]
        df.insert(3, next_open, next_open_value)
        headers = df.dtypes.index
        header_list = str(headers.tolist())
        header_list = ''.join(header_list)
        new_hash_t = str(hashlib.md5(header_list.encode('utf-8')).hexdigest())
        # Drop the third row and copy DATE from row 0 into row 1.
        df = df.drop(df.index[2])
        df['DATE'][1] = df['DATE'][0]
        hashing = {new_hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            # `index` is not passed here, so to_sql uses its default.
            df2.to_sql(name='Hash Table', con=connect, if_exists='append')
            df.to_sql(name=new_hash_t, con=connect, if_exists='append')
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    else:
        # Unknown layout: store the data unchanged under its header hash.
        hashing = {hash_t: filename}
        df2 = pd.DataFrame.from_dict(hashing, orient='index')
        try:
            df2.to_sql(name='Hash Table', con=connect, if_exists='append',
                       index=False)
            df.to_sql(name=hash_t, con=connect, if_exists='append',
                      index=True)
        except:
            raise IndexError('Could not transform ' + str(filename) + ' into database.')
    # NOTE(review): the writer is never saved or closed in this function.
    df.to_excel(writer)
    print(filename + ' has been completed succesfully.')
    final_results = {'df': df, 'hash_t': hash_t}
    return final_results

# Sort the entries of `filepath` into Excel workbooks and CSV files by suffix.
# NOTE(review): `filepath` is not defined anywhere in the visible code.
csv_files = [] 
usable_files = [] 
for filename in os.listdir(filepath): 
    if filename.endswith(".xlsx"): 
     print('Found an XLSX file ' + str(filename)) 
     usable_files.append(filename) 
    # Only the upper-case '.CSV' suffix is matched; endswith is case-sensitive.
    elif filename.endswith('.CSV'): 
     print('Found a CSV File ' + filename) 
     csv_files.append(filename) 
    else: 
     print('Found an unusable file ' + str(filename)) 


# Load every workbook first, then every CSV; a new connection is opened per
# file and is not closed in the visible code.
# NOTE(review): `SQLite3 connection` is the author's placeholder, not valid
# Python; substitute the real database path before running.
for file in usable_files: 
    connect = sql.connect(SQLite3 connection) 
    obtain_data(file, connect, False) 
for file in csv_files: 
    connect = sql.connect(SQLite3 connection) 
    obtain_data(file, connect, True) 
print('All files have been made into Tables') 

sqlite3 數據庫的其他部分都工作正常,但每當我向其中追加數據時,它就會多出一個索引列。我不知道如何在這裏貼出表格(歡迎指教),所以請多包涵。表一開始是這樣的:

rowid, 0 , 1, 2, etc 
0, value, value, value, etc 
1, value, value, value, etc 

但是當我循環追加之後(比如 4 次),它變成了這樣:

rowid, index, 0, 1, 2, etc 
0, 0, 0, 0, 0, value 
0, 0, 0, 0, 0, value 

這是一個很奇怪的問題,所以任何幫助,將不勝感激,謝謝!

+0

請考慮發佈更少的代碼,並用一個可驗證的示例來隔離你的問題。 -1 –

回答

0

只需在所有 to_sql() 調用中把 index 參數設置爲 False(該參數默認爲 True):

df2.to_sql(name='Hash Table', con=connect, if_exists='append', index=False) 

對任何平面文件輸出也是如此:

df.to_excel(writer, index=False) 

df.to_csv(filename, index=False) 
相關問題