0
我想比較兩個Excel文件並輸出一個文件,讓別人看到這兩個文件之間的差異。我得到一個關鍵錯誤,我不知道如何解決它? 目前我的全部代碼:Python熊貓關鍵錯誤
import pandas as pd
import numpy as np
def report_diff(x):
return x[0] if x[0] == x[1] else '{} ---> {}'.format(*x)
old = pd.read_excel('Y:\Client Files\Client\ClientBill\March 2017\SPT List Retro Bill Mar 17.xlsx', 'List Bill', na_values=['NA'])
new = pd.read_excel('Y:\Client Files\Client\Client Bill\March 2017\Updated SPT Mar 17.xlsx', 'List Bill', na_values=['NA'])
old['version'] = "old"
new['version'] = "new"
full_set = pd.concat([old,new],ignore_index=True)
changes = full_set.drop_duplicates(subset=[u'Employee ID', u'Benefit Plan Type',u'Sum of Premium'],keep='first')
dupe_accts = changes.set_index(u'Employee ID', u'Benefit Plan Type', u'Sum of Premium').index.get_duplicates()
dupes = changes[changes['Employee ID', 'Benefit Plan Type', 'Sum of Premium'].isin(dupe_accts)]
change_new = dupes[(dupes["version"] == "new")]
change_old = dupes[(dupes["version"] == "old")]
change_new = change_new.drop(['version'], axis=1)
change_old = change_old.drop(['version'], axis=1)
change_new.set_index(u'Employee ID', u'Benefit Plan Type', u'Sum of Premium',inplace=True)
change_old.set_index(u'Employee ID', u'Benefit Plan Type', u'Sum of Premium',inplace=True)
diff_panel = pd.Panel(dict(df1=change_old,df2=change_new))
diff_output = diff_panel.apply(report_diff, axis=0)
changes['duplicate']=changes[u'Employee ID', u'Benefit Plan Type', u'Sum of Premium'].isin(dupe_accts)
removed_accounts = changes[(changes["duplicate"] == False) & (changes["version"] == "old")]
new_account_set = full_set.drop_duplicates(subset=[u'Employee ID',u'Benefit Plan Type',u'Sum of Premium'],take_last=False)
new_account_set['duplicate']=new_account_set[u'Employee ID', u'Benefit Plan Type', u'Sum of Premium'].isin(dupe_accts)
added_accounts = new_account_set[(new_account_set["duplicate"] == False) & (new_account_set["version"] == "new")]
writer = pd.ExcelWriter("my-diff-2.xlsx")
diff_output.to_excel(writer,"changed")
removed_accounts.to_excel(writer,"removed",index=False,columns=[u'Employee ID',u'Benefit Plan Type',u'Sum of Premium'])
added_accounts.to_excel(writer,"added",index=False,columns=[u'Employee ID',u'Benefit Plan Type',u'Sum of Premium',])
writer.save()
我得到的錯誤與變量受騙者做。
Traceback (most recent call last):
File "C:\Python27\Scripts\ClientBill2.py", line 24, in <module>
dupes = changes[changes['Employee ID', 'Benefit Plan Type', 'Sum of
Premium'].isin(dupe_accts)]
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 2059, in
__get
item__
return self._getitem_column(key)
File "C:\Python27\lib\site-packages\pandas\core\frame.py", line 2066, in
_geti
tem_column
return self._get_item_cache(key)
File "C:\Python27\lib\site-packages\pandas\core\generic.py", line 1386, in
_ge
t_item_cache
values = self._data.get(item)
File "C:\Python27\lib\site-packages\pandas\core\internals.py", line 3543,
in g
et
loc = self.items.get_loc(item)
File "C:\Python27\lib\site-packages\pandas\indexes\base.py", line 2136, in
get
_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\index.pyx", line 132, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4433)
File "pandas\index.pyx", line 154, in pandas.index.IndexEngine.get_loc (pandas
\index.c:4279)
File "pandas\src\hashtable_class_helper.pxi", line 732, in pandas.hashtable.Py
ObjectHashTable.get_item (pandas\hashtable.c:13742)
File "pandas\src\hashtable_class_helper.pxi", line 740, in pandas.hashtable.Py
ObjectHashTable.get_item (pandas\hashtable.c:13696)
KeyError: ('Employee ID', 'Benefit Plan Type', 'Sum of Premium')
這很簡單。現在我越來越attributeerror:列表對象沒有屬性isin –
@TroyR對不起,我錯過了更正第二個括號的位置。我用正確的線更新了答案。 –
這可能是與change_new變量相同的問題嗎? –