2015-09-07 92 views
0

我正在閱讀一堆日常文件,並使用glob將它們連接在一起形成單獨的dataframes.I最終將它們連接在一起,基本上創建了一個用於連接到儀表板的單個大文件。我不太熟悉Python,但我經常使用熊貓和sklearn。如你所見,我基本上只是讀取最後60(或更多)天的數據(最後60個文件)併爲每個數據創建一個數據幀。這有效,但我想知道是否有更pythonic /更好的方式?我觀看了關於pydata的視頻(關於不被PEP 8限制並確保你的代碼是pythonic),這很有趣。 (僅供參考 - 我需要閱讀60天的時間的原因是因爲客戶可以從很久以前發生的電話填寫調查。客戶今天填寫了關於發生在七月,我需要知道的是調用(它持續了多久,什麼主題是,等)。如何讓這段代碼更pythonic?

# Read the last 60 days of daily CSV extracts and concatenate each report
# type (aht, fcr, nps, vas) into a single DataFrame, plus two static files
# (roster, splits).  Surveys can reference calls from long ago, hence the
# 60-day look-back window.
os.chdir(r'C:\\Users\Documents\FTP\\')
loc = r'C:\\Users\Documents\\'
rosterloc = r'\\mand\\'
splitsname = r'Splits.csv'
fcrname = r'global_disp_'
npsname = r'survey_'
ahtname = r'callbycall_'
rostername = 'Daily_Roster.csv'
vasname = r'vas_report_'
ext = '.csv'
startdate = dt.date.today() - Timedelta('60 day')
enddate = dt.date.today()
# Whole days in the window, as a plain int usable by range().
daterange = Timestamp(enddate) - Timestamp(startdate)
daterange = (daterange / np.timedelta64(1, 'D')).astype(int)


def _read_range(prefix, dates, fmt, **read_kwargs):
    """Read <prefix><date><ext> for each date, concatenated into one frame.

    A missing day's file is reported and skipped, instead of aborting the
    remaining days (the old code wrapped the whole loop in one try, so the
    first missing file silently truncated the range).
    """
    frames = []
    for day in dates:
        fname = prefix + day.strftime(fmt) + ext
        try:
            frames.append(pd.read_csv(fname, **read_kwargs))
        except IOError:
            print('File does not exist:', fname)
    return pd.concat(frames)


# Forward window for most reports; surveys only need the last three days.
forward_days = [Timestamp(startdate) + dt.timedelta(n) for n in range(daterange)]
last_three_days = [Timestamp(enddate) - dt.timedelta(n) for n in range(3)]

# NOTE(review): aht files are named %Y_%m_%d while the others use %m_%d_%Y,
# as in the original script -- confirm against the real file names.
aht = _read_range(ahtname, forward_days, '%Y_%m_%d')
print('AHT Done')
fcr = _read_range(fcrname, forward_days, '%m_%d_%Y', parse_dates=['call_time'])
print('FCR Done')
nps = _read_range(npsname, last_three_days, '%m_%d_%Y',
                  parse_dates=['call_date', 'date_completed'])
print('NPS Done')
vas = _read_range(vasname, forward_days, '%m_%d_%Y', parse_dates=['Call_date'])
print('VAS Done')
roster = pd.read_csv(loc + rostername)
print('Roster Done')
splits = pd.read_csv(loc + splitsname)
print('Splits Done')
+15

如果這是**可以正常工作的代碼**,而你認爲它還可以改進,請考慮在 http://codereview.stackexchange.com 上提問。 – jonrsharpe

回答

-1

我沒有更改這些名稱,但恕我直言,它們應該更具描述性,例如:pd == pandas?不確定。下面是更 pythonic 的一種寫法:

import contextlib
import datetime as dt
import logging
import os
from functools import partial
from operator import add, sub

import numpy as np
import pandas as pd
from pandas import Timestamp, Timedelta

# --- Configuration: working directory, per-report filename prefixes. ---
# Dated files are named <prefix><strftime-date><ext>, so each *_name below
# is joined with a formatted date when a day's file is read.
os.chdir(r'C:\\Users\Documents\FTP\\')
location = r'C:\\Users\Documents\\'
roster_location = r'\\mand\\'
splits_name = r'Splits.csv'
fcr_name = r'global_disp_'
nps_name = r'survey_'
aht_name = r'callbycall_'
roster_name = 'Daily_Roster.csv'
vas_name = r'vas_report_'
ext = '.csv'
# 60-day look-back: customers may fill in surveys about calls long past.
start_date = dt.date.today() - Timedelta('60 day')
end_date = dt.date.today()
# Number of whole days in the window, reduced to a plain int for range().
daterange = Timestamp(end_date) - Timestamp(start_date) 
daterange = (daterange/np.timedelta64(1, 'D')).astype(int) 
logger = logging.getLogger() # logger is better than "print" when you have multiple tiers to log: regular debug vs. exceptions


def timestamps_in_range(daterange, method=add, anchor=None):
    """Yield `daterange` consecutive Timestamps stepping from `anchor`.

    method: operator.add to walk forward, operator.sub to walk backward.
    anchor: the date to start from; defaults to the module-level
        start_date when walking forward and end_date when walking
        backward.  (The old version always anchored at start_date, so
        timestamps_in_range(3, sub) produced dates from ~60 days ago
        instead of the last three days.)
    """
    if anchor is None:
        anchor = end_date if method is sub else start_date
    for n in range(daterange):  # range, not Py2 xrange
        yield method(Timestamp(anchor), dt.timedelta(n))


def read_csv(name, date_range, date_format='%Y_%m_%d', **kwargs):
    """Read the dated CSV <name><date_range:date_format><ext>.

    kwargs are forwarded to pd.read_csv (the old version passed the kwargs
    dict *positionally*, which landed it in pd.read_csv's second parameter
    instead of expanding it as keyword options such as parse_dates).
    date_format is exposed because not every report uses '%Y_%m_%d';
    the original script read fcr/nps/vas files named with '%m_%d_%Y'.
    """
    return pd.read_csv(name + date_range.strftime(date_format) + ext, **kwargs)


def log_done(module):
    """Log completion of one report load at DEBUG level."""
    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    logger.debug('%s Done', module)


@contextlib.contextmanager  # contextmanager separates business logic from exception handling
def mapper(function, iterable):
    """Yield [function(x) for x in iterable], skipping missing files.

    Fixes three defects in the old version: Py2-only `except IOError, err`
    syntax; `map()` being lazy in Py3, so the try never caught anything;
    and the generator not yielding on error, which made @contextmanager
    raise RuntimeError("generator didn't yield").  One unreadable file now
    logs an error and is skipped instead of discarding the whole range.
    """
    results = []
    for item in iterable:
        try:
            results.append(function(item))
        except IOError as err:
            logger.error('File does not exist: %s', err.filename)
    yield results


# --- Load each report over its date window and concatenate. ---
# partial() pre-fills the reader with the filename prefix (and parse
# options) so mapper() can call it with a single date argument.
# NOTE(review): read_csv() formats every filename with '%Y_%m_%d', but the
# original script used '%m_%d_%Y' for the fcr/nps/vas files -- confirm
# against the real file names on disk.

read_csv_aht = partial(read_csv, aht_name)
with mapper(read_csv_aht, timestamps_in_range(daterange)) as calls:
    aht = pd.concat(calls)
    log_done('AHT')

# parse_dates restored: the original script parsed 'call_time' for fcr.
read_csv_fcr = partial(read_csv, fcr_name, parse_dates=['call_time'])
with mapper(read_csv_fcr, timestamps_in_range(daterange)) as data:
    fcr = pd.concat(data)
    log_done('FCR')

read_csv_nps = partial(read_csv, nps_name,
                       parse_dates=['call_date', 'date_completed'])
# The survey window runs BACKWARD from today (last three days).  The old
# timestamps_in_range(3, sub) call anchored at start_date, yielding dates
# from ~60 days ago -- build the backward range explicitly instead.
nps_dates = (Timestamp(end_date) - dt.timedelta(n) for n in range(3))
with mapper(read_csv_nps, nps_dates) as frames:
    nps = pd.concat(frames)
    log_done('NPS')

read_csv_vas = partial(read_csv, vas_name, parse_dates=['Call_date'])
with mapper(read_csv_vas, timestamps_in_range(daterange)) as bracket:
    vas = pd.concat(bracket)
    log_done('VAS')

roster = pd.read_csv(location + roster_name)
log_done('Roster')

splits = pd.read_csv(location + splits_name)
log_done('Splits')
+0

這將有助於引用並解釋爲了使OP代碼更「pythonic」而做出的具體更改。 – bmhkim