2010-07-20 58 views
0
def get_houseid_list(): 
    """Returns a list of all house ids from db""" 
    print 'Building list of all HouseIDs...' 
    houseid_list = [] 
    houseids = session.query(Episode.HouseID).all() 
    for i in houseids: 
     houseid_list.append(i[0]) 
    return houseid_list 


def walkDir(top, ignore=[]): 
    """Returns a complete list of files from a directory, recursing through subfolders""" 
    print 'Building list of files...' 
    fflist = [] 
    for root, dirs, files in os.walk(top): 
     dirs[:] = [ dn for dn in dirs if dn not in ignore ] 
     file_list = [name for name in files if name[0] != '.'] 
     if len(file_list): 
      for f in file_list: 
       try: 
        houseid_parse(f) 
        print 'adding...', f 
        [fflist.append(join(root, f)) for f in file_list] 
       except HouseIdException: 
        print 'skipping...', f 
      print 'Found', len(file_list), 'files in', root 
    return fflist 


def get_nonmatches(houseid_list, finallist):
    """Return the files in ``finallist`` that contain none of the HouseIDs.

    Matching is a case-insensitive search for the literal HouseID text
    anywhere in the path.

    Args:
        houseid_list: iterable of HouseID strings; empty or None entries
            are ignored.
        finallist: iterable of file paths to check.

    Returns:
        A list of the paths from ``finallist``, in their original order and
        each at most once per occurrence in the input, for which no HouseID
        was found.
    """
    print('Comparing files to HouseIDs...')
    # Escape each id so characters such as '.' or '+' are matched literally
    # rather than being interpreted as regex syntax.
    literals = [re.escape(house_id) for house_id in houseid_list if house_id]
    if not literals:
        # An empty alternation would match every path; with no ids to
        # compare against, every file is a nonmatch.
        return list(finallist)
    # One compiled alternation replaces the ids-times-files nested search.
    known_ids = re.compile('|'.join(literals), re.IGNORECASE)
    return [path for path in finallist if not known_ids.search(path)]


def writeCSV(nonmatch): 
    print 'Writing nonmatches to CSV...' 
    csv.write('%s' % nonmatch) 


if __name__ == "__main__": 

    houseid_list = get_houseid_list() 
    print len(houseid_list), 'HouseIDs found' 
    wdirs = ['/Volumes/Assets/Projects'] 
    finallist = [] 
    for d in wdirs: 
     fflist = walkDir(d) 
    for f in fflist: 
     nonmatches = get_nonmatches(houseid_list,f) 
    print 'nonmatches', nonmatches 
+1

究竟是哪一部分出現了無限循環? – 2010-07-20 06:51:22

+1

我對 `[fflist.append(join(root, f)) for f in file_list]` 這一行有點困惑:你在別的地方定義了 `join` 函數嗎?還是它來自類似 `from os.path import join` 這樣的導入語句? – 2010-07-20 06:56:54

+0

順便指出:看起來 nonmatches 在遍歷 fflist 的循環中每次都會被覆蓋。也許應該改用 extend? nonmatches.extend(get_nonmatches(houseid_list,f)) – dmitko 2010-07-20 07:10:40

回答

2

在等待你提供足夠的信息來解決問題的同時,先對這段代碼提幾點意見。

像下面這樣依賴列表推導式的副作用,是相當糟糕的寫法:

[fflist.append(join(root, f)) for f in file_list] 

其實你可以寫成:

fflist.extend(join(root, f) for f in file_list) 

但是,這看起來像是一個 bug:你真的想在那裏再次遍歷 file_list 嗎?也許你只需要:

fflist.append(join(root, f)) 

下面這一部分似乎把條件判斷和它真正要控制的語句分開了:

if len(file_list): 
    for f in file_list: 
     try: 
      houseid_parse(f) 
      print 'adding...', f 
      [fflist.append(join(root, f)) for f in file_list] 
     except HouseIdException: 
      print 'skipping...', f 
    print 'Found', len(file_list), 'files in', root 

爲什麼不這樣寫呢?

for f in file_list: 
    try: 
     houseid_parse(f) 
     print 'adding...', f 
     fflist.append(join(root, f)) 
    except HouseIdException: 
     print 'skipping...', f 
if file_list: 
    print 'Found', len(file_list), 'files in', root 

如果你只是要遍歷 fflist,也許可以把 walkDir 改寫成生成器(generator):

def walkDir(top, ignore=[]): 
    """Returns a generator for a complete list of files from a directory, 
     recursing through subfolders""" 
    for root, dirs, files in os.walk(top): 
     dirs[:] = [ dn for dn in dirs if dn not in ignore ] 
     file_list = [name for name in files if name[0] != '.'] 
     for f in file_list: 
      try: 
       houseid_parse(f) 
       print 'yielding...', f 
       yield join(root, f) 
      except HouseIdException: 
       print 'skipping...', f 
     if file_list: 
      print 'Found', len(file_list), 'files in', root 

現在,也許你可以告訴我們程序的輸出是什麼,以及爲什麼你確信這是無限循環,而不只是運行時間很長。就我們目前所知,下面這一行

houseids = session.query(Episode.HouseID).all() 

可能只是需要很長的時間來執行。