1
def index_dir(self, base_path):
num_files_indexed = 0
allfiles = os.listdir(base_path)
#print allfiles
num_files_indexed = len(allfiles)
#print num_files_indexed
docnumber = 0
self._inverted_index = {} #dictionary
for file in allfiles:
self.documents = [base_path+file] #list of all text files
f = open(base_path+file, 'r')
lines = f.read()
# Tokenize the file into words
tokens = self.tokenize(lines)
docnumber = docnumber + 1
print 'docnumber', docnumber
for term in tokens:
# check if the key already exists in the dictionary, if yes,
# just add a new value for the key
#if self._inverted_index.has_key(term)
if term in sorted(self._inverted_index.keys()):
docnumlist = self._inverted_index.get(term)
docnumlist = docnumlist.append(docnumber)
else:
# if the key doesn't exist in dictionary, add the key (term)
# and associate the docnumber value with it.
self._inverted_index = self._inverted_index.update({term: docnumber})
#self._inverted_index[term] = docnumber
f.close()
print 'dictionary', self._inverted_index
print 'keys', self._inverted_index.keys()
return num_files_indexed
我工作的信息檢索項目,我們都應該通過多個文本文件抓取,標記化的文件和存儲在一個倒置的列表中的單詞(詞典)數據結構。的Python:「無類型」對象沒有屬性鍵
例如: doc1.txt: 「狗跑」 doc2.txt: 「貓睡」
_inverted_index = { '的':[0,1], '狗':[0] , '跑':[0], '貓':[1], '睡':[1] } 其中0,1是docIDs
,我發現了以下錯誤: 「 Nontype'對象沒有屬性鍵。行#95
所有的幫助,高度讚賞。
非常感謝,我只是沒有得到我的答案,但明白我做錯了什麼。再次感謝。 – csguy11 2010-09-12 06:50:25