from collections import Counter
from glob import iglob
import re
import os
def remove_garbage(text):
"""Replace non-word (non-alphanumeric) chars in text with spaces,
then convert and return a lowercase version of the result.
"""
text = re.sub(r'\W+', ' ', text)
text = text.lower()
return text
topwords = 50000
folderpath = 'd:/jktextall/'
counter = Counter()
for filepath in iglob(os.path.join(folderpath, '*.txt')):
with open(filepath) as file:
counter.update(remove_garbage(file.read()).split())
file1 = open("jkwords1.txt","w")
for word, count in counter.most_common(topwords):
file1.write (str(count) +"," + word + "\n")
file1.close
我修改了上面的代碼,將輸出寫入文本文件jkwords1.txt
。沒有任何內容正在寫入文本文件。但是,print (word,count)
的確在python控制檯中產生輸出。Python循環的輸出不會去文本文件
但是,如果使用print(count,word)
,則不使用file1.write
,而是使用屏幕輸出。
使用'開放的(...),因爲文件1:...',以確保一切都被寫入磁盤正常。 –