import csv
class CsvWriter(object):
    """
    Wraps csv.writer in a partial file-API compatibility layer.

    The object opens the target file itself and exposes write(), close()
    and the context-manager protocol, so it can be handed to code that
    only expects something with a .write() method.
    """

    def __init__(self, fname, mode='w', *args, **kwargs):
        """
        Open fname and attach a csv.writer to it.

        fname   path of the file to open
        mode    mode string handed to open() (default 'w')
        *args, **kwargs
                forwarded unchanged to csv.writer (dialect, delimiter, ...)

        Any exception raised by csv.writer is re-raised after the file
        has been closed again.
        """
        super(CsvWriter, self).__init__()
        if 'b' in mode:
            # binary modes do not accept a newline argument
            self.f = open(fname, mode)
        else:
            # csv.writer emits its own line terminator; newline='' keeps
            # text mode from translating it a second time (\r\r\n on Windows)
            self.f = open(fname, mode, newline='')
        try:
            self.writer = csv.writer(self.f, *args, **kwargs)
        except Exception:
            # do not leak the open handle when the csv options are rejected
            self.f.close()
            raise

    def write(self, *args):
        """
        Writes a row of data to the csv file
        Can be called as
        .write() puts a blank row
        .write(2) puts a single cell
        .write("abc") puts a single cell
        .write([1,2,3]) puts 3 cells
        .write(1,2,3) puts 3 cells
        """
        # A lone iterable argument is the row itself.  Strings are iterable
        # too, but must stay one cell instead of being split per character.
        if (len(args) == 1
                and hasattr(args[0], '__iter__')
                and not isinstance(args[0], (str, bytes))):
            rowdata = args[0]
        else:
            rowdata = args
        self.writer.writerow(rowdata)

    def close(self):
        """Drop the csv writer and close the underlying file."""
        self.writer = None
        self.f.close()

    def __enter__(self):
        """Return the writer itself for use in a with-statement."""
        return self

    def __exit__(self, *exc):
        """Close the file on leaving the with-block; exceptions propagate."""
        self.close()
class NameSplitter(object):
    """
    Callable that splits a whitespace-separated name into three fields.

    Words found in the prefix set (compared case-insensitively) are glued,
    with a single space, onto the word that follows them.  Which words get
    glued is decided by a signature: (number of words, tuple of positions
    holding a prefix word).  Only signatures present in self.match are
    handled; any other input yields None.
    """

    def __init__(self, pre=None):
        """
        pre  iterable of accepted prefix words; defaults to 'm', 'shk', 'bs'
        """
        super(NameSplitter, self).__init__()
        # list of accepted prefixes
        if pre is None:
            self.pre = set(['m', 'shk', 'bs'])
        else:
            self.pre = set(s.lower() for s in pre)
        # is-a-prefix word tester
        self.isPre = lambda x, p=self.pre: x.lower() in p

        def jn(*args):
            # str.join takes ONE iterable, so pass the tuple itself
            # instead of unpacking it into separate arguments
            return ' '.join(args)

        # signature-based dispatch table
        self.match = {
            # no prefix: first, middle, last
            (3, ()): lambda w: (w[0], w[1], w[2]),
            # prefix on the first field
            (4, (0,)): lambda w: (jn(w[0], w[1]), w[2], w[3]),
            # prefix on the second field
            (4, (1,)): lambda w: (w[0], jn(w[1], w[2]), w[3]),
            # prefix on both the first and the second field
            (5, (0, 2)): lambda w: (jn(w[0], w[1]), jn(w[2], w[3]), w[4]),
        }

    def __call__(self, nameStr):
        """
        Split nameStr and return a 3-tuple of strings, or None when the
        word count / prefix positions match no entry in self.match.
        """
        words = nameStr.split()
        # build hashable signature
        pres = tuple(n for n, word in enumerate(words) if self.isPre(word))
        sig = (len(words), pres)
        # look the handler up first so that only a missing signature maps
        # to None; errors raised inside a handler are not swallowed
        do = self.match.get(sig)
        if do is None:
            return None
        return do(words)
def process(inf, outf, fn):
    """
    Feed every item of inf through fn and write the non-None results.

    inf   iterable of input records (e.g. an open text file)
    outf  object with a write(row) method
    fn    callable mapping one record to a row, or to None to skip it
    """
    # generator expression: fn is still called lazily, one record at a
    # time, right before the matching write
    converted = (fn(record) for record in inf)
    for row in converted:
        if row is None:
            continue
        outf.write(row)
def main():
    """
    Read names from input.txt, split each line with a default
    NameSplitter and write the rows that could be split to output.csv.
    """
    infname = "input.txt"
    outfname = "output.csv"
    # plain 'r': the old 'U' flag was removed in Python 3.11 (open() raises
    # ValueError), and text mode already applies universal newlines there
    with open(infname, 'r') as inf:
        with CsvWriter(outfname) as outf:
            process(inf, outf, NameSplitter())
# Run main() only when this file is executed as a script, not when imported.
if __name__=="__main__":
    main()
從示例中看不出「前綴」指的是什麼;例如,如何判斷「A B C D」應該拆成 `("A B", "C", "D")` 還是 `("A", "B C", "D")`?請給出一個更完整的例子,並更具體地解釋「前綴」是什麼。 – 2011-01-13 08:47:30
如果前綴的長度都是一個字母,並且沒有長度爲一個字母的名稱,則可以嘗試用 `len()` 把前綴篩選出來,再將它們與各自對應的名稱歸爲一組。只是一個想法。 – soulseekah 2011-01-13 08:55:55
只有三個前綴:「M」、「Shk」和「BS」。 – 3zzy 2011-01-13 09:12:54