簡短的故事:我有一個來自系統的輸出文件,分解爲「| |;」,我需要獲取管道之間的內容「 |」並將它們寫入另一個文件。(Python)將輸出文本文件分解爲令牌
這是輸出文件的樣子:
|Operation_ID|,|Operation_Name|,|business_group_name|,|business_unit_name|,|Program_ID|,|Program_Name|,|Project_ID|,|Project_Name|,|Program_Type_Name|,|Program_Cost_Type_Name|,|Start_date|,|Estimated_End_Date|,|End_Date|,|SQA_Name|,|CMA_Name|,|SSE_Name|,|PMs|,|TLs|,|PortfolioManager|,|Finished|,|Research|,|SQA_ID|,|CMA_ID|,|SSE_ID|
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2163|,|QQQ|,||,||,|15/12/2008|,||,|22/01/2009|,||,||,||,|EEE EEE |,||,||,|True|,||,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|3|,|WWW|,|2165|,|QQQ|,||,||,|01/01/2009|,||,|09/04/2010|,||,||,||,|EEE EEE EEE|,||,||,|True|,|False|,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|10|,|WWW|,|2164|,|QQQ|,|Development|,|Direct|,|15/12/2008|,||,|26/02/2010|,||,||,||,|EEE |,|EEE EEE ; EEE EEE ; EEE EEE |,||,|True|,|False|,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|3|,|WWW|,|2166|,|QQQ|,||,||,|15/12/2008|,||,|31/05/2010|,||,||,||,||,||,||,|True|,|False|,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|10|,|WWW|,|2168|,|QQQ|,|Development|,|Direct|,|05/01/2009|,||,|20/05/2009|,||,||,||,|EEE EEE EEE|,|EEE EEE |,||,|True|,||,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2169|,|QQQ|,||,||,|13/01/2009|,||,|22/05/2009|,||,||,||,|EEE EEE EEE|,|EEE EEE EEE EEE|,||,|True|,||,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|2|,|WWW|,|2174|,|QQQ|,||,||,|08/01/2009|,||,|20/04/2009|,||,||,||,|EEE EEE |,|EEE EEE|,||,|True|,||,||,||,||
|23|,|XXX|,|YYY|,|ZZZ|,|47|,|WWW|,|2176|,|QQQ|,|Internal|,|Indirect|,|21/01/2009|,||,|17/12/2010|,||,||,||,|EEE EEE; EEE EEE|,||,||,|True|,|True|,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2142|,|QQQ|,||,||,|21/10/2008|,||,|13/05/2009|,||,||,||,|EEE EEE |,||,||,|True|,||,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2147|,|QQQ|,||,||,|07/11/2008|,||,|26/11/2008|,||,||,||,|EEE EEE EEE EEE |,|EEE EEE |,||,|True|,||,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2148|,|QQQ|,||,||,|07/11/2008|,||,|09/04/2009|,||,||,||,||,||,||,|True|,||,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|3|,|WWW|,|2149|,|QQQ|,||,||,|01/11/2008|,|31/01/2011|,|01/12/2010|,||,||,||,|EEE EEE ; EEE EEE|,|EEE EEE; EEE EEE|,||,|True|,|False|,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|20|,|WWW|,|2150|,|QQQ|,|Development|,||,|31/10/2008|,|31/10/2010|,|29/10/2010|,||,||,||,|EEE EEE |,|EEE EEE |,||,|True|,|False|,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2152|,|QQQ|,||,||,|26/11/2008|,||,|03/07/2009|,||,||,||,|EEE EEE EEE ; EEE EEE EEE EEE |,|EEE EEE |,||,|True|,||,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|3|,|WWW|,|2151|,|QQQ|,||,||,|01/11/2008|,||,|29/01/2009|,||,||,||,||,||,||,|True|,||,||,||,||
|23|,|XXX|,|YYY|,|ZZZ|,|47|,|WWW|,|2187|,|QQQ|,|Internal|,|Indirect|,|21/01/2009|,||,|03/12/2009|,||,||,||,|EEE EEE|,|EEE EEE EEE|,||,|True|,|True|,||,||,||
|23|,|XXX|,|YYY|,|ZZZ|,|47|,|WWW|,|2192|,|QQQ|,|Internal|,|Indirect|,|21/01/2009|,||,|11/01/2011|,||,||,||,|EEE EEE EEE; EEE EEE|,||,||,|True|,|True|,||,||,||
|20|,|XXX|,|YYY|,|ZZZ|,|1|,|WWW|,|2196|,|QQQ|,||,||,|23/01/2009|,||,|24/03/2010|,||,||,||,|EEE EEE |,||,||,|True|,|False|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2231|,|QQQ|,|Research|,||,|21/05/2009|,||,|01/12/2009|,||,||,||,||,||,||,|True|,|False|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2230|,|QQQ|,|Research|,||,|21/05/2009|,||,|30/11/2009|,||,||,||,||,||,||,|True|,|False|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2232|,|QQQ|,|Research|,||,|21/05/2009|,||,|09/07/2010|,||,||,||,||,|EEE EEE EEE|,||,|True|,|True|,||,||,||
|24|,|XXX|,|YYY|,|ZZZ|,|44|,|WWW|,|2237|,|QQQ|,|Research|,|Indirect|,|21/05/2009|,||,|22/01/2010|,||,||,||,||,||,||,|True|,|False|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2238|,|QQQ|,|Research|,||,|21/05/2009|,||,|25/02/2010|,||,||,||,||,||,||,|True|,|False|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2239|,|QQQ|,|Research|,||,|21/05/2009|,||,|04/01/2011|,||,||,||,||,||,||,|True|,|True|,||,||,||
|21|,|XXX|,|YYY|,|ZZZ|,|41|,|WWW|,|2240|,|QQQ|,|Research|,||,|21/05/2009|,||,|05/01/2011|,||,||,||,||,||,||,|True|,|True|,||,||,||
|26|,|XXX|,|YYY|,|ZZZ|,|50|,|WWW|,|2242|,|QQQ|,|Internal|,|Indirect|,|21/05/2009|,||,|14/10/2010|,||,||,||,||,||,||,|True|,|True|,||,||,||
|22|,|XXX|,|YYY|,|ZZZ|,|3|,|WWW|,|2273|,|QQQ|,||,||,|25/05/2009|,||,|29/01/2010|,||,||,||,||,|EEE EEE|,||,|True|,|False|,||,||,||
我是新來的Python /編程一般,所以我試着寫了下面的算法:
# => Reads the file test.txt;
# => Scans character by character for '|' character;
# => If character '|' is found, skips to next character and add subsequent
# characters to a 'token' array, until next character is '|' again;
# => When next character is '|', add 'token' array to 'array_of_tokens';
# => Once END OF FILE arrives, writes 'array_of_tokens' to 'test_output.txt'
# file;
test_file = 'test.txt'
test_output = 'test_output.txt'
token = []
array_of_tokens = []
index = 0
# => Reads the file test.txt;
with open(test_file) as file:
while True:
# => Scans character by character for '|' character;
character = file.read(1)
# => If character '|' is found,
if character == "|"
# skips to next character
character = next(character),
# until next character is '|' again;
while not character == '|'
# add subsequent characters to a 'token' array
token(index) = character
index ++
character = next(character)
# => When next character is '|', add 'token' array to 'array_of_tokens';
if next(character) == '|'
array_of_tokens = token
else if not character:
break
print "Read a character: ", character
# => Once END OF FILE arrives, writes 'array_of_tokens' to 'test_output.txt'
# file;
test_output.write(str(array_of_tokens))
而且它顯然沒有不行。事情是,我不完全確定我現在應該做什麼,我知道我需要的結果(寫在評論中),但我不知道如何讓我的代碼工作。任何人都可以幫忙嗎?此外,如果有任何提示尋找建議/資源,我可以考慮成爲一個更好的程序員,一個真正的程序員,我非常感謝!
在此先感謝!
似乎是良好的情況下使用[正則表達式](http://en.wikipedia.org/wiki/Regular_expression) – LeeNeverGup 2015-02-11 18:17:34