1
我嘗試在 PDI 中對文件內容套用一個正則表達式,但輸出得到的卻是空值。這個正則表達式在「Regex Evaluation」步驟的「測試正則表達式」對話框中運作完全正常,但在預覽中卻沒有顯示相同的輸出。(主題:Pentaho 正則表達式評估)
這裏的文件內容:
我期待的輸出是 1:19:18.637s,但實際結果卻是 null。
下面是示例代碼。它無法直接在你的本地機器上運行,但應該足以說明我想要實現的目標。代碼如下:
<?xml version="1.0" encoding="UTF-8"?>
<transformation-steps>
<steps>
<!-- Step "Generate Rows" (type RowGenerator).
     Declares no fields and a row limit of 1; never_ending is N.
     Per the hop list, its output goes to "Get File Names". -->
<step>
  <name>Generate Rows</name>
  <type>RowGenerator</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <!-- No fields are defined for the generated row. -->
  <fields>
  </fields>
  <limit>1</limit>
  <never_ending>N</never_ending>
  <interval_in_ms>5000</interval_in_ms>
  <row_time_field>now</row_time_field>
  <last_time_field>FiveSecondsAgo</last_time_field>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>318</xloc>
    <yloc>286</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Get File Names" (type GetFileNames).
     Scans the directory ${DEVCI_DATA_HOME}/console_output/ with the file mask ".*txt",
     without sub-folders, and with a limit of 10.
     Per the hop list, its output goes to "Regex Evaluation 6". -->
<step>
  <name>Get File Names</name>
  <type>GetFileNames</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <filter>
    <filterfiletype>all_files</filterfiletype>
  </filter>
  <doNotFailIfNoFile>N</doNotFailIfNoFile>
  <rownum>N</rownum>
  <isaddresult>Y</isaddresult>
  <!-- filefield is N and the *_Field entries below are empty, so the static file entry is what is configured. -->
  <filefield>N</filefield>
  <rownum_field/>
  <filename_Field/>
  <wildcard_Field/>
  <exclude_wildcard_Field/>
  <dynamic_include_subfolders>N</dynamic_include_subfolders>
  <limit>10</limit>
  <file>
    <name>${DEVCI_DATA_HOME}/console_output/</name>
    <filemask>.*txt</filemask>
    <exclude_filemask/>
    <file_required>N</file_required>
    <include_subfolders>N</include_subfolders>
  </file>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>438</xloc>
    <yloc>286</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Get Variables" (type GetVariable).
     Defines one String field, issue_key_regex, filled from the variable ${issue_key_regex}.
     Per the hop list, it receives rows from "Data Grid" and sends them to "Stream lookup 4". -->
<step>
  <name>Get Variables</name>
  <type>GetVariable</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <fields>
    <field>
      <name>issue_key_regex</name>
      <variable>${issue_key_regex}</variable>
      <type>String</type>
      <format/>
      <currency/>
      <decimal/>
      <group/>
      <length>-1</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>438</xloc>
    <yloc>126</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Load file content in memory" (type LoadFileInput).
     Produces one String field named "File content" with element_type "content".
     Per the hop list, it receives rows from "Regex Evaluation 6" and feeds "Regex Evaluation 3". -->
<step>
  <name>Load file content in memory</name>
  <type>LoadFileInput</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <include>N</include>
  <include_field>full_file_path</include_field>
  <rownum>N</rownum>
  <addresultfile>N</addresultfile>
  <IsIgnoreEmptyFile>Y</IsIgnoreEmptyFile>
  <rownum_field/>
  <encoding/>
  <!-- NOTE(review): a hard-coded Windows path is configured here while IsInFields below is Y with
       DynamicFilenameField "filename"; presumably the incoming field wins and this entry is unused. Confirm. -->
  <file>
    <name>C:\Users\nikhil.karkare\console_output\star-lin64-build-feature_VMESH120_29.txt</name>
    <filemask/>
    <exclude_filemask/>
    <file_required>N</file_required>
    <include_subfolders>N</include_subfolders>
  </file>
  <fields>
    <field>
      <name>File content</name>
      <element_type>content</element_type>
      <type>String</type>
      <format/>
      <currency/>
      <decimal/>
      <group/>
      <length>-1</length>
      <precision>-1</precision>
      <trim_type>none</trim_type>
      <repeat>N</repeat>
    </field>
  </fields>
  <limit>0</limit>
  <IsInFields>Y</IsInFields>
  <DynamicFilenameField>filename</DynamicFilenameField>
  <shortFileFieldName>file_name</shortFileFieldName>
  <pathFieldName/>
  <hiddenFieldName/>
  <lastModificationTimeFieldName/>
  <uriNameFieldName/>
  <rootUriNameFieldName/>
  <extensionFieldName/>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>938</xloc>
    <yloc>286</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Regex Evaluation 3" (type RegexEval).
     Applies the pattern below to the field "uri" and stores capture group 1 in the String field
     build_id_from_regex. No result field name is set.
     Per the hop list, it receives rows from "Load file content in memory" and feeds "Stream lookup 4". -->
<step>
  <name>Regex Evaluation 3</name>
  <type>RegexEval</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <!-- NOTE(review): the "." in front of "txt" is unescaped, so it matches any character, not only a
       literal dot. Confirm whether an escaped dot was intended. -->
  <script><![CDATA[.*console_output\/([A-Za-z0-9_\.\-]+).txt]]></script>
  <matcher>uri</matcher>
  <resultfieldname/>
  <usevar>N</usevar>
  <allowcapturegroups>Y</allowcapturegroups>
  <replacefields>Y</replacefields>
  <!-- All regex option flags are switched off for this step. -->
  <canoneq>N</canoneq>
  <caseinsensitive>N</caseinsensitive>
  <comment>N</comment>
  <dotall>N</dotall>
  <multiline>N</multiline>
  <unicode>N</unicode>
  <unix>N</unix>
  <fields>
    <field>
      <name>build_id_from_regex</name>
      <type>String</type>
      <format/>
      <group/>
      <decimal/>
      <length>-1</length>
      <precision>-1</precision>
      <nullif/>
      <ifnull/>
      <trimtype>none</trimtype>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>1098</xloc>
    <yloc>286</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Regex Evaluation 4" (type RegexEval).
     Applies the pattern below to the field "File content", writes the match flag to the field "result"
     and stores capture group 1 in the String field star_maven_time (expected value looks like 1:19:18.637s).
     Per the hop list, it receives rows from "Stream lookup 4" and feeds "Dummy (do nothing)".

     Why the original configuration produced null:
     "File content" holds a whole file, i.e. many lines. The pattern has to match the entire field value,
     and with dotall switched off "." never matches a line terminator, so ".*" could not span the lines
     before and after the star-maven line. The match failed and the capture group stayed null.

     What changed:
     1. dotall is now Y, so the leading and trailing ".*" can consume the rest of the file.
     2. The middle of the pattern uses character classes that exclude CR and LF, so INFO, star-maven,
        SUCCESS and the bracketed time must all sit on one line, and the captured group stops at the
        first closing bracket instead of greedily running across lines. -->
<step>
  <name>Regex Evaluation 4</name>
  <type>RegexEval</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <script><![CDATA[^.*\[INFO\][^\r\n]*star\-maven[^\r\n]*SUCCESS[^\r\n]*\[([^\]\r\n]+)\].*]]></script>
  <matcher>File content</matcher>
  <resultfieldname>result</resultfieldname>
  <usevar>N</usevar>
  <allowcapturegroups>Y</allowcapturegroups>
  <replacefields>Y</replacefields>
  <canoneq>N</canoneq>
  <caseinsensitive>N</caseinsensitive>
  <comment>N</comment>
  <!-- Must be Y: the input field is multi-line and "." has to be able to cross line terminators. -->
  <dotall>Y</dotall>
  <multiline>N</multiline>
  <unicode>N</unicode>
  <unix>N</unix>
  <fields>
    <field>
      <name>star_maven_time</name>
      <type>String</type>
      <format/>
      <group/>
      <decimal/>
      <length>-1</length>
      <precision>-1</precision>
      <nullif/>
      <ifnull/>
      <trimtype>none</trimtype>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>725</xloc>
    <yloc>124</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Regex Evaluation 6" (type RegexEval).
     Applies the pattern below to the field short_filename and stores capture group 1 in the String
     field build_id_from_short_filename. No result field name is set.
     Per the hop list, it receives rows from "Get File Names" and feeds "Load file content in memory". -->
<step>
  <name>Regex Evaluation 6</name>
  <type>RegexEval</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <!-- NOTE(review): the "." in front of "txt" is unescaped, so it matches any character, not only a
       literal dot. Confirm whether an escaped dot was intended. -->
  <script><![CDATA[([A-Za-z0-9_\.\-]+).txt]]></script>
  <matcher>short_filename</matcher>
  <resultfieldname/>
  <usevar>N</usevar>
  <allowcapturegroups>Y</allowcapturegroups>
  <replacefields>Y</replacefields>
  <!-- All regex option flags are switched off for this step. -->
  <canoneq>N</canoneq>
  <caseinsensitive>N</caseinsensitive>
  <comment>N</comment>
  <dotall>N</dotall>
  <multiline>N</multiline>
  <unicode>N</unicode>
  <unix>N</unix>
  <fields>
    <field>
      <name>build_id_from_short_filename</name>
      <type>String</type>
      <format/>
      <group/>
      <decimal/>
      <length>-1</length>
      <precision>-1</precision>
      <nullif/>
      <ifnull/>
      <trimtype>none</trimtype>
    </field>
  </fields>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>558</xloc>
    <yloc>286</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Stream lookup 4" (type StreamLookup).
     The lookup source step is "Regex Evaluation 3" (see the from element); per the hop list the other
     incoming hop is from "Get Variables", and the output goes to "Regex Evaluation 4".
     The key pairs build_id with build_id_from_regex, and the values fetched are build_id_from_regex
     and "File content", both as String with no default. -->
<step>
  <name>Stream lookup 4</name>
  <type>StreamLookup</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <from>Regex Evaluation 3</from>
  <input_sorted>N</input_sorted>
  <preserve_memory>Y</preserve_memory>
  <sorted_list>N</sorted_list>
  <integer_pair>N</integer_pair>
  <lookup>
    <!-- NOTE(review): if build_id never equals build_id_from_regex, "File content" gets its empty
         default and downstream steps have nothing to match against. Confirm with real data. -->
    <key>
      <name>build_id</name>
      <field>build_id_from_regex</field>
    </key>
    <value>
      <name>build_id_from_regex</name>
      <rename>build_id_from_regex</rename>
      <default/>
      <type>String</type>
    </value>
    <value>
      <name>File content</name>
      <rename>File content</rename>
      <default/>
      <type>String</type>
    </value>
  </lookup>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>578</xloc>
    <yloc>126</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Dummy (do nothing)" (type Dummy).
     Carries no step-specific settings. Per the hop list, it is the target of "Regex Evaluation 4"
     and has no outgoing hop. -->
<step>
  <name>Dummy (do nothing)</name>
  <type>Dummy</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>1036</xloc>
    <yloc>120</yloc>
    <draw>Y</draw>
  </GUI>
</step>
<!-- Step "Data Grid" (type DataGrid).
     Declares one String field, build_id, and one data line whose single item is empty.
     Per the hop list, its output goes to "Get Variables". -->
<step>
  <name>Data Grid</name>
  <type>DataGrid</type>
  <description/>
  <distribute>Y</distribute>
  <custom_distribution/>
  <copies>1</copies>
  <partitioning>
    <method>none</method>
    <schema_name/>
  </partitioning>
  <fields>
    <field>
      <name>build_id</name>
      <type>String</type>
      <format/>
      <currency/>
      <decimal/>
      <group/>
      <length>-1</length>
      <precision>-1</precision>
      <set_empty_string>N</set_empty_string>
    </field>
  </fields>
  <!-- NOTE(review): the only row has an empty build_id, and build_id is the key used by
       "Stream lookup 4". Presumably the real value was stripped from this sample. Confirm. -->
  <data>
    <line> <item/> </line>
  </data>
  <cluster_schema/>
  <remotesteps>
    <input> </input>
    <output> </output>
  </remotesteps>
  <!-- Canvas position of the step icon. -->
  <GUI>
    <xloc>308</xloc>
    <yloc>126</yloc>
    <draw>Y</draw>
  </GUI>
</step>
</steps>
<!-- Hop list: every hop is enabled. Read together, the hops form two chains that meet at "Stream lookup 4":
       Generate Rows, Get File Names, Regex Evaluation 6, Load file content in memory, Regex Evaluation 3, Stream lookup 4
       Data Grid, Get Variables, Stream lookup 4
     and then continue: Stream lookup 4, Regex Evaluation 4, Dummy (do nothing).
     The hops are kept in their original document order. -->
<order>
  <hop>
    <from>Generate Rows</from>
    <to>Get File Names</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Get File Names</from>
    <to>Regex Evaluation 6</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Get Variables</from>
    <to>Stream lookup 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Load file content in memory</from>
    <to>Regex Evaluation 3</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 3</from>
    <to>Stream lookup 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 4</from>
    <to>Dummy (do nothing)</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Regex Evaluation 6</from>
    <to>Load file content in memory</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Stream lookup 4</from>
    <to>Regex Evaluation 4</to>
    <enabled>Y</enabled>
  </hop>
  <hop>
    <from>Data Grid</from>
    <to>Get Variables</to>
    <enabled>Y</enabled>
  </hop>
</order>
<!-- No notepads and no step error handling are defined. -->
<notepads>
</notepads>
<step_error_handling>
</step_error_handling>
</transformation-steps>
任何建議將不勝感激。謝謝
你能附加一個完整的KTR文件,例如文件名硬編碼到轉換?這樣我們可以測試你的代碼。 – matthiash
您是否嘗試過pentaho市場中的「特殊字符刪除」步驟?可能幫助你! – Rishu
感謝Rishu的建議。但這一步與我試圖完成的事情無關。你的建議一定會在未來有所幫助,因爲我並不真的知道PDI中存在這樣的步驟。 – Nikhil