2016-09-19 97 views
1

我在PDI中對文件內容使用了一個正則表達式,但輸出中得到的卻是空值。該正則表達式在「Regex評估」步驟的「測試正則表達式」功能中完全正常,但在預覽中卻沒有顯示相同的輸出。

Pentaho正則表達式評估

這裏的文件內容:

我期望的輸出是1:19:18.637s,但實際得到的是null

下面是示例代碼。它不會在你的本地機器上工作,但它肯定會給你一個我想要實現的想法。下面是我想要的代碼:

<?xml version="1.0" encoding="UTF-8"?> 
 
<transformation-steps> 
 
<steps> 
 
    <!-- Step "Generate Rows" (type RowGenerator): configured with a row limit of 1
         and an empty field list; never_ending is switched off. -->
    <step>
      <name>Generate Rows</name>
      <type>RowGenerator</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <fields>
      </fields>
      <limit>1</limit>
      <never_ending>N</never_ending>
      <interval_in_ms>5000</interval_in_ms>
      <row_time_field>now</row_time_field>
      <last_time_field>FiveSecondsAgo</last_time_field>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>318</xloc>
        <yloc>286</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Get File Names" (type GetFileNames): lists entries under
         ${DEVCI_DATA_HOME}/console_output/ whose names match the mask ".*txt".
         Sub-folders are not included and the step limit is 10. -->
    <step>
      <name>Get File Names</name>
      <type>GetFileNames</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <filter>
        <filterfiletype>all_files</filterfiletype>
      </filter>
      <doNotFailIfNoFile>N</doNotFailIfNoFile>
      <rownum>N</rownum>
      <isaddresult>Y</isaddresult>
      <!-- filefield = N: the file specification comes from the <file> section below,
           not from an incoming field -->
      <filefield>N</filefield>
      <rownum_field/>
      <filename_Field/>
      <wildcard_Field/>
      <exclude_wildcard_Field/>
      <dynamic_include_subfolders>N</dynamic_include_subfolders>
      <limit>10</limit>
      <file>
        <name>${DEVCI_DATA_HOME}/console_output/</name>
        <filemask>.*txt</filemask>
        <exclude_filemask/>
        <file_required>N</file_required>
        <include_subfolders>N</include_subfolders>
      </file>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>438</xloc>
        <yloc>286</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Get Variables" (type GetVariable): defines one String field,
         issue_key_regex, whose value is the expression ${issue_key_regex}. -->
    <step>
      <name>Get Variables</name>
      <type>GetVariable</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <fields>
        <field>
          <name>issue_key_regex</name>
          <variable>${issue_key_regex}</variable>
          <type>String</type>
          <format/>
          <currency/>
          <decimal/>
          <group/>
          <length>-1</length>
          <precision>-1</precision>
          <trim_type>none</trim_type>
        </field>
      </fields>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>438</xloc>
        <yloc>126</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Load file content in memory" (type LoadFileInput): exposes the file
         content as a String field named "File content". IsInFields is Y and the
         dynamic file name field is "filename". -->
    <step>
      <name>Load file content in memory</name>
      <type>LoadFileInput</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <include>N</include>
      <include_field>full_file_path</include_field>
      <rownum>N</rownum>
      <addresultfile>N</addresultfile>
      <IsIgnoreEmptyFile>Y</IsIgnoreEmptyFile>
      <rownum_field/>
      <encoding/>
      <!-- NOTE(review): hard-coded, user-specific Windows path; presumably unused while
           IsInFields is Y (file name taken from the "filename" field) - confirm -->
      <file>
        <name>C:\Users\nikhil.karkare\console_output\star-lin64-build-feature_VMESH120_29.txt</name>
        <filemask/>
        <exclude_filemask/>
        <file_required>N</file_required>
        <include_subfolders>N</include_subfolders>
      </file>
      <fields>
        <field>
          <name>File content</name>
          <element_type>content</element_type>
          <type>String</type>
          <format/>
          <currency/>
          <decimal/>
          <group/>
          <length>-1</length>
          <precision>-1</precision>
          <trim_type>none</trim_type>
          <repeat>N</repeat>
        </field>
      </fields>
      <limit>0</limit>
      <IsInFields>Y</IsInFields>
      <DynamicFilenameField>filename</DynamicFilenameField>
      <shortFileFieldName>file_name</shortFileFieldName>
      <pathFieldName/>
      <hiddenFieldName/>
      <lastModificationTimeFieldName/>
      <uriNameFieldName/>
      <rootUriNameFieldName/>
      <extensionFieldName/>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>938</xloc>
        <yloc>286</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Regex Evaluation 3" (type RegexEval): evaluates the "uri" field and
         stores capture group 1 (the file name without its .txt extension) in the
         String field build_id_from_regex. -->
    <step>
      <name>Regex Evaluation 3</name>
      <type>RegexEval</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <!-- The dot before "txt" is escaped so that only a literal ".txt" suffix is
           accepted; an unescaped dot would match any character there. -->
      <script><![CDATA[.*console_output\/([A-Za-z0-9_\.\-]+)\.txt]]></script>
      <matcher>uri</matcher>
      <resultfieldname/>
      <usevar>N</usevar>
      <allowcapturegroups>Y</allowcapturegroups>
      <replacefields>Y</replacefields>
      <canoneq>N</canoneq>
      <caseinsensitive>N</caseinsensitive>
      <comment>N</comment>
      <dotall>N</dotall>
      <multiline>N</multiline>
      <unicode>N</unicode>
      <unix>N</unix>
      <fields>
        <field>
          <name>build_id_from_regex</name>
          <type>String</type>
          <format/>
          <group/>
          <decimal/>
          <length>-1</length>
          <precision>-1</precision>
          <nullif/>
          <ifnull/>
          <trimtype>none</trimtype>
        </field>
      </fields>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>1098</xloc>
        <yloc>286</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Regex Evaluation 4" (type RegexEval): evaluates the "File content"
         field, writes the match flag to "result" and capture group 1 to the String
         field star_maven_time (e.g. the bracketed duration on the star-maven
         SUCCESS line of a build log). -->
    <step>
      <name>Regex Evaluation 4</name>
      <type>RegexEval</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <!-- "File content" holds a whole multi-line file. The step is understood to
           require the pattern to match the entire field value, so the leading and
           trailing ".*" must be able to cross line breaks: dotall is enabled below.
           The part between [INFO] and the closing bracket uses [^\r\n]* so that it
           stays on one line, and the capture group stops at the first "]".
           NOTE(review): if several lines qualify, the greedy leading ".*" selects
           the last one - confirm that is the intended line. -->
      <script><![CDATA[.*\[INFO\][^\r\n]*star\-maven[^\r\n]*SUCCESS[^\r\n]*\[([^\]\r\n]*)\].*]]></script>
      <matcher>File content</matcher>
      <resultfieldname>result</resultfieldname>
      <usevar>N</usevar>
      <allowcapturegroups>Y</allowcapturegroups>
      <replacefields>Y</replacefields>
      <canoneq>N</canoneq>
      <caseinsensitive>N</caseinsensitive>
      <comment>N</comment>
      <!-- Enabled so that "." also matches line terminators (see note on the script) -->
      <dotall>Y</dotall>
      <multiline>N</multiline>
      <unicode>N</unicode>
      <unix>N</unix>
      <fields>
        <field>
          <name>star_maven_time</name>
          <type>String</type>
          <format/>
          <group/>
          <decimal/>
          <length>-1</length>
          <precision>-1</precision>
          <nullif/>
          <ifnull/>
          <trimtype>none</trimtype>
        </field>
      </fields>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>725</xloc>
        <yloc>124</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Regex Evaluation 6" (type RegexEval): evaluates the "short_filename"
         field and stores capture group 1 (the name without its .txt extension) in
         the String field build_id_from_short_filename. -->
    <step>
      <name>Regex Evaluation 6</name>
      <type>RegexEval</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <!-- The dot before "txt" is escaped so that only a literal ".txt" suffix is
           accepted; an unescaped dot would match any character there. -->
      <script><![CDATA[([A-Za-z0-9_\.\-]+)\.txt]]></script>
      <matcher>short_filename</matcher>
      <resultfieldname/>
      <usevar>N</usevar>
      <allowcapturegroups>Y</allowcapturegroups>
      <replacefields>Y</replacefields>
      <canoneq>N</canoneq>
      <caseinsensitive>N</caseinsensitive>
      <comment>N</comment>
      <dotall>N</dotall>
      <multiline>N</multiline>
      <unicode>N</unicode>
      <unix>N</unix>
      <fields>
        <field>
          <name>build_id_from_short_filename</name>
          <type>String</type>
          <format/>
          <group/>
          <decimal/>
          <length>-1</length>
          <precision>-1</precision>
          <nullif/>
          <ifnull/>
          <trimtype>none</trimtype>
        </field>
      </fields>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>558</xloc>
        <yloc>286</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Stream lookup 4" (type StreamLookup): uses "Regex Evaluation 3" as the
         lookup source. The key pairs the field build_id with the lookup field
         build_id_from_regex; the values retrieved are build_id_from_regex and
         "File content", both typed String and kept under their own names. -->
    <step>
      <name>Stream lookup 4</name>
      <type>StreamLookup</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <from>Regex Evaluation 3</from>
      <input_sorted>N</input_sorted>
      <preserve_memory>Y</preserve_memory>
      <sorted_list>N</sorted_list>
      <integer_pair>N</integer_pair>
      <lookup>
        <key>
          <name>build_id</name>
          <field>build_id_from_regex</field>
        </key>
        <value>
          <name>build_id_from_regex</name>
          <rename>build_id_from_regex</rename>
          <default/>
          <type>String</type>
        </value>
        <value>
          <name>File content</name>
          <rename>File content</rename>
          <default/>
          <type>String</type>
        </value>
      </lookup>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>578</xloc>
        <yloc>126</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Dummy (do nothing)" (type Dummy): carries no step-specific settings;
         it is the target of the hop from "Regex Evaluation 4". -->
    <step>
      <name>Dummy (do nothing)</name>
      <type>Dummy</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>1036</xloc>
        <yloc>120</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
    <!-- Step "Data Grid" (type DataGrid): declares one String field, build_id, and a
         single data line.
         NOTE(review): that line's only item is empty, so build_id presumably carries
         no value in this sample - verify that downstream lookups keyed on build_id
         can still find a match. -->
    <step>
      <name>Data Grid</name>
      <type>DataGrid</type>
      <description/>
      <distribute>Y</distribute>
      <custom_distribution/>
      <copies>1</copies>
      <partitioning>
        <method>none</method>
        <schema_name/>
      </partitioning>
      <fields>
        <field>
          <name>build_id</name>
          <type>String</type>
          <format/>
          <currency/>
          <decimal/>
          <group/>
          <length>-1</length>
          <precision>-1</precision>
          <set_empty_string>N</set_empty_string>
        </field>
      </fields>
      <data>
        <line> <item/> </line>
      </data>
      <cluster_schema/>
      <remotesteps>
        <input> </input>
        <output> </output>
      </remotesteps>
      <!-- Canvas position of the step in the designer -->
      <GUI>
        <xloc>308</xloc>
        <yloc>126</yloc>
        <draw>Y</draw>
      </GUI>
    </step>
 

 
</steps> 
 
<!-- Hops wiring the steps together; all nine hops are enabled.
     File branch:     Generate Rows -> Get File Names -> Regex Evaluation 6
                      -> Load file content in memory -> Regex Evaluation 3 -> Stream lookup 4
     Variable branch: Data Grid -> Get Variables -> Stream lookup 4
     Tail:            Stream lookup 4 -> Regex Evaluation 4 -> Dummy (do nothing) -->
<order>
    <hop>
      <from>Generate Rows</from>
      <to>Get File Names</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Get File Names</from>
      <to>Regex Evaluation 6</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Get Variables</from>
      <to>Stream lookup 4</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Load file content in memory</from>
      <to>Regex Evaluation 3</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Regex Evaluation 3</from>
      <to>Stream lookup 4</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Regex Evaluation 4</from>
      <to>Dummy (do nothing)</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Regex Evaluation 6</from>
      <to>Load file content in memory</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Stream lookup 4</from>
      <to>Regex Evaluation 4</to>
      <enabled>Y</enabled>
    </hop>
    <hop>
      <from>Data Grid</from>
      <to>Get Variables</to>
      <enabled>Y</enabled>
    </hop>
</order>
 
<!-- No notepads are defined for this transformation fragment -->
<notepads>
</notepads>
<!-- No step error handling is configured -->
<step_error_handling>
</step_error_handling>
 
</transformation-steps>

任何建議將不勝感激。謝謝

+0

你能附加一個完整的KTR文件,例如文件名硬編碼到轉換?這樣我們可以測試你的代碼。 – matthiash

+0

您是否嘗試過pentaho市場中的「特殊字符刪除」步驟?可能幫助你! – Rishu

+0

感謝Rishu的建議。但這一步與我試圖完成的事情無關。你的建議一定會在未來有所幫助,因爲我並不真的知道PDI中存在這樣的步驟。 – Nikhil

回答

0

沒關係。我正在使用的正則表達式需要根據我試圖提取的字符串進行更精細的處理。問題解決了。

結論:如果文件內容有數兆字節大,而您想從中提取某個字符串,那麼您的RegEx必須針對該字符串寫得非常具體(順帶一提,正則表達式本來就應該儘量具體)。在RegEx編譯器或RegEx評估步驟的測試工具中測試時,它可能會給出正確的結果,但實際運行轉換時卻只會得到空值。請反覆修改並優化RegEx,直到輸出中出現您想提取的字符串爲止。