2013-07-13 57 views
1

我目前正在使用Sphinx4,更具體地說是TranslatorDemo。但是,當我使用它默認的詞典和模型運行時,它只會輸出數字。說明中提到,要改變這一點需要修改該模型的config.xml文件,但我讀過之後仍然感到困惑,而且幾次嘗試更改字典名稱似乎都沒有奏效。(問題標題:語音到文本翻譯所需的Sphinx4配置)

這裏是頁面上的說明:

http://cmusphinx.sourceforge.net/sphinx4/src/apps/edu/cmu/sphinx/demo/transcriber/README.html http://cmusphinx.sourceforge.net/sphinx4/doc/ProgrammersGuide.html

這是我的配置文件:

<?xml version="1.0" encoding="UTF-8"?> 

<!-- 
    Sphinx-4 Configuration file 
--> 

<!-- ******************************************************** --> 
<!-- Transcriber demo configuration file (WSJ acoustic model) -->
<!-- ******************************************************** --> 

<config>   

    <!-- ******************************************************** --> 
    <!-- frequently tuned properties        --> 
    <!-- ******************************************************** --> 

    <!-- Global properties. Components further down read several of these
         through ${...} references (absoluteBeamWidth, relativeBeamWidth,
         wordInsertionProbability, languageWeight, frontend, recognizer).
         logLevel and showCreations are not referenced that way by any
         component in this file. -->
    <property name="logLevel"                 value="WARNING"/>

    <property name="absoluteBeamWidth"        value="-1"/>
    <property name="relativeBeamWidth"        value="1E-80"/>
    <property name="wordInsertionProbability" value="1E-36"/>
    <property name="languageWeight"           value="8"/>

    <property name="frontend"                 value="epFrontEnd"/>
    <property name="recognizer"               value="recognizer"/>
    <property name="showCreations"            value="false"/>


    <!-- ******************************************************** --> 
    <!-- word recognizer configuration       --> 
    <!-- ******************************************************** --> 

    <!-- Recognizer component: references the "decoder" component and lists
         the three monitor components declared in the monitors section. -->
    <component name="recognizer"
               type="edu.cmu.sphinx.recognizer.Recognizer">
        <property name="decoder" value="decoder"/>
        <propertylist name="monitors">
            <item>accuracyTracker </item>
            <item>speedTracker </item>
            <item>memoryTracker </item>
        </propertylist>
    </component>

    <!-- ******************************************************** --> 
    <!-- The Decoder configuration        --> 
    <!-- ******************************************************** --> 

    <!-- Decoder component: wired to the "searchManager" component. -->
    <component name="decoder"
               type="edu.cmu.sphinx.decoder.Decoder">
        <property name="searchManager" value="searchManager"/>
    </component>

    <!-- Search manager: ties together the linguist, pruner, scorer and
         active-list factory components declared elsewhere in this file. -->
    <component name="searchManager" type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager">
        <property name="logMath"           value="logMath"/>
        <property name="linguist"          value="flatLinguist"/>
        <property name="pruner"            value="trivialPruner"/>
        <property name="scorer"            value="threadedScorer"/>
        <property name="activeListFactory" value="activeList"/>
    </component>


    <!-- Active-list factory: both beam widths come from the global
         properties of the same names at the top of this file. -->
    <component name="activeList" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
        <property name="logMath"           value="logMath"/>
        <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
        <property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
    </component>

    <!-- Pruner used by searchManager; declared without any properties. -->
    <component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>

    <!-- Scorer used by searchManager; its frontend is whatever the global
         "frontend" property names (epFrontEnd in this file). -->
    <component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
        <property name="frontend" value="${frontend}"/>
    </component>

    <!-- ******************************************************** --> 
    <!-- The linguist configuration        --> 
    <!-- ******************************************************** --> 

    <!-- Linguist: combines the jsgfGrammar grammar with the wsj acoustic
         model. wordInsertionProbability and languageWeight are taken from
         the global properties of the same names. -->
    <component name="flatLinguist" type="edu.cmu.sphinx.linguist.flat.FlatLinguist">
        <property name="logMath"                  value="logMath"/>
        <property name="grammar"                  value="jsgfGrammar"/>
        <property name="acousticModel"            value="wsj"/>
        <property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
        <property name="languageWeight"           value="${languageWeight}"/>
        <property name="unitManager"              value="unitManager"/>
    </component>


    <!-- ******************************************************** --> 
    <!-- The Grammar configuration        --> 
    <!-- ******************************************************** --> 

    <!-- JSGF grammar: loads the grammar named "digits" from the transcriber
         demo resource directory. NOTE(review): per the discussion that
         accompanies this file, new words have to be added to this grammar
         (or to another language model), not only to the dictionary, before
         they can be recognized; confirm against the Sphinx-4 docs. -->
    <component name="jsgfGrammar"
               type="edu.cmu.sphinx.jsgf.JSGFGrammar">
        <property name="dictionary"      value="dictionary"/>
        <property name="grammarLocation" value="resource:/edu/cmu/sphinx/demo/transcriber/"/>
        <property name="grammarName"     value="digits"/>
        <property name="logMath"         value="logMath"/>
    </component>

    <!-- ******************************************************** --> 
    <!-- The Dictionary configuration       --> 
    <!-- ******************************************************** --> 
    <!-- Pronunciation dictionary: the word list (cmudict.0.6d) and the
         filler list (noisedict) are both read from the WSJ model resource
         directory. -->
    <component name="dictionary" type="edu.cmu.sphinx.linguist.dictionary.FastDictionary">
        <property name="dictionaryPath"
                  value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/dict/cmudict.0.6d"/>
        <property name="fillerPath"
                  value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz/noisedict"/>
        <property name="addSilEndingPronunciation" value="false"/>
        <property name="wordReplacement"           value="&lt;sil&gt;"/>
        <property name="unitManager"               value="unitManager"/>
    </component>


    <!-- ******************************************************** --> 
    <!-- The acoustic model configuration       --> 
    <!-- ******************************************************** --> 
    <!-- Acoustic model referenced by flatLinguist; its data is supplied by
         the wsjLoader component. -->
    <component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
        <property name="loader"      value="wsjLoader"/>
        <property name="unitManager" value="unitManager"/>
    </component>

    <!-- Loader for the wsj acoustic model; "location" points at the WSJ
         model resource directory. -->
    <component name="wsjLoader"
               type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
        <property name="logMath"     value="logMath"/>
        <property name="unitManager" value="unitManager"/>
        <property name="location"    value="resource:/WSJ_8gau_13dCep_16k_40mel_130Hz_6800Hz"/>
    </component>


    <!-- ******************************************************** --> 
    <!-- The unit manager configuration       --> 
    <!-- ******************************************************** --> 

    <!-- Unit manager shared by the linguist, dictionary, acoustic model and
         loader components; declared without any properties. -->
    <component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>

    <!-- ******************************************************** --> 
    <!-- The endpointing (ep) frontend; audio comes from a file source -->
    <!-- ******************************************************** --> 
    <!-- Front end selected by the global "frontend" property. The pipeline
         lists twelve stage components, starting with audioFileDataSource
         and ending with featureExtraction; each is declared further down. -->
    <component name="epFrontEnd"
               type="edu.cmu.sphinx.frontend.FrontEnd">
        <propertylist name="pipeline">
            <item>audioFileDataSource </item>
            <item>dataBlocker </item>
            <item>speechClassifier </item>
            <item>speechMarker </item>
            <item>nonSpeechDataFilter </item>
            <item>preemphasizer </item>
            <item>windower </item>
            <item>fft </item>
            <item>melFilterBank </item>
            <item>dct </item>
            <item>liveCMN </item>
            <item>featureExtraction </item>
        </propertylist>
    </component>

    <!-- ******************************************************** --> 
    <!-- The frontend pipelines         --> 
    <!-- ******************************************************** --> 

    <!-- Stage components named in the epFrontEnd pipeline. None of them
         sets a property here, so each presumably runs with its class
         defaults (confirm against the Sphinx-4 Javadoc). -->
    <component name="audioFileDataSource"
               type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>

    <component name="dataBlocker"
               type="edu.cmu.sphinx.frontend.DataBlocker"/>

    <component name="speechClassifier"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>

    <component name="nonSpeechDataFilter"
               type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>

    <component name="speechMarker"
               type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker"/>

    <component name="preemphasizer"
               type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>

    <component name="windower"
               type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/>

    <component name="fft"
               type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/>

    <component name="melFilterBank"
               type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/>

    <component name="dct"
               type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>

    <component name="liveCMN"
               type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>

    <component name="featureExtraction"
               type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>


    <!-- ******************************************************* --> 
    <!-- monitors            --> 
    <!-- ******************************************************* --> 

    <!-- Accuracy monitor attached to the recognizer named by the global
         "recognizer" property; both result-display flags are set to false. -->
    <component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.BestPathAccuracyTracker">
        <property name="recognizer"         value="${recognizer}"/>
        <property name="showAlignedResults" value="false"/>
        <property name="showRawResults"     value="false"/>
    </component>

    <!-- Memory monitor attached to the recognizer named by the global
         "recognizer" property; summary and detail flags are set to false. -->
    <component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
        <property name="recognizer"  value="${recognizer}"/>
        <property name="showSummary" value="false"/>
        <property name="showDetails" value="false"/>
    </component>

    <!-- Speed monitor attached to the recognizer and front end named by the
         global properties; the summary flag is true, the detail flag false. -->
    <component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
        <property name="recognizer"  value="${recognizer}"/>
        <property name="frontend"    value="${frontend}"/>
        <property name="showSummary" value="true"/>
        <property name="showDetails" value="false"/>
    </component>


    <!-- ******************************************************* --> 
    <!-- Miscellaneous components        --> 
    <!-- ******************************************************* --> 

    <!-- Log-math helper referenced by the search manager, active list,
         linguist, grammar and acoustic-model loader components. -->
    <component name="logMath"
               type="edu.cmu.sphinx.util.LogMath">
        <property name="logBase"     value="1.0001"/>
        <property name="useAddTable" value="true"/>
    </component>

</config> 

我曾幾次試圖把字典改成數字以外的內容,也嘗試了一些其他方法;如能提供幫助將不勝感激,或者至少提供更簡化的說明。謝謝。

回答

0

僅更改字典是不夠的。您必須既爲您的單詞創建字典條目,又將新單詞包含在語言模型中——在您的情況下就是JSGF語法。

您將需要確定新單詞在應用程序中的顯示方式和時間,並創建適當的jsgf定義。

或者,您可以創建一個n-gram語言模型(即使僅使用unigrams)。它可以根據您的語料庫自動生成(使用sphinx utils的lmtool),也可以手動創建。