2014-10-03 67 views
0

我想創建一個識別文件.wav中的語音的程序。我想下面的代碼,但它拋出一個異常帶自定義配置的Sphinx4 OutOfMemoryError

Exception in thread "main" java.lang.OutOfMemoryError: GC overhead limit exceeded

即使我的「的eclipse.ini」有這樣的屬性:

--launcher.XXMaxPermSize 2048M 
--launcher.XXMaxPermSize 2048m 
-Xms2048m 
-Xmx2048m 

我怎樣才能解決這個例外?

Java代碼:

import java.net.MalformedURLException; 
import java.net.URL; 

import edu.cmu.sphinx.frontend.util.AudioFileDataSource; 
import edu.cmu.sphinx.recognizer.Recognizer; 
import edu.cmu.sphinx.result.Result; 
import edu.cmu.sphinx.util.props.ConfigurationManager; 

public class TestRecognizer { 

    public static void main(String[] args) { 
     ConfigurationManager cm; 

     if (args.length > 0) { 
      cm = new ConfigurationManager(args[0]); 
     } else { 
      cm = new ConfigurationManager("english_use_LexTreeLinguist.xml"); 
     } 

     URL audioURL = null; 
     try { 
      audioURL = new URL("file:./10001-90210-01803.wav"); 
     } catch (MalformedURLException e) { 
      e.printStackTrace(); 
     } 
     if(audioURL == null) 
      throw new IllegalArgumentException("Given audio file doesn't exist."); 

     // allocate the recognizer 
     System.out.println("Loading recognizer"); 
     Recognizer recognizer = (Recognizer) cm.lookup("recognizer"); 
     recognizer.allocate(); 
     System.out.println("Loading audio"); 
     AudioFileDataSource dataSource = (AudioFileDataSource) cm.lookup("audioFileDataSource"); 
     dataSource.setAudioFile(audioURL, null); 

     // loop the recognition until the programm exits. 
     Result result; 
     System.out.println("recognizing");`enter code here` 
     while ((result = recognizer.recognize())!= null) { 
      String resultText = result.getBestResultNoFiller(); 
      System.out.println(resultText); 
     } 
    } 

} 

XML配置文件:

<config> 
    <!-- ******************************************************** --> 
    <!-- frequently tuned properties        --> 
    <!-- ******************************************************** --> 

    <property name="absoluteBeamWidth"   value="-1"/> 
    <property name="relativeBeamWidth"   value="1E-80"/> 
    <property name="wordInsertionProbability" value=".1"/> 
    <property name="languageWeight"    value="8"/> 
    <property name="silenceInsertionProbability" value="1"/> 
    <property name="fillerInsertionProbability" value="1E-10"/> 
    <property name="logLevel"     value="WARNING"/> 
    <property name="recognizer" value="recognizer"/> 
    <property name="linguist" value="lexTreeLinguist"/> 
    <property name="frontend" value="mfcFrontEnd"/> 

    <!-- ******************************************************** --> 
    <!-- The Recognizer configuration    --> 
    <!-- ******************************************************** --> 

    <component name="recognizer" 
       type="edu.cmu.sphinx.recognizer.Recognizer"> 
     <property name="decoder" value="decoder"/> 
     <propertylist name="monitors"> 
     </propertylist> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The Decoder configuration        --> 
    <!-- ******************************************************** --> 

    <component name="decoder" type="edu.cmu.sphinx.decoder.Decoder"> 
     <property name="searchManager" value="searchManager"/> 
    </component> 

    <component name="searchManager" 
     type="edu.cmu.sphinx.decoder.search.SimpleBreadthFirstSearchManager"> 
     <property name="logMath" value="logMath"/> 
     <property name="linguist" value="${linguist}"/> 
     <property name="pruner" value="trivialPruner"/> 
     <property name="scorer" value="threadedScorer"/> 
     <property name="activeListFactory" value="activeList"/> 
    </component> 

    <component name="activeList" 
      type="edu.cmu.sphinx.decoder.search.SortingActiveListFactory"> 
     <property name="logMath" value="logMath"/> 
     <property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/> 
     <property name="relativeBeamWidth" value="${relativeBeamWidth}"/> 
    </component> 

    <component name="trivialPruner" 
       type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/> 

    <component name="threadedScorer" 
       type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer"> 
     <property name="frontend" value="${frontend}"/> 
     <property name="isCpuRelative" value="true"/> 
     <property name="numThreads" value="0"/> 
     <property name="minScoreablesPerThread" value="10"/> 
     <property name="scoreablesKeepFeature" value="true"/> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The linguist configuration        --> 
    <!-- ******************************************************** --> 

    <component name="lexTreeLinguist" 
       type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist"> 
     <property name="logMath" value="logMath"/> 
     <property name="acousticModel" value="wsj"/> 
     <property name="languageModel" value="trigramModel"/> 
     <property name="dictionary" value="englishDict"/> 
     <property name="addFillerWords" value="false"/> 
     <property name="fillerInsertionProbability" value="${fillerInsertionProbability}"/> 
     <property name="generateUnitStates" value="false"/> 
     <property name="wantUnigramSmear" value="true"/> 
     <property name="unigramSmearWeight" value="1"/> 
     <property name="wordInsertionProbability" 
       value="${wordInsertionProbability}"/> 
     <property name="silenceInsertionProbability" 
       value="${silenceInsertionProbability}"/> 
     <property name="languageWeight" value="${languageWeight}"/> 
     <property name="unitManager" value="unitManager"/> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The Language Model configuration       --> 
    <!-- ******************************************************** --> 
    <component name="trigramModel" 
      type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel"> 
     <property name="unigramWeight" value=".5"/> 
     <property name="maxDepth" value="3"/> 
     <property name="logMath" value="logMath"/> 
     <property name="dictionary" value="englishDict"/> 
     <property name="location" 
      value="resource:/edu/cmu/sphinx/models/language/en-us.lm.dmp"/> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The Dictionary configuration       --> 
    <!-- ******************************************************** --> 
    <component name="englishDict" 
     type="edu.cmu.sphinx.linguist.dictionary.FastDictionary"> 
     <property name="dictionaryPath" 
        value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/dict/cmudict.0.6d"/> 
     <property name="fillerPath" 
       value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz/noisedict"/> 
     <property name="addSilEndingPronunciation" value="false"/> 
     <property name="wordReplacement" value="&lt;sil&gt;"/> 
     <property name="unitManager" value="unitManager"/> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The acoustic model configuration       --> 
    <!-- ******************************************************** --> 

    <component name="wsj" 
       type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel"> 
     <property name="loader" value="wsjLoader"/> 
     <property name="unitManager" value="unitManager"/> 
    </component> 

    <component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader"> 
     <property name="logMath" value="logMath"/> 
     <property name="unitManager" value="unitManager"/> 
     <property name="location" value="resource:/WSJ_8gau_13dCep_8kHz_31mel_200Hz_3500Hz"/> 
    </component> 

    <!-- ******************************************************** --> 
    <!-- The unit manager configuration       --> 
    <!-- ******************************************************** --> 

    <component name="unitManager" 
     type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/> 

    <!-- ******************************************************** --> 
    <!-- The frontend configuration        --> 
    <!-- ******************************************************** --> 

    <component name="mfcFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd"> 
     <propertylist name="pipeline"> 
      <!--item>streamDataSource </item--> 
      <item>audioFileDataSource </item> 
      <item>preemphasizer </item> 
      <item>windower </item> 
      <item>fft </item> 
      <item>melFilterBank </item> 
      <item>dct </item> 
      <item>batchCMN </item> 
      <item>featureExtraction </item> 
     </propertylist> 
    </component> 

    <component name="streamDataSource" 
       type="edu.cmu.sphinx.frontend.util.StreamDataSource"> 
     <property name="sampleRate" value="16000"/> 
    <property name="bitsPerSample" value="16"/> 
    <property name="bigEndianData" value="false"/> 
    <property name="signedData" value="true"/> 
    </component> 

    <component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/> 

    <component name="preemphasizer" 
     type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/> 

    <component name="windower" 
       type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower"/> 

    <component name="fft" 
      type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform"/> 

    <component name="melFilterBank" 
      type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank"/> 

    <component name="dct" 
      type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/> 

    <component name="batchCMN" 
       type="edu.cmu.sphinx.frontend.feature.BatchCMN"/> 

    <component name="featureExtraction" 
     type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/> 

    <!-- ******************************************************* --> 
    <!-- Miscellaneous components        --> 
    <!-- ******************************************************* --> 

    <component name="logMath" type="edu.cmu.sphinx.util.LogMath"> 
     <property name="logBase" value="1.0001"/> 
     <property name="useAddTable" value="true"/> 
    </component> 

</config> 

回答

0

你的配置是完全錯誤的,梁太寬,前端配置不正確。

如果您想進行修改或僅使用高級API而不使用XML文件,則需要使用默認配置default.config.xml。爲了獲得最佳解碼精度,您需要在下載中使用en-us-8khz聲學模型。

如果你想轉錄8khz音頻,你還需要調用recognizer.setSampleRate(8000);