2015-10-14 77 views
0

我一直在嘗試使用BlueMix SpeechToText Java庫(com.ibm.watson.developer_cloud.speech_to_text.v1),尤其是SpeechToText類,想用IBM Watson語音轉文本服務處理大文件。

我有很長的WAV文件,我想轉換爲文本。這些文件約70MB。目標是使用Java API(http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/speech-to-text/api/v1/?java#recognize)來識別文本。我意識到需要每30秒檢查一次調用的狀態,因爲轉錄結束後,我只有30秒的時間來檢索最終結果。

爲了做到這一點,在使用REST API時,我需要先創建一個會話,然後把後續的請求綁定到該會話,這樣我就可以查詢在會話中運行的作業的狀態。

我試圖創建一個會話,但會話從不可用。我已經驗證它似乎在提供的webapp(https://stream.watsonplatform.net/speech-to-text/api/v1/sessions?Method=GET)上工作。

此外,我還試圖編寫自己的客戶端,並設置從會話創建響應中取得的cookie,但同樣沒有成功。

我也試圖通過安全的WebSockets連接,但無法實現成功連接。

下面是我一直使用的一些示例代碼。

任何想法?

/**
 * Sample client for the IBM Watson Speech to Text service.
 *
 * <p>Two variants are provided: {@link #httpClient()} sends a WAV file through a
 * session-bound HTTP request using the SDK's {@code SpeechToText} class, and
 * {@link #webSocketClient()} obtains an authorization token and opens a secure
 * WebSocket connection to the recognize endpoint.
 */
public class Speech2Text extends WatsonService {
    private static final Logger logger = LoggerFactory.getLogger(Speech2Text.class);

    /**
     * Runs the HTTP variant of the sample.
     *
     * @param args ignored
     * @throws FileNotFoundException if the transcript file cannot be created
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     * @throws InterruptedException declared for the WebSocket variant, which blocks while connecting
     */
    public static void main(String[] args) throws FileNotFoundException,
            UnsupportedEncodingException, InterruptedException {
        Speech2Text s2t = new Speech2Text();
        s2t.httpClient();
        // To exercise the WebSocket variant instead, call s2t.webSocketClient() here and
        // handle the URISyntaxException / IOException it declares.
    }

    /**
     * Creates a recognition session, submits the audio file within that session and writes
     * the transcript to disk.
     *
     * <p>The session is always deleted and the transcript writer always closed, even when a
     * service call or the file write fails.
     *
     * @throws FileNotFoundException if the transcript file cannot be created
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    public void httpClient() throws FileNotFoundException, UnsupportedEncodingException {
        logger.info("Running http client");
        final Stopwatch stopwatch = Stopwatch.createStarted();
        SpeechToText service = new SpeechToText();
        service.setUsernameAndPassword("XXXXXX", "XXXXX");
        List<SpeechModel> models = service.getModels();
        for (SpeechModel model : models) {
            logger.info(model.getName());
        }
        SpeechSession session = service.createSession("en-US_NarrowbandModel");
        try {
            System.out.println(session.toString());
            SessionStatus status = service.getRecognizeStatus(session);
            logger.info(status.getModel());
            logger.info(service.getEndPoint());
            File audio = new File("/home/baaron/watson-bluemix/answer_06.wav");
            Map<String, Object> params = new HashMap<String, Object>();
            params.put("audio", audio);
            params.put("content_type", "audio/wav");
            params.put("continuous", "true");
            params.put("session_id", session.getSessionId());
            logger.info(service.getEndPoint());
            SpeechResults transcript = service.recognize(params);
            PrintWriter writer = new PrintWriter(
                    "/home/baaron/watson-bluemix/PCCJPApart1test.transcript", "UTF-8");
            try {
                writer.println(transcript.toString());
            } finally {
                // Close (and thereby flush) the transcript even if printing fails.
                writer.close();
            }
            SessionStatus status1 = service.getRecognizeStatus(session.getSessionId());
            System.out.println(status1);
        } finally {
            // Do not leave the session allocated on the service when a call above throws.
            service.deleteSession(session.getSessionId());
        }
        stopwatch.stop();
        logger.info("Processing took: " + stopwatch + ".");
    }

    /**
     * Requests an authorization token, opens a TLS WebSocket connection to the recognize
     * endpoint and sends the {@code start} action.
     *
     * <p>NOTE(review): no audio is streamed after the {@code start} message and the socket
     * is left open; this method only demonstrates establishing the connection.
     *
     * @throws URISyntaxException if the WebSocket URI (including the token) is malformed
     * @throws IOException if the token cannot be fetched, TLS cannot be set up, or the
     *         connection cannot be established
     * @throws InterruptedException if interrupted while waiting for the connection
     */
    public void webSocketClient() throws URISyntaxException, IOException,
            InterruptedException {
        logger.info("Running web socket client");
        String token = fetchToken();
        WebSocketImpl.DEBUG = true;
        // The token is issued for .../speech-to-text/api, so connect to that same service
        // path rather than the former "speech-to-text-beta" path.
        final WebSocketClient client = new WebSocketClient(new URI(
                "wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize?watson-token="
                        + token)) {
            @Override
            public void onMessage(String message) {
                // Surface what the service sends instead of silently dropping it.
                logger.info("Received message: {}", message);
            }

            @Override
            public void onOpen(ServerHandshake handshake) {
                System.out.println("opened connection");
            }

            @Override
            public void onClose(int code, String reason, boolean remote) {
                System.out.println("closed connection");
            }

            @Override
            public void onError(Exception ex) {
                logger.error("WebSocket error", ex);
            }
        };
        // A wss:// URI needs an SSL-aware socket factory; fail fast if TLS cannot be
        // initialised rather than handing a null context to the factory.
        SSLContext sslContext;
        try {
            sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, null, null);
        } catch (NoSuchAlgorithmException e) {
            throw new IOException("TLS is not available in this runtime", e);
        } catch (KeyManagementException e) {
            throw new IOException("Could not initialise the TLS context", e);
        }
        client.setWebSocketFactory(new DefaultSSLWebSocketClientFactory(sslContext));
        boolean connected = client.connectBlocking();
        logger.info("CONNECTED: " + connected);
        if (!connected) {
            throw new IOException("Could not establish the WebSocket connection");
        }
        JSONObject obj = new JSONObject();
        obj.put("action", "start");
        obj.put("content-type", "audio/wav");
        client.send(obj.toString());
        logger.info("Done");
    }

    /**
     * Fetches an authorization token for the Speech to Text service using basic auth.
     *
     * <p>NOTE(review): the request is sent as a POST, as in the original sample; confirm
     * that the authorization endpoint accepts POST as well as GET.
     *
     * @return the response body of the token request
     * @throws IOException if the request fails or does not return HTTP 200 with a body
     */
    private String fetchToken() throws IOException {
        String encoding = Base64.encodeBase64String("XXXXXXXXXX".getBytes("UTF-8"));
        HttpPost httppost = new HttpPost(
                "https://stream.watsonplatform.net/authorization/api/v1/token?url=https://stream.watsonplatform.net/speech-to-text/api");
        httppost.setHeader("Authorization", "Basic " + encoding);
        System.out.println("executing request " + httppost.getRequestLine());
        DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            HttpResponse response = httpclient.execute(httppost);
            logger.info(response.getStatusLine().getReasonPhrase());
            int statusCode = response.getStatusLine().getStatusCode();
            HttpEntity entity = response.getEntity();
            if (statusCode != 200 || entity == null) {
                // An error page must not be mistaken for a token.
                throw new IOException("Token request failed with HTTP status " + statusCode);
            }
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(entity.getContent(), "UTF-8"));
            try {
                StringBuilder out = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    out.append(line);
                }
                return out.toString();
            } finally {
                reader.close();
            }
        } finally {
            // Release the pooled connections held by the one-off client.
            httpclient.getConnectionManager().shutdown();
        }
    }
}
+0

看看這個答案:http://stackoverflow.com/questions/36504879/watson-stt-java-varying-results-between-websockets-java-and-http-post/36806207#36806207 –

回答

0

如果你想要的是錄製的音頻文件,你可以這樣做:

// Transcribe pre-recorded audio files with the SDK's synchronous recognize call.
SpeechToText service = new SpeechToText();
// Replace the placeholders with the service credentials.
service.setUsernameAndPassword("{username}", "{password}");

// Recognition settings shared by every file below.
RecognizeOptions options = new RecognizeOptions.Builder()
    .contentType("audio/wav")
    .continuous(true)
    .model("en-US_NarrowbandModel")
    .inactivityTimeout(-1) // Seconds after which the connection is closed if no audio is detected
    .build();

String[] files = {"file1.wav", "file2.wav"};
for (String file : files) {
    // execute() blocks until the service returns the results for this file.
    SpeechResults results = service.recognize(new File(file), options).execute();
    System.out.println(results); // print results(you could write them to a file)
}

確保您使用的Java SDK的最新版本。

Maven

<!-- Watson Developer Cloud Java SDK, version 3.8.0 -->
<dependency> 
    <groupId>com.ibm.watson.developer_cloud</groupId> 
    <artifactId>java-sdk</artifactId> 
    <version>3.8.0</version> 
</dependency> 

Gradle

// Watson Developer Cloud Java SDK, version 3.8.0, on the compile configuration
compile 'com.ibm.watson.developer_cloud:java-sdk:3.8.0'