2010-04-18 81 views
6

問題:我正在嘗試使用espeak文字轉語音引擎。我已經讓它在Linux上運行得很好(代碼如下)。現在我想把這個基本程序移植到Windows,但這幾乎是不可能的...在Windows上該如何使用espeak的SAPI/DLL?

部分問題是,Windows DLL只允許AUDIO_OUTPUT_SYNCHRONOUS,這意味着它需要一個回調,但我不知道如何從回調中播放音頻...起初它崩潰了,然後我意識到我需要一個回調函數;現在我能在回調函數中獲取數據,但我不知道如何播放它,因爲它既不是wav文件,也不像在Linux上那樣自動播放。

SourceForge網站沒有什麼幫助,因爲它基本上只是說要使用SAPI版本,但又沒有任何關於如何使用espeak SAPI DLL的例子...

總之,這是我的代碼,有人能幫忙嗎?

#ifdef __cplusplus 
#include <cstdio> 
#include <cstdlib> 
#include <cstring> 
#else 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#endif 

#include <assert.h> 
#include <ctype.h> 

//#include "speak_lib.h" 
#include "espeak/speak_lib.h" 

// libespeak-dev: /usr/include/espeak/speak_lib.h 
// apt-get install libespeak-dev 
// apt-get install libportaudio-dev 

// g++ -o mine mine.cpp -lespeak 
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak 
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak 


// Name of the voice to select; main() fills it in and passes it to
// espeak_SetVoiceByName().
char voicename[40]; 
// Value returned by espeak_Initialize() in main().
int samplerate; 
// Not referenced anywhere in the visible code.
int quiet = 0; 
// Display character for an espeak_VOICE gender value, indexed by that value
// in ListVoices() (main() documents 0=none, 1=male, 2=female).
static char genders[4] = {' ','M','F',' '}; 

// Directory handed to espeak_Initialize() as its path argument.
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ 
const char *data_path = NULL; // use default path for espeak-data 


/*
 * Compare the end of `s` against `sub` ("reverse" strcmp).
 *
 * Returns 0 when `s` ends with `sub` (an empty `sub` always matches).
 * Otherwise returns a non-zero value: the memcmp() result of the tail of `s`
 * versus `sub`, or -1 when `sub` is longer than `s` and therefore cannot be
 * a suffix of it.
 *
 * Both arguments must be valid NUL-terminated strings.
 */
int strrcmp(const char *s, const char *sub)
{
    size_t slen = strlen(s);
    size_t sublen = strlen(sub);

    /* Without this guard the tail pointer below would point before the
       start of `s` and memcmp() would read out of bounds. */
    if (sublen > slen) {
        return -1;
    }

    return memcmp(s + (slen - sublen), sub, sublen);
}


/*
 * Copy the NUL-terminated string `source` into `dest` and return a pointer
 * to the terminating NUL written into `dest`, so that a caller can keep
 * appending from the returned position.
 *
 * `dest` must be large enough for `source` including its terminator; both
 * pointers must be non-NULL and distinct (checked with assert).
 */
char * strrcpy(char *dest, const char *source)
{
    assert(dest != NULL);
    assert(source != NULL);
    assert(dest != source);

    char *out = dest;
    const char *in = source;

    /* Copy the payload characters one by one. */
    while (*in != '\0') {
        *out = *in;
        ++out;
        ++in;
    }

    /* Terminate and hand back the position of the terminator. */
    *out = '\0';
    return out;
}

/*
 * Return the name of the first voice reported by espeak_ListVoices() whose
 * `languages` string ends with the given language code.
 *
 * pszShortSign: language code such as "DE" or "EN"; it is lower-cased before
 *               the comparison and truncated to LANGUAGE_LENGTH - 1 characters.
 *               NULL is treated as an empty code.
 *
 * Returns a pointer to a function-local static buffer holding the voice name
 * (truncated to LANGUAGE_LENGTH - 1 characters if longer), or "default" when
 * no voice matches. The buffer is overwritten by the next call, so copy the
 * result if it must survive.
 *
 * NOTE(review): speak_lib.h describes `languages` as priority-byte + name
 * pairs; the suffix comparison only looks at the end of the first entry --
 * confirm against the header.
 */
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
    static char szReturnValue[LANGUAGE_LENGTH];
    memset(szReturnValue, 0, LANGUAGE_LENGTH);

    /* Lower-case the code into the buffer, always leaving room for the NUL.
       The cast to unsigned char keeps tolower() defined for non-ASCII bytes. */
    if (pszShortSign != NULL) {
        for (int i = 0; i < LANGUAGE_LENGTH - 1 && pszShortSign[i] != '\0'; ++i) {
            szReturnValue[i] = (char) tolower((unsigned char) pszShortSign[i]);
        }
    }

    const espeak_VOICE **voices = espeak_ListVoices(NULL);
    if (voices != NULL) {
        for (int ix = 0; voices[ix] != NULL; ix++) {
            const espeak_VOICE *v = voices[ix];

            if (v->languages == NULL || v->name == NULL) {
                continue;
            }

            if (strrcmp(v->languages, szReturnValue) == 0) {
                /* Bounded copy: a long voice name must not overrun the buffer. */
                size_t len = strlen(v->name);
                if (len > LANGUAGE_LENGTH - 1) {
                    len = LANGUAGE_LENGTH - 1;
                }
                memcpy(szReturnValue, v->name, len);
                szReturnValue[len] = '\0';
                return szReturnValue;
            }
        }
    }

    strcpy(szReturnValue, "default");
    return szReturnValue;
} // End function GetLanguageVoiceName


void ListVoices() 
{ 
const espeak_VOICE **voices; 
espeak_VOICE voice_select; 
voices = espeak_ListVoices(NULL); 

const espeak_VOICE *v; 
for(int ix=0; (v = voices[ix]) != NULL; ix++) 
{ 
    printf("Shortsign: %s\n", v->languages); 
    printf("age: %d\n", v->age); 
    printf("gender: %c\n", genders[v->gender]); 
    printf("name: %s\n", v->name); 
    printf("\n\n"); 
} // End for 
} // End function getvoicename 


/*
 * Linux demo: initialise espeak for direct playback, speak one sentence with
 * the voice found for "DE" and one word with the voice found for "EN", then
 * shut the engine down.
 *
 * Returns EXIT_FAILURE when espeak cannot be initialised or a synthesis call
 * reports an error, EXIT_SUCCESS otherwise.
 */
int main()
{
    int failed = 0;

    printf("Hello World!\n");
    const char* szVersionInfo = espeak_Info(NULL);

    printf("Espeak version: %s\n", szVersionInfo);

    samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
    if (samplerate <= 0)
    {
        // Nothing below can work without an initialised engine.
        printf("Unable to initialize espeak\n");
        return EXIT_FAILURE;
    }

    // Voice names can be listed with `espeak --voices`; "default" and
    // "german" are other names that were tried here.
    strcpy(voicename, GetLanguageVoiceName("DE"));

    if(espeak_SetVoiceByName(voicename) != EE_OK)
    {
        printf("Espeak setvoice error...\n");
    }

    static char word[200] = "Hello World";
    strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
    int speed = 220;
    // NOTE(review): speak_lib.h documents 100 as normal full volume and says
    // larger values may distort; 500 is kept as in the original -- confirm.
    int volume = 500;
    int pitch = 50; // base pitch, range 0-100. 50=normal

    // espeak.cpp 625
    espeak_SetParameter(espeakRATE, speed, 0);
    espeak_SetParameter(espeakVOLUME,volume,0);
    espeak_SetParameter(espeakPITCH,pitch,0);
    // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
    // espeakPUNCTUATION: which punctuation characters to announce:
    //     value in espeak_PUNCT_TYPE (none, all, some),

    // Re-select the current voice with the gender forced to female.
    espeak_VOICE *voice_spec = espeak_GetCurrentVoice();
    if (voice_spec != NULL)
    {
        voice_spec->gender=2; // 0=none 1=male, 2=female,
        //voice_spec->age = age;
        espeak_SetVoiceByProperties(voice_spec);
    }

    if (espeak_Synth(word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL) != EE_OK)
    {
        printf("Espeak synth error...\n");
        failed = 1;
    }
    // Block until the queued speech has finished.
    if (espeak_Synchronize() != EE_OK)
    {
        printf("Espeak synchronize error...\n");
        failed = 1;
    }

    strcpy(voicename, GetLanguageVoiceName("EN"));
    if(espeak_SetVoiceByName(voicename) != EE_OK)
    {
        printf("Espeak setvoice error...\n");
    }
    strcpy(word, "Googlebot");

    if (espeak_Synth(word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL) != EE_OK)
    {
        printf("Espeak synth error...\n");
        failed = 1;
    }
    if (espeak_Synchronize() != EE_OK)
    {
        printf("Espeak synchronize error...\n");
        failed = 1;
    }

    espeak_Terminate();
    printf("Espeak terminated\n");
    return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}

/* 
if(espeak_SetVoiceByName(voicename) != EE_OK) 
{ 
    memset(&voice_select,0,sizeof(voice_select)); 
    voice_select.languages = voicename; 
    if(espeak_SetVoiceByProperties(&voice_select) != EE_OK) 
    { 
     fprintf(stderr,"%svoice '%s'\n",err_load,voicename); 
     exit(2); 
    } 
} 
*/ 

上述代碼適用於Linux。下面是我目前在Vista 64位(32位模擬)上寫出的代碼:

#ifdef __cplusplus 
#include <cstdio> 
#include <cstdlib> 
#include <cstring> 
#else 
#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#endif 

#include <assert.h> 
#include <ctype.h> 

#include "speak_lib.h" 
//#include "espeak/speak_lib.h" 

// libespeak-dev: /usr/include/espeak/speak_lib.h 
// apt-get install libespeak-dev 
// apt-get install libportaudio-dev 

// g++ -o mine mine.cpp -lespeak 
// g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak 
// gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak 


// Name of the voice to select; main() fills it in and passes it to
// espeak_SetVoiceByName().
char voicename[40]; 
// Value returned by espeak_Initialize() in main(); main() treats a value
// <= 0 as an initialisation failure.
int iSampleRate; 
// Not referenced anywhere in the visible code.
int quiet = 0; 
// Display character for an espeak_VOICE gender value, indexed by that value
// in ListVoices().
static char genders[4] = {' ','M','F',' '}; 

// Directory handed to espeak_Initialize() as its path argument; here a
// hard-coded location of the unpacked espeak 1.43 source tree.
//const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ 
//const char *data_path = NULL; // use default path for espeak-data 
const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\"; 


/*
 * Compare the end of `s` against `sub` ("reverse" strcmp).
 *
 * Returns 0 when `s` ends with `sub` (an empty `sub` always matches).
 * Otherwise returns a non-zero value: the memcmp() result of the tail of `s`
 * versus `sub`, or -1 when `sub` is longer than `s` and therefore cannot be
 * a suffix of it.
 *
 * Both arguments must be valid NUL-terminated strings.
 */
int strrcmp(const char *s, const char *sub)
{
    size_t slen = strlen(s);
    size_t sublen = strlen(sub);

    /* Without this guard the tail pointer below would point before the
       start of `s` and memcmp() would read out of bounds. */
    if (sublen > slen) {
        return -1;
    }

    return memcmp(s + (slen - sublen), sub, sublen);
}


/*
 * Copy the NUL-terminated string `source` into `dest` and return a pointer
 * to the terminating NUL written into `dest`, so that a caller can keep
 * appending from the returned position.
 *
 * `dest` must be large enough for `source` including its terminator; both
 * pointers must be non-NULL and distinct (checked with assert).
 */
char * strrcpy(char *dest, const char *source)
{
    assert(dest != NULL);
    assert(source != NULL);
    assert(dest != source);

    char *out = dest;
    const char *in = source;

    /* Copy the payload characters one by one. */
    while (*in != '\0') {
        *out = *in;
        ++out;
        ++in;
    }

    /* Terminate and hand back the position of the terminator. */
    *out = '\0';
    return out;
}

/*
 * Return the name of the first voice reported by espeak_ListVoices() whose
 * `languages` string ends with the given language code.
 *
 * pszShortSign: language code such as "DE" or "EN"; it is lower-cased before
 *               the comparison and truncated to LANGUAGE_LENGTH - 1 characters.
 *               NULL is treated as an empty code.
 *
 * Returns a pointer to a function-local static buffer holding the voice name
 * (truncated to LANGUAGE_LENGTH - 1 characters if longer), or "default" when
 * no voice matches. The buffer is overwritten by the next call, so copy the
 * result if it must survive.
 *
 * NOTE(review): speak_lib.h describes `languages` as priority-byte + name
 * pairs; the suffix comparison only looks at the end of the first entry --
 * confirm against the header.
 */
const char* GetLanguageVoiceName(const char* pszShortSign)
{
#define LANGUAGE_LENGTH 30
    static char szReturnValue[LANGUAGE_LENGTH];
    memset(szReturnValue, 0, LANGUAGE_LENGTH);

    /* Lower-case the code into the buffer, always leaving room for the NUL.
       The cast to unsigned char keeps tolower() defined for non-ASCII bytes. */
    if (pszShortSign != NULL) {
        for (int i = 0; i < LANGUAGE_LENGTH - 1 && pszShortSign[i] != '\0'; ++i) {
            szReturnValue[i] = (char) tolower((unsigned char) pszShortSign[i]);
        }
    }

    const espeak_VOICE **voices = espeak_ListVoices(NULL);
    if (voices != NULL) {
        for (int ix = 0; voices[ix] != NULL; ix++) {
            const espeak_VOICE *v = voices[ix];

            if (v->languages == NULL || v->name == NULL) {
                continue;
            }

            if (strrcmp(v->languages, szReturnValue) == 0) {
                /* Bounded copy: a long voice name must not overrun the buffer. */
                size_t len = strlen(v->name);
                if (len > LANGUAGE_LENGTH - 1) {
                    len = LANGUAGE_LENGTH - 1;
                }
                memcpy(szReturnValue, v->name, len);
                szReturnValue[len] = '\0';
                return szReturnValue;
            }
        }
    }

    strcpy(szReturnValue, "default");
    return szReturnValue;
} // End function GetLanguageVoiceName


void ListVoices() 
{ 
const espeak_VOICE **voices; 
espeak_VOICE voice_select; 
voices = espeak_ListVoices(NULL); 

const espeak_VOICE *v; 
for(int ix=0; (v = voices[ix]) != NULL; ix++) 
{ 
    printf("Shortsign: %s\n", v->languages); 
    printf("age: %d\n", v->age); 
    printf("gender: %c\n", genders[v->gender]); 
    printf("name: %s\n", v->name); 
    printf("\n\n"); 
} // End for 
} // End function getvoicename 


/*
 * Synthesis callback for espeak (register with espeak_SetSynthCallback).
 *
 * This version only logs what it receives: the code it was taken from wrote
 * the samples to an audio output object at the marked spot, and no playback
 * is implemented here.
 *
 * wav:        buffer of synthesized 16-bit samples; NULL is logged as
 *             "speech has completed".
 * numsamples: number of samples in `wav`.
 * events:     event list supplied by espeak (unused).
 *
 * Returns 0 to continue synthesis (1 would abort it).
 */
#define LOGI(x) printf("%s\n", x)
static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events)
{
    char buf[100];
    (void) events;

    sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples);
    LOGI(buf);

    if (wav == NULL)
    {
        LOGI("Null: speech has completed");
    }

    if (numsamples > 0)
    {
        // Playback hook: the samples would have to be handed to an audio
        // output here, e.g. audout->write(wav, sizeof(short) * numsamples);
        // The byte count is a size_t, so convert it explicitly to match the
        // format specifier instead of passing it to %d.
        unsigned long nbytes = (unsigned long) (sizeof(short) * (size_t) numsamples);
        sprintf(buf, "AudioTrack wrote: %lu bytes", nbytes);
        LOGI(buf);
    }

    return 0; // continue synthesis (1 is to abort)
}


/*
 * Synthesis callback for espeak that appends the received samples to the
 * file "myfile1.wav" in the current directory.
 *
 * The samples are written as raw `short` values with no header in front of
 * them, so despite its name the file is not a playable WAV file as written.
 *
 * wav:        buffer of synthesized 16-bit samples; NULL is logged as
 *             "speech has completed" and nothing is written.
 * numsamples: number of samples in `wav`.
 * events:     event list supplied by espeak (unused; the code this was
 *             taken from carried the FILE* in events->user_data instead of
 *             opening a fixed file name).
 *
 * Returns 0 to continue synthesis, or 1 to abort it when the file cannot be
 * opened or the samples cannot be written completely.
 */
static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events)
{
    char buf[100];
    (void) events;

    sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples);
    LOGI(buf);

    if (wav == NULL)
    {
        LOGI("Null: speech has completed");
    }

    // Nothing to store: do not touch the file (and never pass NULL to fwrite).
    if (wav == NULL || numsamples <= 0)
    {
        return 0; // continue synthesis (1 is to abort)
    }

    FILE* fp = fopen("myfile1.wav", "ab");
    if (fp == NULL)
    {
        LOGI("Unable to open myfile1.wav for appending");
        return 1; // abort synthesis
    }

    // Write all of the samples, then close the file so the handle is not
    // leaked on every callback and the data is flushed to disk.
    size_t wanted = (size_t) numsamples;
    size_t written = fwrite(wav, sizeof(short), wanted, fp);
    int failed = (written != wanted);

    if (fclose(fp) != 0)
    {
        failed = 1;
    }

    if (failed)
    {
        LOGI("Unable to write samples to myfile1.wav");
        return 1; // abort synthesis
    }

    return 0; // continue synthesis (1 is to abort)
}



/*
 * Windows demo: initialise espeak in synchronous mode, register the
 * write-to-file synthesis callback, synthesize one sentence with the
 * "default" voice and shut the engine down.
 *
 * Returns EXIT_FAILURE when espeak cannot be initialised or a synthesis call
 * reports an error, EXIT_SUCCESS otherwise.
 */
int main()
{
    int failed = 0;

    printf("Hello World!\n");
    const char* szVersionInfo = espeak_Info(NULL);

    printf("Espeak version: %s\n", szVersionInfo);

    // Synchronous output: audio is handed to the callback registered below
    // instead of being played by the library.
    iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0);
    if (iSampleRate <= 0)
    {
        printf("Unable to initialize espeak");
        return EXIT_FAILURE;
    }

    //ListVoices();

    // Voice names can be listed with `espeak --voices`; "german" and
    // GetLanguageVoiceName("DE") are other choices that were tried here.
    strcpy(voicename, "default");

    if(espeak_SetVoiceByName(voicename) != EE_OK)
    {
        printf("Espeak setvoice error...\n");
    }

    static char word[200] = "Hello World";
    strcpy(word, "Unnamed Player wurde zum Opfer von GSG9");
    int speed = 220;
    // NOTE(review): speak_lib.h documents 100 as normal full volume and says
    // larger values may distort; 500 is kept as in the original -- confirm.
    int volume = 500;
    int pitch = 50; // base pitch, range 0-100. 50=normal

    // espeak.cpp 625
    espeak_SetParameter(espeakRATE, speed, 0);
    espeak_SetParameter(espeakVOLUME,volume,0);
    espeak_SetParameter(espeakPITCH,pitch,0);
    // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal
    // espeakPUNCTUATION: which punctuation characters to announce:
    //     value in espeak_PUNCT_TYPE (none, all, some),

    // Pick which callback receives the synthesized samples.
    //espeak_SetSynthCallback(AndroidEspeakDirectSpeechCallback);
    espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback);

    unsigned int unique_identifier = 0;
    espeak_ERROR err = espeak_Synth(word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL);
    if (err != EE_OK)
    {
        printf("Espeak synth error...\n");
        failed = 1;
    }

    err = espeak_Synchronize();
    if (err != EE_OK)
    {
        printf("Espeak synchronize error...\n");
        failed = 1;
    }

    // espeak_Cancel();
    espeak_Terminate();
    printf("Espeak terminated\n");
    // Keeps the console window open until a key is pressed ("pause" is a
    // Windows shell command).
    system("pause");
    return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}
+1

+1,感謝發佈了一些如何把espeak作爲庫來使用的示例代碼。我之前很難找到例子。謝謝。 – Noremac 2011-12-08 22:46:24

回答

1

你試過把在回調中獲得的緩衝區傳遞給sndPlaySound()嗎?

Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long 

它的標準WINAPI如下:

sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY) 

或者,如果你有一個包含要播放音頻的wav文件:

sndPlaySound(filename, SND_ASYNC) 

playsound具有異步模式,在音頻播放過程中不會阻塞程序的執行。

注意:我已經在VB中使用它,上面的代碼片段用於VB。如果您使用VC++進行編碼,則可能需要相應地修改它們。但基本意圖仍然一樣;將緩衝區傳遞給sndPlaySound並設置了ASYNC標誌。

好運氣!

+0

sndPlaySound(wav,SND_ASYNC | SND_MEMORY);我想嘗試它,但它不工作(沒有崩潰,但沒有聲音,我也試過SND _)... sndPlaySound(wav [0],SND_ASYNC | SND_MEMORY);崩潰 – 2010-04-18 13:44:25

+0

因爲你使用了 FILE * user_data = fopen(「myfile1.wav」,「ab」);,輸出是否位於myfile1.wav中?試試用sndPlaySound(文件名,SND_ASYNC)播放它。也試試用你的媒體播放器播放myfile1.wav,並檢查它是否包含任何語音數據。我很好奇... – TheCodeArtist 2010-04-18 13:48:03

+0

這不是一個WAV文件,它只是數據......它甚至不在媒體播放器中播放 – 2010-04-18 13:49:07

3

需要對源代碼進行若干更改,才能使Windows庫具有與Linux上相同的功能。我在這裏(here)列出了這些更改。也提供了可以直接使用的二進制文件。

所有的補丁和描述也被髮送給espeak維護者(公開的,通過郵件列表和補丁跟蹤器),所以也許將來它可以直接使用。

相關問題