| /* |
| * baratinoo.c - Speech Dispatcher backend for Baratinoo (VoxyGen) |
| * |
| * Copyright (C) 2016 Brailcom, o.p.s. |
| * Copyright (C) 2019-2021 Samuel Thibault <samuel.thibault@ens-lyon.org> |
| * |
| * This is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2, or (at your option) |
| * any later version. |
| * |
| * This software is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program. If not, see <https://www.gnu.org/licenses/>. |
| */ |
| |
| /* |
| * Input and output choices. |
| * |
| * - The input is sent to the engine through a BCinputTextBuffer. There is |
| * a single one of those at any given time, and it is filled in |
| * module_speak_sync(). |
| * |
| * This doesn't use an input callback generating a continuous flow (and |
| * blocking waiting for more data) even though it would be a fairly nice |
| * design and would allow not to set speech attributes like volume, pitch and |
| * rate as often. This is because the Baratinoo engine has 2 limitations on |
| * the input callback: |
| * |
| * * It consumes everything (or at least a lot) up until the callback |
| * reports the input end by returning 0. Alternatively one could use the |
| * \flush command followed by a newline, so this is not really limiting. |
| * |
| * * More problematic, as the buffer callback is expected to feed a single |
| * input, calling BCpurge() (for handling stop events) unregisters it, |
| * requiring to re-add it afterward. This renders the continuous flow a |
| * lot less useful, as speech attributes like volume, pitch and rate would |
| * have to be set again. |
| * |
| * - The output uses the signal buffer instead of the output callback. |
| * The output callback sends sound to the output module phoneme by |
| * phoneme, which causes parasitic noise with ALSA due to a reset of |
| * parameters for each sound call. |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include <config.h> |
| #endif |
| |
| #ifdef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME |
| /* See below why this is problematic. It can however be useful to get the |
| * compiler help to check compatibility */ |
| # define BARATINOO_C_API |
| # include "baratinoo.h" |
| # include "baratinooio.h" |
| |
| #define VOICE_INFO_MEMBER(member_type, struct_p, member) \ |
| (((BaratinooVoiceInfo *)(struct_p))->member) |
| |
| #else |
| /*------------------------------ Baratinoo API ------------------------------*/ |
| /* |
| * This file does NOT include baratinoo.h and baratinooio.h on purpose. |
| * The reason is that Baratinoo does not provide ABI stability, and various |
| * things change in minor versions. This is a problem for this module that |
| * would like to support several versions at once. |
| * |
| * To work around this, we re-define all the API we need from the Baratinoo |
| * headers, and patch compatibility possibly dynamically. |
| * |
| * This has to be done with *EXTREME CARE* not to slip off face-first into the |
| * wall. What we keep is the lowest common denominator between the supported |
| * versions, and for the incompatible bits we use dynamic mapping and offsets. |
| * |
| * Currently supported versions: |
| * - 8.1 |
| * - 8.4 |
| * - 8.6 |
| * |
| * To add a new version, you need to: |
| * - First, check the diff between the oldest supported version and the new |
| * version. Diffing against the newest supported version can be handy but |
| * is not necessarily enough. |
| * - Once the incompatibilities are identified and they affect us, amend the |
| * code as necessary, keeping in mind to support older versions. You'll |
| * have to add a new `BV_` constant for the new version, and make sure |
| * everything that uses these constants handles the new value. |
| * - What to look for: |
| * - Members of structures that changed offset (reordered, changed type, etc.) |
| * - Enumeration values that changed value (order changed, previous member |
| * getting a different default value, etc.) |
| * - Union that changed size. |
| * - Function arguments that changed type or order. |
| * - Update get_baratinoo_supported_version() to return the appropriate |
| * constant in the appropriate situation. If the new version did not break |
| * compatibility with an already supported version, you might just return |
| * the constant for that other version. |
| * |
| * GOTCHAS |
| * - do NOT access BaratinooVoiceInfo members directly, always use |
| * VOICE_INFO_MEMBER(type, struct_p, member) |
| * - Make sure the structures allocated on the stack are large enough for all |
| * versions. |
| */ |
| |
| #include "baratinoo_compat.h" |
| |
| /* Dynamic compatibility part */ |
| #include <glib.h> |
| |
/* Compatibility levels known to this module.  The non-negative values are
 * used as indices into the per-version lookup tables, so they must stay
 * contiguous and start at zero. */
typedef enum {
	BV_UNSUPPORTED = -1,	/* library version we cannot drive */
	BV_8_1 = 0,
	BV_8_4,
	BV_8_6,
	N_SUPPORTED_BARATINOO_VERSIONS	/* number of table rows, keep last */
} SupportedBaratinooVersion;
| |
| /* BARATINOO_UTF8 */ |
| static const BARATINOO_TEXT_ENCODING bv_BARATINOO_UTF8[N_SUPPORTED_BARATINOO_VERSIONS] = { |
| [BV_8_1] = BARATINOO_UTF8__V8_1, |
| [BV_8_4] = BARATINOO_UTF8__V8_4, |
| [BV_8_6] = BARATINOO_UTF8__V8_4, |
| }; |
| #define BARATINOO_UTF8 (bv_BARATINOO_UTF8[baratinoo_engine.supported_version]) |
| |
| /* BaratinooVoiceInfo */ |
| enum { |
| VI_name, |
| VI_language, |
| VI_iso639, |
| VI_iso3166, |
| VI_gender, |
| VI_age, |
| N_VI_MEMBERS |
| }; |
| static const size_t bv_VoiceInfo_offsets[N_SUPPORTED_BARATINOO_VERSIONS][N_VI_MEMBERS] = { |
| #define BVIF_MEMBER_DECL(struct, member) [VI_##member] = G_STRUCT_OFFSET(struct, member) |
| #define BVIF_ENTRY(struct) { BVIF_MEMBER_DECL(struct, name), \ |
| BVIF_MEMBER_DECL(struct, language), \ |
| BVIF_MEMBER_DECL(struct, iso639), \ |
| BVIF_MEMBER_DECL(struct, iso3166), \ |
| BVIF_MEMBER_DECL(struct, gender), \ |
| BVIF_MEMBER_DECL(struct, age), } |
| [BV_8_1] = BVIF_ENTRY(BaratinooVoiceInfo__V8_1), |
| [BV_8_4] = BVIF_ENTRY(BaratinooVoiceInfo__V8_4), |
| [BV_8_6] = BVIF_ENTRY(BaratinooVoiceInfo__V8_4), |
| #undef BVIF_ENTRY |
| #undef BVIF_MEMBER_DECL |
| }; |
| #define VOICE_INFO_MEMBER(member_type, struct_p, member) \ |
| G_STRUCT_MEMBER(member_type, struct_p, bv_VoiceInfo_offsets[baratinoo_engine.supported_version][VI_##member]) |
| |
| /* BCinputTextBufferNew() gained an additional "uri" parameter in 8.6 */ |
| typedef BCinputTextBuffer (*bv_BCinputTextBufferNew_t)(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules); |
| typedef BCinputTextBuffer (*bv_BCinputTextBufferNew__V8_6_t)(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules, const char *uri); |
| #define BCinputTextBufferNew__V8_1 BCinputTextBufferNew |
| static BCinputTextBuffer BCinputTextBufferNew__V8_6(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules) |
| { |
| bv_BCinputTextBufferNew__V8_6_t func = (bv_BCinputTextBufferNew__V8_6_t) BCinputTextBufferNew; |
| return func(parsing, encoding, voiceIndex, voiceModules, NULL); |
| } |
| static const bv_BCinputTextBufferNew_t bv_BCinputTextBufferNew[] = { |
| [BV_8_1] = BCinputTextBufferNew__V8_1, |
| [BV_8_4] = BCinputTextBufferNew__V8_1, |
| [BV_8_6] = BCinputTextBufferNew__V8_6, |
| }; |
| #define BCinputTextBufferNew (bv_BCinputTextBufferNew[baratinoo_engine.supported_version]) |
| |
| #endif /* ! BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME */ |
| |
| |
| /*------------------------ Speech-Dispatcher module ------------------------*/ |
| |
| #include <speechd_types.h> |
| |
| #include "module_utils.h" |
| |
| #define MODULE_NAME "baratinoo" |
| #define DBG_MODNAME "Baratinoo: " |
| #define MODULE_VERSION "0.2" |
| |
| #define DEBUG_MODULE 1 |
| DECLARE_DEBUG(); |
| |
| typedef struct { |
| #ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME |
| SupportedBaratinooVersion supported_version; |
| #endif |
| |
| BCengine engine; |
| /* The buffer consumed by the TTS engine. */ |
| BCinputTextBuffer buffer; |
| |
| SPDVoice **voice_list; |
| |
| /* settings */ |
| int voice; |
| |
| /* request flags */ |
| gboolean stop_requested; |
| gboolean pause_requested; |
| gboolean pause_index_sent; |
| } Engine; |
| |
| /* engine and state */ |
| static Engine baratinoo_engine = { |
| .engine = NULL, |
| .buffer = NULL, |
| .voice_list = NULL, |
| .voice = 0, |
| .stop_requested = FALSE, |
| .pause_requested = FALSE, |
| .pause_index_sent = FALSE, |
| }; |
| |
| static gboolean BC_initialized = FALSE; |
| |
| /* Internal functions prototypes */ |
| static SPDVoice **baratinoo_list_voices(BCengine *engine); |
| /* Parameters */ |
| static void baratinoo_set_voice_type(SPDVoiceType voice); |
| static void baratinoo_set_language(char *lang); |
| static void baratinoo_set_synthesis_voice(char *synthesis_voice); |
| /* Engine callbacks */ |
| static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args); |
| static int baratinoo_output_signal(void *privateData, const void *address, int length); |
| /* SSML conversion functions */ |
| static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size); |
| |
| /* Module configuration options */ |
| MOD_OPTION_1_STR(BaratinooConfigPath); |
| MOD_OPTION_1_INT(BaratinooSampleRate); |
| MOD_OPTION_1_INT(BaratinooResponsiveness); |
| MOD_OPTION_1_INT(BaratinooQueueSize); |
| MOD_OPTION_1_INT(BaratinooMinRate); |
| MOD_OPTION_1_INT(BaratinooNormalRate); |
| MOD_OPTION_1_INT(BaratinooMaxRate); |
| MOD_OPTION_1_STR(BaratinooPunctuationList); |
| MOD_OPTION_1_STR(BaratinooIntonationList); |
| MOD_OPTION_1_STR(BaratinooNoIntonationList); |
| |
| /* Public functions */ |
| |
| int module_load(void) |
| { |
| const char *conf_env; |
| char *default_config = NULL; |
| |
| INIT_SETTINGS_TABLES(); |
| |
| REGISTER_DEBUG(); |
| |
| /* BaratinooConfigPath default value comes from the environment or |
| * user XDG configuration location */ |
| conf_env = getenv("BARATINOO_CONFIG_PATH"); |
| if (conf_env && conf_env[0] != '\0') { |
| default_config = g_strdup(conf_env); |
| } else { |
| default_config = g_build_filename(g_get_user_config_dir(), |
| "baratinoo.cfg", NULL); |
| } |
| MOD_OPTION_1_STR_REG(BaratinooConfigPath, default_config); |
| g_free(default_config); |
| |
| /* Sample rate. 16000Hz is the voices default, not requiring resampling */ |
| MOD_OPTION_1_INT_REG(BaratinooSampleRate, 16000); |
| |
| /* Let Baratinoo handle by default */ |
| MOD_OPTION_1_INT_REG(BaratinooResponsiveness, -1); |
| |
| /* Default to 20s queuing */ |
| MOD_OPTION_1_INT_REG(BaratinooQueueSize, 20*BaratinooSampleRate); |
| |
| /* Speech rate */ |
| MOD_OPTION_1_INT_REG(BaratinooMinRate, -100); |
| MOD_OPTION_1_INT_REG(BaratinooNormalRate, 0); |
| MOD_OPTION_1_INT_REG(BaratinooMaxRate, 100); |
| |
| /* Punctuation */ |
| MOD_OPTION_1_STR_REG(BaratinooPunctuationList, "@/+-_"); |
| MOD_OPTION_1_STR_REG(BaratinooIntonationList, "?!;:,."); |
| MOD_OPTION_1_STR_REG(BaratinooNoIntonationList, ""); |
| |
| return 0; |
| } |
| |
| #ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME |
| static SupportedBaratinooVersion get_baratinoo_supported_version(void) |
| { |
| const BaratinooVersionStruct *version = BCgetBaratinooVersionStruct(); |
| |
| switch (version->major) { |
| case 8: switch (version->minor) { |
| case 1: return BV_8_1; |
| case 4: return BV_8_4; |
| case 6: return BV_8_6; |
| } break; |
| } |
| |
| return BV_UNSUPPORTED; |
| } |
| #endif |
| |
| int module_init(char **status_info) |
| { |
| Engine *engine = &baratinoo_engine; |
| BARATINOOC_STATE state; |
| |
| DBG(DBG_MODNAME "Module init"); |
| |
| module_audio_set_server(); |
| |
| DBG(DBG_MODNAME "BaratinooPunctuationList = %s", BaratinooPunctuationList); |
| DBG(DBG_MODNAME "BaratinooIntonationList = %s", BaratinooIntonationList); |
| DBG(DBG_MODNAME "BaratinooNoIntonationList = %s", BaratinooNoIntonationList); |
| |
| *status_info = NULL; |
| |
| /* Init Baratinoo */ |
| if (BCinitlib(baratinoo_trace_cb) != BARATINOO_INIT_OK) { |
| DBG(DBG_MODNAME "Failed to initialize library"); |
| *status_info = g_strdup("Failed to initialize Baratinoo. " |
| "Make sure your installation is " |
| "properly set up."); |
| return -1; |
| } |
| BC_initialized = TRUE; |
| DBG(DBG_MODNAME "Using Baratinoo %s", BCgetBaratinooVersion()); |
| |
| #ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME |
| engine->supported_version = get_baratinoo_supported_version(); |
| if (engine->supported_version == BV_UNSUPPORTED) { |
| DBG(DBG_MODNAME "Unsupported library version"); |
| *status_info = g_strdup("Unsupported Baratinoo engine version."); |
| return -1; |
| } |
| DBG(DBG_MODNAME "Using Baratinoo compatibility level %d", engine->supported_version); |
| #endif |
| |
| engine->engine = BCnew(NULL); |
| if (!engine->engine) { |
| DBG(DBG_MODNAME "Failed to allocate engine"); |
| *status_info = g_strdup("Failed to create Baratinoo engine."); |
| return -1; |
| } |
| |
| BCinit(engine->engine, BaratinooConfigPath); |
| state = BCgetState(engine->engine); |
| if (state != BARATINOO_INITIALIZED) { |
| DBG(DBG_MODNAME "Failed to initialize engine"); |
| *status_info = g_strdup("Failed to initialize Baratinoo engine. " |
| "Make sure your setup is OK."); |
| return -1; |
| } |
| |
| /* Find voices */ |
| engine->voice_list = baratinoo_list_voices(engine->engine); |
| if (!engine->voice_list) { |
| DBG(DBG_MODNAME "No voice available"); |
| *status_info = g_strdup("No voice found. Make sure your setup " |
| "includes at least one voice."); |
| return -1; |
| } |
| |
| /* Setup output (audio) signal handling */ |
| DBG(DBG_MODNAME "Using PCM output at %dHz", BaratinooSampleRate); |
| BCsetOutputSignal(engine->engine, baratinoo_output_signal, engine, BARATINOO_PCM, BaratinooSampleRate); |
| if (BCgetState(engine->engine) != BARATINOO_INITIALIZED) { |
| DBG(DBG_MODNAME "Failed to initialize output signal handler"); |
| *status_info = g_strdup("Failed to initialize Baratinoo output " |
| "signal handler. Is the configured " |
| "sample rate correct?"); |
| return -1; |
| } |
| |
| BCsetWantedEvent(engine->engine, BARATINOO_MARKER_EVENT); |
| |
| DBG(DBG_MODNAME "Initialization successfully."); |
| *status_info = g_strdup("Baratinoo initialized successfully."); |
| |
| return 0; |
| } |
| |
| SPDVoice **module_list_voices(void) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| return engine->voice_list; |
| } |
| |
| void module_speak_sync(const gchar *data, size_t bytes, SPDMessageType msgtype) |
| { |
| Engine *engine = &baratinoo_engine; |
| GString *buffer = NULL; |
| int rate; |
| |
| DBG(DBG_MODNAME "Speech requested"); |
| |
| assert(msg_settings.rate >= -100 && msg_settings.rate <= +100); |
| assert(msg_settings.pitch >= -100 && msg_settings.pitch <= +100); |
| assert(msg_settings.pitch_range >= -100 && msg_settings.pitch_range <= +100); |
| assert(msg_settings.volume >= -100 && msg_settings.volume <= +100); |
| |
| if (engine->buffer != NULL) { |
| DBG(DBG_MODNAME "WARNING: module_speak() called during speech"); |
| module_speak_error(); |
| return; |
| } |
| |
| /* select voice following parameters. we don't use tags for this as |
| * we need to do some computation on our end anyway and need pass an |
| * ID when creating the buffer too */ |
| UPDATE_STRING_PARAMETER(voice.language, baratinoo_set_language); |
| UPDATE_PARAMETER(voice_type, baratinoo_set_voice_type); |
| UPDATE_STRING_PARAMETER(voice.name, baratinoo_set_synthesis_voice); |
| |
| engine->buffer = BCinputTextBufferNew(BARATINOO_PROPRIETARY_PARSING, |
| BARATINOO_UTF8, engine->voice, 0); |
| if (!engine->buffer) { |
| DBG(DBG_MODNAME "Failed to allocate input buffer"); |
| module_speak_error(); |
| goto err; |
| } |
| |
| buffer = g_string_new(NULL); |
| |
| /* Apply speech parameters */ |
| if (msg_settings.rate < 0) |
| rate = BaratinooNormalRate + (BaratinooNormalRate - BaratinooMinRate) * msg_settings.rate / 100; |
| else |
| rate = BaratinooNormalRate + (BaratinooMaxRate - BaratinooNormalRate) * msg_settings.rate / 100; |
| |
| if (rate != 0) { |
| g_string_append_printf(buffer, "\\rate{%+d%%}", rate); |
| } |
| if (msg_settings.pitch != 0 || msg_settings.pitch_range != 0) { |
| g_string_append_printf(buffer, "\\pitch{%+d%% %+d%%}", |
| msg_settings.pitch, |
| msg_settings.pitch_range); |
| } |
| if (msg_settings.volume != 0) { |
| g_string_append_printf(buffer, "\\volume{%+d%%}", |
| msg_settings.volume); |
| } |
| |
| switch (msgtype) { |
| case SPD_MSGTYPE_SPELL: /* FIXME: use \spell when Voxygen actuall implements it */ |
| /* TODO: in the meanwhile use a generic engine */ |
| case SPD_MSGTYPE_CHAR: |
| g_string_append(buffer, "\\sayas<{characters}"); |
| g_string_append_len(buffer, data, bytes); |
| g_string_append(buffer, "\\sayas>{}"); |
| break; |
| case SPD_MSGTYPE_KEY: /* TODO: use a generic engine */ |
| if (g_utf8_strlen(data, bytes) == 1) { |
| g_string_append(buffer, "\\sayas<{characters}"); |
| g_string_append_len(buffer, data, bytes); |
| g_string_append(buffer, "\\sayas>{}"); |
| } else { |
| gchar *c; |
| g_string_append_len(buffer, data, bytes); |
| for (c = buffer->str; *c; c++) |
| if (*c == '_') |
| *c = ' '; |
| } |
| break; |
| default: /* FIXME: */ |
| case SPD_MSGTYPE_TEXT: |
| append_ssml_as_proprietary(engine, buffer, data, bytes); |
| break; |
| } |
| |
| DBG(DBG_MODNAME "SSML input: %s", data); |
| DBG(DBG_MODNAME "Sending buffer: %s", buffer->str); |
| if (!BCinputTextBufferInit(engine->buffer, buffer->str)) { |
| DBG(DBG_MODNAME "Failed to initialize input buffer"); |
| module_speak_error(); |
| goto err; |
| } |
| |
| g_string_free(buffer, TRUE); |
| buffer = NULL; |
| |
| engine->stop_requested = FALSE; |
| engine->pause_requested = FALSE; |
| engine->pause_index_sent = FALSE; |
| |
| BARATINOOC_STATE state = BARATINOO_READY; |
| |
| state = BCinputTextBufferSetInEngine(engine->buffer, engine->engine); |
| if (state != BARATINOO_READY) { |
| DBG(DBG_MODNAME "Failed to set input buffer"); |
| module_speak_error(); |
| goto out; |
| } |
| |
| module_speak_ok(); |
| |
| module_report_event_begin(); |
| do { |
| if (engine->stop_requested || (engine->pause_requested && engine->pause_index_sent)) { |
| BCpurge(engine->engine); |
| engine->buffer = NULL; |
| break; |
| } |
| |
| /* Process server events in case we were told to stop in between */ |
| module_process(STDIN_FILENO, 0); |
| |
| state = BCprocessLoop(engine->engine, BaratinooResponsiveness); |
| if (state == BARATINOO_EVENT) { |
| BaratinooEvent event = BCgetEvent(engine->engine); |
| if (event.type == BARATINOO_MARKER_EVENT) { |
| DBG(DBG_MODNAME "Reached mark '%s' at sample %lu", event.data.marker.name, event.sampleStamp); |
| module_report_index_mark(event.data.marker.name); |
| if (engine->pause_requested && |
| !strncmp(event.data.marker.name, |
| INDEX_MARK_BODY, |
| INDEX_MARK_BODY_LEN)) { |
| engine->pause_index_sent = 1; |
| } |
| } |
| } |
| } while (state == BARATINOO_RUNNING || state == BARATINOO_EVENT); |
| |
| out: |
| if (engine->pause_requested) |
| module_report_event_pause(); |
| else if (engine->stop_requested) |
| module_report_event_stop(); |
| else |
| module_report_event_end(); |
| |
| BCinputTextBufferDelete(engine->buffer); |
| engine->buffer = NULL; |
| |
| DBG(DBG_MODNAME "leaving module_speak_sync() normally"); |
| return; |
| |
| err: |
| if (buffer) |
| g_string_free(buffer, TRUE); |
| if (engine->buffer) { |
| BCinputTextBufferDelete(engine->buffer); |
| engine->buffer = NULL; |
| } |
| |
| return; |
| } |
| |
| int module_stop(void) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| DBG(DBG_MODNAME "Stop requested"); |
| engine->stop_requested = TRUE; |
| |
| return 0; |
| } |
| |
| size_t module_pause(void) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| DBG(DBG_MODNAME "Pause requested"); |
| engine->stop_requested = TRUE; |
| |
| return 0; |
| } |
| |
| int module_close(void) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| DBG(DBG_MODNAME "close()"); |
| |
| /* destroy voice list */ |
| if (engine->voice_list != NULL) { |
| int i; |
| for (i = 0; engine->voice_list[i] != NULL; i++) { |
| g_free(engine->voice_list[i]->name); |
| g_free(engine->voice_list[i]->language); |
| g_free(engine->voice_list[i]->variant); |
| g_free(engine->voice_list[i]); |
| } |
| g_free(engine->voice_list); |
| engine->voice_list = NULL; |
| } |
| |
| /* destroy engine */ |
| if (engine->engine) { |
| BCdelete(engine->engine); |
| engine->engine = NULL; |
| } |
| |
| if (BC_initialized) { |
| /* uninitialize */ |
| BCterminatelib(); |
| BC_initialized = FALSE; |
| } |
| |
| DBG(DBG_MODNAME "Module closed."); |
| |
| return 0; |
| } |
| |
| /* Internal functions */ |
| |
| /** |
| * @brief Lists voices in SPD format |
| * @param engine An engine. |
| * @returns A NULL-terminated list of @c SPDVoice, or NULL if no voice found. |
| */ |
| static SPDVoice **baratinoo_list_voices(BCengine *engine) |
| { |
| SPDVoice **voices; |
| int n_voices; |
| int i; |
| |
| n_voices = BCgetNumberOfVoices(engine); |
| if (n_voices < 1) |
| return NULL; |
| |
| voices = g_malloc_n(n_voices + 1, sizeof *voices); |
| DBG(DBG_MODNAME "Got %d available voices:", n_voices); |
| for (i = 0; i < n_voices; i++) { |
| SPDVoice *voice; |
| const char *language; |
| BaratinooVoiceInfo voice_info_DO_NO_ACCESS_DIRECTLY = BCgetVoiceInfo(engine, i); |
| void *voice_info = &voice_info_DO_NO_ACCESS_DIRECTLY; |
| |
| DBG(DBG_MODNAME "\tVoice #%d: name=%s, language=%s, gender=%s", |
| i, VOICE_INFO_MEMBER(char *, voice_info, name), |
| VOICE_INFO_MEMBER(char *, voice_info, language), |
| VOICE_INFO_MEMBER(char *, voice_info, gender)); |
| |
| voice = g_malloc0(sizeof *voice); |
| voice->name = g_strdup(VOICE_INFO_MEMBER(char *, voice_info, name)); |
| |
| language = VOICE_INFO_MEMBER(char *, voice_info, language); |
| voice->language = g_strdup(language); |
| |
| voices[i] = voice; |
| } |
| voices[i] = NULL; |
| |
| return voices; |
| } |
| |
| /* Voice selection */ |
| |
| /** |
| * @brief Matches a Baratinoo voice info against a SPD language |
| * @param info A voice info to match. |
| * @param lang A SPD language to match against. |
| * @returns The quality of the match: the higher the better. |
| * |
| * Gives a score to a voice based on its compatibility with @p lang. |
| */ |
| static int lang_match_level(const void *vinfo, const char *lang) |
| { |
| int level = 0; |
| const char *language = VOICE_INFO_MEMBER(char *, vinfo, language); |
| const char *iso639 = VOICE_INFO_MEMBER(char *, vinfo, iso639); |
| const char *iso3166 = VOICE_INFO_MEMBER(char *, vinfo, iso3166); |
| |
| if (g_ascii_strcasecmp(lang, language) == 0) |
| level += 10; |
| else { |
| gchar **a = g_strsplit_set(language, "-", 2); |
| gchar **b = g_strsplit_set(lang, "-", 2); |
| |
| /* language */ |
| if (g_ascii_strcasecmp(a[0], b[0]) == 0) |
| level += 8; |
| else if (g_ascii_strcasecmp(iso639, b[0]) == 0) |
| level += 8; |
| else if (g_ascii_strncasecmp(a[0], b[0], 2) == 0) |
| level += 5; /* partial match */ |
| /* region */ |
| if (a[1] && b[1] && g_ascii_strcasecmp(a[1], b[1]) == 0) |
| level += 2; |
| else if (b[1] && g_ascii_strcasecmp(iso3166, b[1]) == 0) |
| level += 2; |
| else if (a[1] && b[1] && g_ascii_strncasecmp(a[1], b[1], 2) == 0) |
| level += 1; /* partial match */ |
| |
| g_strfreev(a); |
| g_strfreev(b); |
| } |
| |
| DBG(DBG_MODNAME "lang_match_level({language=%s, iso639=%s, iso3166=%s}, lang=%s) = %d", |
| language, iso639, iso3166, lang, level); |
| |
| return level; |
| } |
| |
| /** |
| * @brief Sort two Baratinoo voices by SPD criteria. |
| * @param a A voice info. |
| * @param b Another voice info. |
| * @param lang A SPD language. |
| * @param voice_code A SPD voice code. |
| * @returns < 0 if @p a is best, > 0 if @p b is best, and 0 if they are equally |
| * matching. Larger divergence from 0 means better match. |
| */ |
| static int sort_voice(const void *voice_a, const void *voice_b, const char *lang, SPDVoiceType voice_code) |
| { |
| int cmp = 0; |
| const char *a_gender = VOICE_INFO_MEMBER(char *, voice_a, gender); |
| const char *b_gender = VOICE_INFO_MEMBER(char *, voice_b, gender); |
| int a_age = VOICE_INFO_MEMBER(int, voice_a, age); |
| int b_age = VOICE_INFO_MEMBER(int, voice_b, age); |
| |
| cmp -= lang_match_level(voice_a, lang); |
| cmp += lang_match_level(voice_b, lang); |
| |
| if (strcmp(a_gender, b_gender) != 0) { |
| const char *gender; |
| |
| switch (voice_code) { |
| default: |
| case SPD_MALE1: |
| case SPD_MALE2: |
| case SPD_MALE3: |
| case SPD_CHILD_MALE: |
| gender = "male"; |
| break; |
| |
| case SPD_FEMALE1: |
| case SPD_FEMALE2: |
| case SPD_FEMALE3: |
| case SPD_CHILD_FEMALE: |
| gender = "female"; |
| break; |
| } |
| |
| if (strcmp(gender, a_gender) == 0) |
| cmp--; |
| if (strcmp(gender, b_gender) == 0) |
| cmp++; |
| } |
| |
| switch (voice_code) { |
| case SPD_CHILD_MALE: |
| case SPD_CHILD_FEMALE: |
| if (a_age && a_age <= 15) |
| cmp--; |
| if (b_age && b_age <= 15) |
| cmp++; |
| break; |
| default: |
| /* we expect mostly adult voices, so only compare if age is set */ |
| if (a_age && b_age) { |
| if (a_age > 15) |
| cmp--; |
| if (b_age > 15) |
| cmp++; |
| } |
| break; |
| } |
| |
| DBG(DBG_MODNAME "Comparing %s <> %s gives %d", |
| VOICE_INFO_MEMBER(char*, voice_a, name), |
| VOICE_INFO_MEMBER(char*, voice_b, name), |
| cmp); |
| |
| return cmp; |
| } |
| |
| /* Given a language code and SD voice code, gets the Baratinoo voice. */ |
| static int baratinoo_find_voice(const Engine *engine, const char *lang, SPDVoiceType voice_code) |
| { |
| int i; |
| int best_match = -1; |
| int nth_match = 0; |
| int offset = 0; /* nth voice we'd like */ |
| BaratinooVoiceInfo best_info; |
| |
| DBG(DBG_MODNAME "baratinoo_find_voice(lang=%s, voice_code=%d)", |
| lang, voice_code); |
| |
| switch (voice_code) { |
| case SPD_MALE3: |
| case SPD_FEMALE3: |
| offset++; |
| /* FALLTHRU */ |
| case SPD_MALE2: |
| case SPD_FEMALE2: |
| offset++; |
| /* FALLTHRU */ |
| default: |
| break; |
| } |
| |
| for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) { |
| if (i == 0) { |
| best_match = i; |
| best_info = BCgetVoiceInfo(engine->engine, i); |
| nth_match++; |
| } else { |
| BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i); |
| int cmp = sort_voice(&best_info, &info, lang, voice_code); |
| |
| if (cmp >= 0) { |
| if (cmp > 0) |
| nth_match = 0; |
| if (nth_match <= offset) { |
| best_match = i; |
| best_info = info; |
| } |
| nth_match++; |
| } |
| } |
| } |
| |
| return best_match; |
| } |
| |
| /* Given a language code and SD voice code, sets the voice. */ |
| static void baratinoo_set_language_and_voice(Engine *engine, const char *lang, SPDVoiceType voice_code) |
| { |
| int voice = baratinoo_find_voice(engine, lang, voice_code); |
| |
| if (voice < 0) { |
| DBG(DBG_MODNAME "No voice match found, not changing voice."); |
| } else { |
| DBG(DBG_MODNAME "Best voice match is %d.", voice); |
| engine->voice = voice; |
| } |
| } |
| |
| /* UPDATE_PARAMETER callback to set the voice type */ |
| static void baratinoo_set_voice_type(SPDVoiceType voice) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| assert(msg_settings.voice.language); |
| baratinoo_set_language_and_voice(engine, msg_settings.voice.language, voice); |
| } |
| |
| /* UPDATE_PARAMETER callback to set the voice language */ |
| static void baratinoo_set_language(char *lang) |
| { |
| Engine *engine = &baratinoo_engine; |
| |
| baratinoo_set_language_and_voice(engine, lang, msg_settings.voice_type); |
| } |
| |
| /* UPDATE_PARAMETER callback to set the voice by name */ |
| static void baratinoo_set_synthesis_voice(char *synthesis_voice) |
| { |
| Engine *engine = &baratinoo_engine; |
| int i; |
| |
| if (synthesis_voice == NULL) |
| return; |
| |
| for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) { |
| BaratinooVoiceInfo info = BCgetVoiceInfo(engine->engine, i); |
| |
| if (g_ascii_strcasecmp(synthesis_voice, VOICE_INFO_MEMBER(char*, &info, name)) == 0) { |
| engine->voice = i; |
| return; |
| } |
| } |
| |
| DBG(DBG_MODNAME "Failed to set synthesis voice to '%s': not found.", |
| synthesis_voice); |
| } |
| |
| /* Engine callbacks */ |
| |
| /** |
| * @brief Logs a message from Baratinoo |
| * @param level Message importance. |
| * @param engine_num ID of the engine that emitted the message, or 0 if it is a |
| * library message. |
| * @param source Message category. |
| * @param data Private data, unused. |
| * @param format printf-like @p format. |
| * @param args arguments for @p format. |
| */ |
| static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args) |
| { |
| const char *prefix = ""; |
| |
| if (!Debug) { |
| switch (level) { |
| case BARATINOO_TRACE_INIT: |
| case BARATINOO_TRACE_INFO: |
| case BARATINOO_TRACE_DEBUG: |
| return; |
| default: |
| break; |
| } |
| } |
| |
| switch (level) { |
| case BARATINOO_TRACE_ERROR: |
| prefix = "ERROR"; |
| break; |
| case BARATINOO_TRACE_INIT: |
| prefix = "INIT"; |
| break; |
| case BARATINOO_TRACE_WARNING: |
| prefix = "WARNING"; |
| break; |
| case BARATINOO_TRACE_INFO: |
| prefix = "INFO"; |
| break; |
| case BARATINOO_TRACE_DEBUG: |
| prefix = "DEBUG"; |
| break; |
| } |
| |
| if (engine_num == 0) |
| fprintf(stderr, "Baratinoo library: "); |
| else |
| fprintf(stderr, "Baratinoo engine #%d: ", engine_num); |
| fprintf(stderr, "%s: %s ", prefix, source); |
| vfprintf(stderr, format, args); |
| fprintf(stderr, "\n"); |
| } |
| |
| /** |
| * @brief Output (sound) callback |
| * @param private_data An Engine structure. |
| * @param address Audio samples. |
| * @param length Length of @p address, in bytes. |
| * @returns Whether to break out of the process loop. |
| * |
| * Called by the engine during speech synthesis. |
| * |
| * @see BCprocessLoop() |
| */ |
| static int baratinoo_output_signal(void *private_data, const void *address, int length) |
| { |
| Engine *engine = private_data; |
| |
| /* If stop is requested during synthesis, abort here to stop speech as |
| * early as possible, even if the engine didn't finish its cycle yet. */ |
| if (engine->stop_requested) |
| { |
| DBG(DBG_MODNAME "Not playing message because it got stopped"); |
| return 1; |
| } |
| |
| AudioTrack track; |
| #if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN) |
| AudioFormat format = SPD_AUDIO_BE; |
| #else |
| AudioFormat format = SPD_AUDIO_LE; |
| #endif |
| |
| /* We receive 16 bits PCM data */ |
| track.num_samples = length / 2; /* 16 bits per sample = 2 bytes */ |
| track.num_channels = 1; |
| track.sample_rate = BaratinooSampleRate; |
| track.bits = 16; |
| track.samples = (short *) address; |
| |
| DBG(DBG_MODNAME "Queueing %d samples", length / 2); |
| module_tts_output_server(&track, format); |
| |
| return engine->stop_requested; |
| } |
| |
| /* SSML conversion functions */ |
| |
| typedef struct { |
| const Engine *engine; |
| GString *buffer; |
| /* Voice ID stack for the current element */ |
| int voice_stack[32]; |
| unsigned int voice_stack_len; |
| } SsmlPraserState; |
| |
| /* Adds a language change command for @p lang if appropriate */ |
| static void ssml2baratinoo_push_lang(SsmlPraserState *state, const char *lang) |
| { |
| int voice; |
| |
| if (state->voice_stack_len > 0) |
| voice = state->voice_stack[state->voice_stack_len - 1]; |
| else |
| voice = state->engine->voice; |
| |
| if (lang) { |
| DBG(DBG_MODNAME "Processing xml:lang=\"%s\"", lang); |
| int new_voice = baratinoo_find_voice(&baratinoo_engine, lang, |
| msg_settings.voice_type); |
| if (new_voice >= 0 && new_voice != voice) { |
| g_string_append_printf(state->buffer, "\\vox{%d}", new_voice); |
| voice = new_voice; |
| } |
| } |
| |
| if (state->voice_stack_len >= G_N_ELEMENTS(state->voice_stack)) { |
| DBG(DBG_MODNAME "WARNING: voice stack exhausted, expect incorrect voices."); |
| } else { |
| state->voice_stack[state->voice_stack_len++] = voice; |
| } |
| } |
| |
| /* Pops a language pushed with @c ssml2baratinoo_push_lang() */ |
| static void ssml2baratinoo_pop_lang(SsmlPraserState *state) |
| { |
| if (state->voice_stack_len > 0) { |
| int cur_voice = state->voice_stack[--state->voice_stack_len]; |
| |
| if (state->voice_stack_len > 0) { |
| int new_voice = state->voice_stack[state->voice_stack_len - 1]; |
| |
| if (new_voice != cur_voice) |
| g_string_append_printf(state->buffer, "\\vox{%d}", new_voice); |
| } |
| } |
| } |
| |
/* Searches the NULL-terminated string array @p names for @p name.
 * Returns the index of the first exact match, or -1 when there is none
 * (including when @p names itself is NULL). */
static int attribute_index(const char **names, const char *name)
{
	int pos = 0;

	if (names == NULL)
		return -1;

	while (names[pos] != NULL) {
		if (!strcmp(names[pos], name))
			return pos;
		pos++;
	}

	return -1;
}
| |
| /* Markup element start callback */ |
| static void ssml2baratinoo_start_element(GMarkupParseContext *ctx, |
| const gchar *element, |
| const gchar **attribute_names, |
| const gchar **attribute_values, |
| gpointer data, GError **error) |
| { |
| SsmlPraserState *state = data; |
| int lang_id; |
| |
| /* handle voice changes */ |
| lang_id = attribute_index(attribute_names, "xml:lang"); |
| ssml2baratinoo_push_lang(state, lang_id < 0 ? NULL : attribute_values[lang_id]); |
| |
| /* handle elements */ |
| if (strcmp(element, "mark") == 0) { |
| int i = attribute_index(attribute_names, "name"); |
| g_string_append_printf(state->buffer, "\\mark{%s}", |
| i < 0 ? "" : attribute_values[i]); |
| } else if (strcmp(element, "emphasis") == 0) { |
| int i = attribute_index(attribute_names, "level"); |
| g_string_append_printf(state->buffer, "\\emph<{%s}", |
| i < 0 ? "" : attribute_values[i]); |
| } else if (strcmp(element, "say-as") == 0) { |
| int i_as = attribute_index(attribute_names, "interpret-as"); |
| int i_fmt = attribute_index(attribute_names, "format"); |
| int i_detail = attribute_index(attribute_names, "detail"); |
| |
| if (i_as < 0) { |
| DBG(DBG_MODNAME "Missing required 'interpret-as' attribute of '<say-as>' tag"); |
| i_fmt = i_detail = -1; |
| } else if (i_fmt < 0 && i_detail >= 0) { |
| DBG(DBG_MODNAME "Ignoring 'detail' attribute of '<say-as>' tag because it is " |
| "not supported without a 'format' attribute"); |
| i_detail = -1; |
| } |
| |
| g_string_append_printf(state->buffer, "\\sayas<{%s %s %s}", |
| i_as < 0 ? "" : attribute_values[i_as], |
| i_fmt < 0 ? "" : attribute_values[i_fmt], |
| i_detail < 0 ? "" : attribute_values[i_detail]); |
| } else { |
| /* ignore other elements */ |
| /* TODO: handle more elements */ |
| } |
| } |
| |
| /* Markup element end callback */ |
| static void ssml2baratinoo_end_element(GMarkupParseContext *ctx, |
| const gchar *element, |
| gpointer data, GError **error) |
| { |
| SsmlPraserState *state = data; |
| |
| if (strcmp(element, "emphasis") == 0) { |
| g_string_append(state->buffer, "\\emph>{}"); |
| } else if (strcmp(element, "say-as") == 0) { |
| g_string_append(state->buffer, "\\sayas>{}"); |
| } |
| |
| ssml2baratinoo_pop_lang(state); |
| } |
| |
| /* Markup text node callback. |
| * |
| * This not only converts to the proprietary format (by escaping things that |
| * would be interpreted by it), but also pre-processes the text for some |
| * features that are missing from Baratinoo. |
| * |
| * - Punctuation speaking |
| * |
| * As the engine doesn't support speaking of the punctuation itself, we alter |
| * the input to explicitly tell the engine to do it. It is kind of tricky, |
| * because we want to keep the punctuation meaning of the characters, e.g. how |
| * they affect speech as means of intonation and pauses. |
| * |
| * The approach here is that for every punctuation character included in the |
| * selected mode (none/some/most/all), we wrap it in "\sayas<{characters}" markup |
| * so that it is spoken by the engine. But in order to keep the punctuation |
| * meaning of the character, in case it has some, we duplicate it outside the |
| * markup with a heuristic on whether it will or not affect speech intonation |
| * and pauses, and whether or not the engine would speak the character itself |
| * already (as we definitely don't want to get duplicated speech for a |
| * character). |
| * This heuristic is as follows: |
| * - If the character is listed in BaratinooIntonationList and the next |
| * character is not punctuation or alphanumeric, duplicate the character. |
| * - Always append a space after a duplicated character, hoping the engine |
| * won't consider speaking it. |
| * |
| * This won't always give the same results as the engine would by itself, but |
| * it isn't really possible as the engine behavior is language-dependent in a |
| * non-obvious fashion. For example, a French voice will speak "1.2.3" as |
| * "Un. Deux. Trois", while an English one will speak it as "One dot two dot |
| * three": the dot here didn't have the same interpretation, and wasn't spoken |
| * the same (once altering the voice, the other spoken plain and simple). |
| * |
| * However, the heuristic here should be highly unlikely to lead to duplicate |
| * character speaking, and catch most of the intonation and pause cases. |
| * |
| * - Why is this done that way? |
| * |
| * Another, possibly more robust, approach could be using 2 passes in the |
| * engine itself, and relying on events to get information on how the engine |
| * interprets the input in the first (silent) pass, and alter it as needed for |
| * a second (spoken) pass. This wouldn't guarantee the altered input would be |
| * interpreted the same, but it would seem like a safe enough bet. |
| * |
| * However, the engine is too slow for this to be viable in a real-time |
| * processing environment for anything but tiny input. Even about 25 lines of |
| * IRC conversation can easily take several seconds to process in the first |
| * pass (even without doing any actual pre-processing on our end), delaying |
| * the actual speech by an unacceptable amount of time. |
| * |
| * Ideally, the engine will some day support speaking punctuation itself, and |
| * this part of the pre-processing could be dropped. |
| */ |
| static void ssml2baratinoo_text(GMarkupParseContext *ctx, |
| const gchar *text, gsize len, |
| gpointer data, GError **error) |
| { |
| SsmlPraserState *state = data; |
| const gchar *p; |
| |
| for (p = text; p < (text + len); p = g_utf8_next_char(p)) { |
| if (*p == '\\') { |
| /* escape the \ by appending a comment so it won't be |
| * interpreted as a command */ |
| g_string_append(state->buffer, "\\\\{}"); |
| } else { |
| gboolean say_as_char, do_not_say; |
| gunichar ch = g_utf8_get_char(p); |
| |
| /* if punctuation mode is not NONE and the character |
| * should be spoken, manually wrap it with \sayas */ |
| say_as_char = (((msg_settings.punctuation_mode == SPD_PUNCT_SOME || |
| msg_settings.punctuation_mode == SPD_PUNCT_MOST) && |
| g_utf8_strchr(BaratinooPunctuationList, -1, ch)) || |
| (msg_settings.punctuation_mode == SPD_PUNCT_ALL && |
| g_unichar_ispunct(ch))); |
| do_not_say = ((msg_settings.punctuation_mode == SPD_PUNCT_NONE && |
| g_utf8_strchr(BaratinooNoIntonationList, -1, ch))); |
| |
| if (say_as_char) |
| g_string_append(state->buffer, "\\sayas<{characters}"); |
| if (!do_not_say) |
| g_string_append_unichar(state->buffer, ch); |
| if (say_as_char) { |
| g_string_append(state->buffer, "\\sayas>{}"); |
| |
| /* if the character should influence intonation, |
| * add it back, but *only* if it wouldn't be spoken */ |
| if (g_utf8_strchr(BaratinooIntonationList, -1, ch)) { |
| const gchar *next = g_utf8_next_char(p); |
| gunichar ch_next; |
| |
| if (next < text + len) |
| ch_next = g_utf8_get_char(next); |
| else |
| ch_next = '\n'; |
| |
| if (!g_unichar_isalnum(ch_next) && |
| !g_unichar_ispunct(ch_next)) { |
| g_string_append_unichar(state->buffer, ch); |
| /* Append an extra space to try and |
| * make sure it's considered as |
| * punctuation and isn't spoken. */ |
| g_string_append_c(state->buffer, ' '); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * @brief Converts SSML data to Baratinoo's proprietary format. |
| * @param buf A buffer to write to. |
| * @param data SSML data to convert. |
| * @param size Length of @p data |
| * |
| * @warning Only a subset of the input SSML is currently translated, the rest |
| * being discarded. |
| */ |
| static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size) |
| { |
| /* FIXME: we could possibly use SSML mode, but the Baratinoo parser is |
| * very strict and *requires* "xmlns", "version" and "lang" attributes |
| * on the <speak> tag, which speech-dispatcher doesn't provide. |
| * |
| * Moreover, we need to add tags for volume/rate/pitch so we'd have to |
| * amend the data anyway. */ |
| static const GMarkupParser parser = { |
| .start_element = ssml2baratinoo_start_element, |
| .end_element = ssml2baratinoo_end_element, |
| .text = ssml2baratinoo_text, |
| }; |
| SsmlPraserState state = { |
| .engine = engine, |
| .buffer = buf, |
| .voice_stack_len = 0, |
| }; |
| GMarkupParseContext *ctx; |
| GError *err = NULL; |
| |
| ctx = g_markup_parse_context_new(&parser, G_MARKUP_TREAT_CDATA_AS_TEXT, |
| &state, NULL); |
| if (!g_markup_parse_context_parse(ctx, data, size, &err) || |
| !g_markup_parse_context_end_parse(ctx, &err)) { |
| DBG(DBG_MODNAME "Failed to convert SSML: %s", err->message); |
| g_error_free(err); |
| } |
| |
| g_markup_parse_context_free(ctx); |
| } |