blob: f63897b2b993ac04ee9f213f8f2107e197d8ee0f [file]
/*
* baratinoo.c - Speech Dispatcher backend for Baratinoo (VoxyGen)
*
* Copyright (C) 2016 Brailcom, o.p.s.
* Copyright (C) 2019-2021 Samuel Thibault <samuel.thibault@ens-lyon.org>
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/*
* Input and output choices.
*
* - The input is sent to the engine through a BCinputTextBuffer. There is
* a single one of those at any given time, and it is filled in
* module_speak_sync().
*
* This doesn't use an input callback generating a continuous flow (and
* blocking waiting for more data) even though it would be a fairly nice
* design and would allow not to set speech attributes like volume, pitch and
* rate as often. This is because the Baratinoo engine has 2 limitations on
* the input callback:
*
* * It consumes everything (or at least a lot) up until the callback
* reports the input end by returning 0. Alternatively one could use the
* \flush command followed by a newline, so this is not really limiting.
*
* * More problematic, as the buffer callback is expected to feed a single
* input, calling BCpurge() (for handling stop events) unregisters it,
* requiring to re-add it afterward. This renders the continuous flow a
* lot less useful, as speech attributes like volume, pitch and rate would
* have to be set again.
*
* - The output uses the signal buffer instead of callback.
* The output callback sends sound to the output module phoneme by
* phoneme, which causes parasitic noise with ALSA due to a reset of
* parameters for each sound call.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME
/* See below why this is problematic. It can however be useful to get the
* compiler help to check compatibility */
# define BARATINOO_C_API
# include "baratinoo.h"
# include "baratinooio.h"
/* Stable-ABI variant: struct_p is cast to BaratinooVoiceInfo and the member is
 * read directly.  member_type is not used by this expansion. */
#define VOICE_INFO_MEMBER(member_type, struct_p, member) \
	(((BaratinooVoiceInfo *)(struct_p))->member)
#else
/*------------------------------ Baratinoo API ------------------------------*/
/*
* This file does NOT include baratinoo.h and baratinooio.h on purpose.
* The reason is that Baratinoo does not provide ABI stability, and various
* things change in minor versions. This is a problem for this module that
* would like to support several versions at once.
*
* To work around this, we re-define all the API we need from the Baratinoo
* headers, and patch compatibility possibly dynamically.
*
* This has to be done with *EXTREME CARE* not to slip off face-first into the
* wall. What we keep is the lowest common denominator between the supported
* versions, and for the incompatible bits we use dynamic mapping and offsets.
*
* Currently supported versions:
* - 8.1
* - 8.4
* - 8.6
*
* To add a new version, you need to:
* - First, check the diff between the oldest supported version and the new
* version. Diffing against the newest supported version can be handy but
* is not necessarily enough.
* - Once the incompatibilities are identified and they affect us, amend the
* code as necessary, keeping in mind to support older versions. You'll
* have to add a new `BV_` constant for the new version, and make sure
* everything that uses these constants handles the new value.
* - What to look for:
* - Members of structures that changed offset (reordered, changed type, etc.)
* - Enumeration values that changed value (order changed, previous member
* getting a different default value, etc.)
* - Union that changed size.
* - Function arguments that changed type or order.
* - Update get_baratinoo_supported_version() to return the appropriate
* constant in the appropriate situation. If the new version did not break
* compatibility with an already supported version, you might just return
* the constant for that other version.
*
* GOTCHAS
* - do NOT access BaratinooVoiceInfo members directly, always use
* VOICE_INFO_MEMBER(type, struct_p, member)
* - Make sure the structures allocated on the stack are large enough for all
* versions.
*/
#include "baratinoo_compat.h"
/* Dynamic compatibility part */
#include <glib.h>
/*
 * Compatibility levels this module knows how to drive.
 *
 * The non-negative values are used as indices into the bv_* per-version
 * lookup tables defined below; N_SUPPORTED_BARATINOO_VERSIONS is the number
 * of such levels and sizes those tables.  BV_UNSUPPORTED is what
 * get_baratinoo_supported_version() returns for any other library version.
 */
typedef enum {
	BV_UNSUPPORTED = -1,
	BV_8_1,
	BV_8_4,
	BV_8_6,
	N_SUPPORTED_BARATINOO_VERSIONS
} SupportedBaratinooVersion;
/* BARATINOO_UTF8: value of the UTF-8 text encoding constant for each
 * compatibility level.  The 8.6 level reuses the 8.4 value. */
static const BARATINOO_TEXT_ENCODING bv_BARATINOO_UTF8[N_SUPPORTED_BARATINOO_VERSIONS] = {
	[BV_8_1] = BARATINOO_UTF8__V8_1,
	[BV_8_4] = BARATINOO_UTF8__V8_4,
	[BV_8_6] = BARATINOO_UTF8__V8_4,
};
/* Picks the entry for the level stored in baratinoo_engine.supported_version,
 * so it must only be used once that field holds a supported level. */
#define BARATINOO_UTF8 (bv_BARATINOO_UTF8[baratinoo_engine.supported_version])
/* BaratinooVoiceInfo */
/* Symbolic index of each BaratinooVoiceInfo member this module reads; used as
 * the second index of bv_VoiceInfo_offsets. */
enum {
	VI_name,
	VI_language,
	VI_iso639,
	VI_iso3166,
	VI_gender,
	VI_age,
	N_VI_MEMBERS
};
/* Byte offset of every member listed above, for each compatibility level.
 * The helper macros below expand one row from a versioned structure
 * definition; the 8.6 level reuses the 8.4 structure layout. */
static const size_t bv_VoiceInfo_offsets[N_SUPPORTED_BARATINOO_VERSIONS][N_VI_MEMBERS] = {
#define BVIF_MEMBER_DECL(struct, member) [VI_##member] = G_STRUCT_OFFSET(struct, member)
#define BVIF_ENTRY(struct) { BVIF_MEMBER_DECL(struct, name), \
	BVIF_MEMBER_DECL(struct, language), \
	BVIF_MEMBER_DECL(struct, iso639), \
	BVIF_MEMBER_DECL(struct, iso3166), \
	BVIF_MEMBER_DECL(struct, gender), \
	BVIF_MEMBER_DECL(struct, age), }
	[BV_8_1] = BVIF_ENTRY(BaratinooVoiceInfo__V8_1),
	[BV_8_4] = BVIF_ENTRY(BaratinooVoiceInfo__V8_4),
	[BV_8_6] = BVIF_ENTRY(BaratinooVoiceInfo__V8_4),
#undef BVIF_ENTRY
#undef BVIF_MEMBER_DECL
};
/* Reads `member` (of type member_type) from the voice info pointed to by
 * struct_p, using the offset recorded for the level stored in
 * baratinoo_engine.supported_version. */
#define VOICE_INFO_MEMBER(member_type, struct_p, member) \
	G_STRUCT_MEMBER(member_type, struct_p, bv_VoiceInfo_offsets[baratinoo_engine.supported_version][VI_##member])
/* BCinputTextBufferNew() gained an additional "uri" parameter in 8.6 */
/* Signature used by callers in this file (the pre-8.6 one)... */
typedef BCinputTextBuffer (*bv_BCinputTextBufferNew_t)(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules);
/* ...and the signature with the trailing "uri" argument. */
typedef BCinputTextBuffer (*bv_BCinputTextBufferNew__V8_6_t)(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules, const char *uri);
/* For the older levels the library symbol is used as-is.  Note that the
 * BCinputTextBufferNew dispatching macro is only defined at the end of this
 * section, so every use of the name above that point still refers to the
 * library symbol. */
#define BCinputTextBufferNew__V8_1 BCinputTextBufferNew
/* Adapter presenting the 4-argument signature: calls the library symbol
 * through the 5-argument function type, passing NULL as the uri. */
static BCinputTextBuffer BCinputTextBufferNew__V8_6(BARATINOO_PARSING parsing, BARATINOO_TEXT_ENCODING encoding, int voiceIndex, char *voiceModules)
{
	bv_BCinputTextBufferNew__V8_6_t func = (bv_BCinputTextBufferNew__V8_6_t) BCinputTextBufferNew;
	return func(parsing, encoding, voiceIndex, voiceModules, NULL);
}
/* Implementation to call for each compatibility level. */
static const bv_BCinputTextBufferNew_t bv_BCinputTextBufferNew[] = {
	[BV_8_1] = BCinputTextBufferNew__V8_1,
	[BV_8_4] = BCinputTextBufferNew__V8_1,
	[BV_8_6] = BCinputTextBufferNew__V8_6,
};
/* From here on, BCinputTextBufferNew(...) dispatches through the table using
 * baratinoo_engine.supported_version. */
#define BCinputTextBufferNew (bv_BCinputTextBufferNew[baratinoo_engine.supported_version])
#endif /* ! BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME */
/*------------------------ Speech-Dispatcher module ------------------------*/
#include <speechd_types.h>
#include "module_utils.h"
/* Module identification.  DBG_MODNAME is the prefix this file prepends to the
 * format string of every DBG() message. */
#define MODULE_NAME "baratinoo"
#define DBG_MODNAME "Baratinoo: "
#define MODULE_VERSION "0.2"
/* NOTE(review): DEBUG_MODULE and DECLARE_DEBUG() are presumably consumed by
 * module_utils.h to provide the Debug flag tested in baratinoo_trace_cb() --
 * confirm there. */
#define DEBUG_MODULE 1
DECLARE_DEBUG();
/* State of the module: the Baratinoo engine handle plus everything
 * module_speak_sync() and the control entry points share. */
typedef struct {
#ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME
	/* Compatibility level, set in module_init() and used by the bv_* lookup
	 * macros. */
	SupportedBaratinooVersion supported_version;
#endif
	/* Engine handle created with BCnew() in module_init(). */
	BCengine engine;
	/* The buffer consumed by the TTS engine.  Non-NULL only while
	 * module_speak_sync() is handling a message. */
	BCinputTextBuffer buffer;
	/* NULL-terminated voice list built by baratinoo_list_voices() and
	 * returned by module_list_voices(). */
	SPDVoice **voice_list;
	/* settings */
	/* Index of the selected Baratinoo voice, passed when creating the input
	 * buffer. */
	int voice;
	/* request flags */
	/* Checked by the synthesis loop and the output callback to abort early. */
	gboolean stop_requested;
	/* When set, the synthesis loop stops once pause_index_sent is set too. */
	gboolean pause_requested;
	/* Set by the synthesis loop when, with a pause requested, a reached mark
	 * name starts with INDEX_MARK_BODY. */
	gboolean pause_index_sent;
} Engine;
/* engine and state */
/* The single engine instance used by every function of this module. */
static Engine baratinoo_engine = {
	.engine = NULL,
	.buffer = NULL,
	.voice_list = NULL,
	.voice = 0,
	.stop_requested = FALSE,
	.pause_requested = FALSE,
	.pause_index_sent = FALSE,
};
/* Whether BCinitlib() succeeded, i.e. whether module_close() has to call
 * BCterminatelib(). */
static gboolean BC_initialized = FALSE;
/* Internal functions prototypes */
/* Builds the NULL-terminated SPDVoice list from the engine's voices. */
static SPDVoice **baratinoo_list_voices(BCengine *engine);
/* Parameters: callbacks handed to UPDATE_PARAMETER / UPDATE_STRING_PARAMETER
 * in module_speak_sync() */
static void baratinoo_set_voice_type(SPDVoiceType voice);
static void baratinoo_set_language(char *lang);
static void baratinoo_set_synthesis_voice(char *synthesis_voice);
/* Engine callbacks: trace logger given to BCinitlib() and audio sink given to
 * BCsetOutputSignal() */
static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args);
static int baratinoo_output_signal(void *privateData, const void *address, int length);
/* SSML conversion functions */
static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size);
/* Module configuration options */
/* Their default values are registered in module_load(). */
/* Configuration file handed to BCinit() */
MOD_OPTION_1_STR(BaratinooConfigPath);
/* Sample rate requested from the engine and advertised for the audio tracks */
MOD_OPTION_1_INT(BaratinooSampleRate);
/* Passed as second argument of BCprocessLoop() */
MOD_OPTION_1_INT(BaratinooResponsiveness);
MOD_OPTION_1_INT(BaratinooQueueSize);
/* Engine rate values that the SPD rates -100, 0 and +100 map to */
MOD_OPTION_1_INT(BaratinooMinRate);
MOD_OPTION_1_INT(BaratinooNormalRate);
MOD_OPTION_1_INT(BaratinooMaxRate);
/* Character lists driving punctuation handling */
MOD_OPTION_1_STR(BaratinooPunctuationList);
MOD_OPTION_1_STR(BaratinooIntonationList);
MOD_OPTION_1_STR(BaratinooNoIntonationList);
/* Public functions */
/**
 * @brief Registers the module configuration options with their defaults.
 * @returns Always 0.
 *
 * The BaratinooConfigPath default is the BARATINOO_CONFIG_PATH environment
 * variable when it is set and non-empty, and "baratinoo.cfg" in the user
 * configuration directory otherwise.
 */
int module_load(void)
{
	const char *env_path;
	char *config_default;

	INIT_SETTINGS_TABLES();
	REGISTER_DEBUG();

	/* Configuration file: environment first, XDG user location second */
	env_path = getenv("BARATINOO_CONFIG_PATH");
	if (env_path == NULL || env_path[0] == '\0')
		config_default = g_build_filename(g_get_user_config_dir(),
						  "baratinoo.cfg", NULL);
	else
		config_default = g_strdup(env_path);
	MOD_OPTION_1_STR_REG(BaratinooConfigPath, config_default);
	g_free(config_default);

	/* Audio: 16000Hz is what the voices use natively (no resampling) */
	MOD_OPTION_1_INT_REG(BaratinooSampleRate, 16000);
	/* -1 leaves the responsiveness up to Baratinoo */
	MOD_OPTION_1_INT_REG(BaratinooResponsiveness, -1);
	/* Queue up to 20 seconds worth of samples */
	MOD_OPTION_1_INT_REG(BaratinooQueueSize, 20*BaratinooSampleRate);

	/* Bounds and neutral value of the speech rate */
	MOD_OPTION_1_INT_REG(BaratinooMinRate, -100);
	MOD_OPTION_1_INT_REG(BaratinooNormalRate, 0);
	MOD_OPTION_1_INT_REG(BaratinooMaxRate, 100);

	/* Punctuation character classes */
	MOD_OPTION_1_STR_REG(BaratinooPunctuationList, "@/+-_");
	MOD_OPTION_1_STR_REG(BaratinooIntonationList, "?!;:,.");
	MOD_OPTION_1_STR_REG(BaratinooNoIntonationList, "");

	return 0;
}
#ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME
/* Maps the version reported by the loaded library to one of our compatibility
 * levels, or BV_UNSUPPORTED when that version is not one we handle. */
static SupportedBaratinooVersion get_baratinoo_supported_version(void)
{
	const BaratinooVersionStruct *lib = BCgetBaratinooVersionStruct();

	if (lib->major != 8)
		return BV_UNSUPPORTED;

	if (lib->minor == 1)
		return BV_8_1;
	if (lib->minor == 4)
		return BV_8_4;
	if (lib->minor == 6)
		return BV_8_6;

	return BV_UNSUPPORTED;
}
#endif
/**
 * @brief Initializes the Baratinoo library and engine.
 * @param status_info Receives a newly allocated human-readable status message.
 * @returns 0 on success, -1 on failure.
 *
 * Steps, in order: library initialization, version check (unless the stable
 * ABI build is used), engine creation and initialization from
 * BaratinooConfigPath, voice listing, output signal and marker event setup.
 * Nothing is released here on failure; module_close() does the teardown.
 */
int module_init(char **status_info)
{
	Engine *self = &baratinoo_engine;

	DBG(DBG_MODNAME "Module init");

	module_audio_set_server();

	DBG(DBG_MODNAME "BaratinooPunctuationList = %s", BaratinooPunctuationList);
	DBG(DBG_MODNAME "BaratinooIntonationList = %s", BaratinooIntonationList);
	DBG(DBG_MODNAME "BaratinooNoIntonationList = %s", BaratinooNoIntonationList);

	*status_info = NULL;

	/* Library */
	if (BCinitlib(baratinoo_trace_cb) != BARATINOO_INIT_OK) {
		DBG(DBG_MODNAME "Failed to initialize library");
		*status_info = g_strdup("Failed to initialize Baratinoo. "
					"Make sure your installation is "
					"properly set up.");
		return -1;
	}
	BC_initialized = TRUE;
	DBG(DBG_MODNAME "Using Baratinoo %s", BCgetBaratinooVersion());

#ifndef BARATINOO_ABI_IS_STABLE_ENOUGH_FOR_ME
	/* Compatibility level, needed by the bv_* lookups from now on */
	self->supported_version = get_baratinoo_supported_version();
	if (self->supported_version == BV_UNSUPPORTED) {
		DBG(DBG_MODNAME "Unsupported library version");
		*status_info = g_strdup("Unsupported Baratinoo engine version.");
		return -1;
	}
	DBG(DBG_MODNAME "Using Baratinoo compatibility level %d", self->supported_version);
#endif

	/* Engine */
	self->engine = BCnew(NULL);
	if (self->engine == NULL) {
		DBG(DBG_MODNAME "Failed to allocate engine");
		*status_info = g_strdup("Failed to create Baratinoo engine.");
		return -1;
	}

	BCinit(self->engine, BaratinooConfigPath);
	if (BCgetState(self->engine) != BARATINOO_INITIALIZED) {
		DBG(DBG_MODNAME "Failed to initialize engine");
		*status_info = g_strdup("Failed to initialize Baratinoo engine. "
					"Make sure your setup is OK.");
		return -1;
	}

	/* Voices */
	self->voice_list = baratinoo_list_voices(self->engine);
	if (self->voice_list == NULL) {
		DBG(DBG_MODNAME "No voice available");
		*status_info = g_strdup("No voice found. Make sure your setup "
					"includes at least one voice.");
		return -1;
	}

	/* Audio output: the engine hands PCM data to baratinoo_output_signal() */
	DBG(DBG_MODNAME "Using PCM output at %dHz", BaratinooSampleRate);
	BCsetOutputSignal(self->engine, baratinoo_output_signal, self, BARATINOO_PCM, BaratinooSampleRate);
	if (BCgetState(self->engine) != BARATINOO_INITIALIZED) {
		DBG(DBG_MODNAME "Failed to initialize output signal handler");
		*status_info = g_strdup("Failed to initialize Baratinoo output "
					"signal handler. Is the configured "
					"sample rate correct?");
		return -1;
	}

	/* Marker events are what index marks are reported from */
	BCsetWantedEvent(self->engine, BARATINOO_MARKER_EVENT);

	DBG(DBG_MODNAME "Initialization successfully.");
	*status_info = g_strdup("Baratinoo initialized successfully.");

	return 0;
}
/* Returns the NULL-terminated voice list stored in the engine state (NULL if
 * none has been built).  The list stays owned by the module. */
SPDVoice **module_list_voices(void)
{
	return baratinoo_engine.voice_list;
}
/**
 * @brief Synthesizes one message, returning only once it is done or aborted.
 * @param data Message payload.
 * @param bytes Length of @p data in bytes.
 * @param msgtype Kind of message, selecting how @p data is converted.
 *
 * Builds the text sent to the engine (speech parameter commands followed by
 * the converted payload), hands it to the engine through a fresh input
 * buffer, then runs the engine process loop, reporting index marks as marker
 * events come in and honoring the stop/pause request flags.
 */
void module_speak_sync(const gchar *data, size_t bytes, SPDMessageType msgtype)
{
	Engine *engine = &baratinoo_engine;
	GString *buffer = NULL;
	int rate;
	DBG(DBG_MODNAME "Speech requested");
	assert(msg_settings.rate >= -100 && msg_settings.rate <= +100);
	assert(msg_settings.pitch >= -100 && msg_settings.pitch <= +100);
	assert(msg_settings.pitch_range >= -100 && msg_settings.pitch_range <= +100);
	assert(msg_settings.volume >= -100 && msg_settings.volume <= +100);
	/* a non-NULL engine buffer means a previous message is still in flight */
	if (engine->buffer != NULL) {
		DBG(DBG_MODNAME "WARNING: module_speak() called during speech");
		module_speak_error();
		return;
	}
	/* select voice following parameters. we don't use tags for this as
	 * we need to do some computation on our end anyway and need pass an
	 * ID when creating the buffer too */
	UPDATE_STRING_PARAMETER(voice.language, baratinoo_set_language);
	UPDATE_PARAMETER(voice_type, baratinoo_set_voice_type);
	UPDATE_STRING_PARAMETER(voice.name, baratinoo_set_synthesis_voice);
	engine->buffer = BCinputTextBufferNew(BARATINOO_PROPRIETARY_PARSING,
					      BARATINOO_UTF8, engine->voice, 0);
	if (!engine->buffer) {
		DBG(DBG_MODNAME "Failed to allocate input buffer");
		module_speak_error();
		goto err;
	}
	buffer = g_string_new(NULL);
	/* Apply speech parameters */
	/* The SPD rate in [-100, +100] is mapped linearly onto
	 * [BaratinooMinRate, BaratinooNormalRate] or
	 * [BaratinooNormalRate, BaratinooMaxRate] depending on its sign. */
	if (msg_settings.rate < 0)
		rate = BaratinooNormalRate + (BaratinooNormalRate - BaratinooMinRate) * msg_settings.rate / 100;
	else
		rate = BaratinooNormalRate + (BaratinooMaxRate - BaratinooNormalRate) * msg_settings.rate / 100;
	/* a command is only emitted for values that differ from 0 */
	if (rate != 0) {
		g_string_append_printf(buffer, "\\rate{%+d%%}", rate);
	}
	if (msg_settings.pitch != 0 || msg_settings.pitch_range != 0) {
		g_string_append_printf(buffer, "\\pitch{%+d%% %+d%%}",
				       msg_settings.pitch,
				       msg_settings.pitch_range);
	}
	if (msg_settings.volume != 0) {
		g_string_append_printf(buffer, "\\volume{%+d%%}",
				       msg_settings.volume);
	}
	/* Convert the payload according to the message type */
	switch (msgtype) {
	case SPD_MSGTYPE_SPELL: /* FIXME: use \spell when Voxygen actually implements it */
		/* TODO: in the meanwhile use a generic engine */
	case SPD_MSGTYPE_CHAR:
		/* spelling and single characters are both spoken as characters */
		g_string_append(buffer, "\\sayas<{characters}");
		g_string_append_len(buffer, data, bytes);
		g_string_append(buffer, "\\sayas>{}");
		break;
	case SPD_MSGTYPE_KEY: /* TODO: use a generic engine */
		if (g_utf8_strlen(data, bytes) == 1) {
			/* a key name made of a single character is spoken as a character */
			g_string_append(buffer, "\\sayas<{characters}");
			g_string_append_len(buffer, data, bytes);
			g_string_append(buffer, "\\sayas>{}");
		} else {
			gchar *c;
			g_string_append_len(buffer, data, bytes);
			/* replace underscores by spaces; this walks the whole
			 * buffer, including the commands appended above */
			for (c = buffer->str; *c; c++)
				if (*c == '_')
					*c = ' ';
		}
		break;
	default: /* FIXME: */
	case SPD_MSGTYPE_TEXT:
		append_ssml_as_proprietary(engine, buffer, data, bytes);
		break;
	}
	DBG(DBG_MODNAME "SSML input: %s", data);
	DBG(DBG_MODNAME "Sending buffer: %s", buffer->str);
	if (!BCinputTextBufferInit(engine->buffer, buffer->str)) {
		DBG(DBG_MODNAME "Failed to initialize input buffer");
		module_speak_error();
		goto err;
	}
	/* the text is not used through this GString past this point */
	g_string_free(buffer, TRUE);
	buffer = NULL;
	/* start from a clean slate regarding stop/pause requests */
	engine->stop_requested = FALSE;
	engine->pause_requested = FALSE;
	engine->pause_index_sent = FALSE;
	BARATINOOC_STATE state = BARATINOO_READY;
	state = BCinputTextBufferSetInEngine(engine->buffer, engine->engine);
	if (state != BARATINOO_READY) {
		DBG(DBG_MODNAME "Failed to set input buffer");
		module_speak_error();
		/* NOTE(review): unlike the failures above, this jumps to `out`, so
		 * an end event is reported after the speak error -- confirm this
		 * is intended. */
		goto out;
	}
	module_speak_ok();
	module_report_event_begin();
	do {
		/* abort on stop, or on pause once an index mark was reached */
		if (engine->stop_requested || (engine->pause_requested && engine->pause_index_sent)) {
			BCpurge(engine->engine);
			/* NOTE(review): the handle is cleared here, so the
			 * BCinputTextBufferDelete() call at `out` receives NULL on
			 * this path -- confirm whether BCpurge() releases the
			 * buffer, otherwise it is leaked. */
			engine->buffer = NULL;
			break;
		}
		/* Process server events in case we were told to stop in between */
		module_process(STDIN_FILENO, 0);
		state = BCprocessLoop(engine->engine, BaratinooResponsiveness);
		if (state == BARATINOO_EVENT) {
			BaratinooEvent event = BCgetEvent(engine->engine);
			if (event.type == BARATINOO_MARKER_EVENT) {
				DBG(DBG_MODNAME "Reached mark '%s' at sample %lu", event.data.marker.name, event.sampleStamp);
				module_report_index_mark(event.data.marker.name);
				/* with a pause pending, a mark whose name starts with
				 * INDEX_MARK_BODY is where the pause takes effect */
				if (engine->pause_requested &&
				    !strncmp(event.data.marker.name,
					     INDEX_MARK_BODY,
					     INDEX_MARK_BODY_LEN)) {
					engine->pause_index_sent = 1;
				}
			}
		}
	} while (state == BARATINOO_RUNNING || state == BARATINOO_EVENT);
out:
	/* report how the message ended; pause takes precedence over stop */
	if (engine->pause_requested)
		module_report_event_pause();
	else if (engine->stop_requested)
		module_report_event_stop();
	else
		module_report_event_end();
	BCinputTextBufferDelete(engine->buffer);
	engine->buffer = NULL;
	DBG(DBG_MODNAME "leaving module_speak_sync() normally");
	return;
err:
	/* failure before synthesis started: release what was allocated, without
	 * reporting any event */
	if (buffer)
		g_string_free(buffer, TRUE);
	if (engine->buffer) {
		BCinputTextBufferDelete(engine->buffer);
		engine->buffer = NULL;
	}
	return;
}
/* Flags the current message for stopping.  The flag is polled by the
 * synthesis loop and by the output callback.  Always returns 0. */
int module_stop(void)
{
	DBG(DBG_MODNAME "Stop requested");

	baratinoo_engine.stop_requested = TRUE;

	return 0;
}
size_t module_pause(void)
{
Engine *engine = &baratinoo_engine;
DBG(DBG_MODNAME "Pause requested");
engine->stop_requested = TRUE;
return 0;
}
int module_close(void)
{
Engine *engine = &baratinoo_engine;
DBG(DBG_MODNAME "close()");
/* destroy voice list */
if (engine->voice_list != NULL) {
int i;
for (i = 0; engine->voice_list[i] != NULL; i++) {
g_free(engine->voice_list[i]->name);
g_free(engine->voice_list[i]->language);
g_free(engine->voice_list[i]->variant);
g_free(engine->voice_list[i]);
}
g_free(engine->voice_list);
engine->voice_list = NULL;
}
/* destroy engine */
if (engine->engine) {
BCdelete(engine->engine);
engine->engine = NULL;
}
if (BC_initialized) {
/* uninitialize */
BCterminatelib();
BC_initialized = FALSE;
}
DBG(DBG_MODNAME "Module closed.");
return 0;
}
/* Internal functions */
/**
 * @brief Builds the SPD voice list from the voices the engine provides.
 * @param engine The engine to query.
 * @returns A newly allocated NULL-terminated array of newly allocated
 *          @c SPDVoice, or NULL when the engine reports less than one voice.
 *
 * Each entry gets a copy of the Baratinoo voice name and language; the other
 * fields are left zeroed.
 */
static SPDVoice **baratinoo_list_voices(BCengine *engine)
{
	SPDVoice **list;
	int count;
	int idx;

	count = BCgetNumberOfVoices(engine);
	if (count < 1)
		return NULL;

	/* one extra slot for the NULL terminator */
	list = g_malloc_n(count + 1, sizeof *list);

	DBG(DBG_MODNAME "Got %d available voices:", count);
	for (idx = 0; idx < count; idx++) {
		/* only ever read through VOICE_INFO_MEMBER(), never directly */
		BaratinooVoiceInfo raw_info_DO_NOT_ACCESS_DIRECTLY = BCgetVoiceInfo(engine, idx);
		void *info = &raw_info_DO_NOT_ACCESS_DIRECTLY;
		SPDVoice *entry;
		const char *lang;

		DBG(DBG_MODNAME "\tVoice #%d: name=%s, language=%s, gender=%s",
		    idx, VOICE_INFO_MEMBER(char *, info, name),
		    VOICE_INFO_MEMBER(char *, info, language),
		    VOICE_INFO_MEMBER(char *, info, gender));

		entry = g_malloc0(sizeof *entry);
		entry->name = g_strdup(VOICE_INFO_MEMBER(char *, info, name));
		lang = VOICE_INFO_MEMBER(char *, info, language);
		entry->language = g_strdup(lang);

		list[idx] = entry;
	}
	list[idx] = NULL;

	return list;
}
/* Voice selection */
/**
 * @brief Scores how well a Baratinoo voice suits a SPD language
 * @param vinfo The voice info to score (read through VOICE_INFO_MEMBER()).
 * @param lang The SPD language wanted.
 * @returns The score: the higher, the better the match.
 *
 * A case-insensitive match of the whole language code is worth 10.
 * Otherwise both codes are split at the first '-': the language part is worth
 * 8 when it equals the voice's language part or ISO 639 code and 5 when only
 * the first two characters agree; the region part adds 2 when it equals the
 * voice's region part or ISO 3166 code and 1 when only the first two
 * characters agree.
 */
static int lang_match_level(const void *vinfo, const char *lang)
{
	const char *voice_lang = VOICE_INFO_MEMBER(char *, vinfo, language);
	const char *voice_iso639 = VOICE_INFO_MEMBER(char *, vinfo, iso639);
	const char *voice_iso3166 = VOICE_INFO_MEMBER(char *, vinfo, iso3166);
	int score = 0;

	if (g_ascii_strcasecmp(lang, voice_lang) != 0) {
		gchar **have = g_strsplit_set(voice_lang, "-", 2);
		gchar **want = g_strsplit_set(lang, "-", 2);

		/* language part */
		if (g_ascii_strcasecmp(have[0], want[0]) == 0 ||
		    g_ascii_strcasecmp(voice_iso639, want[0]) == 0)
			score += 8;
		else if (g_ascii_strncasecmp(have[0], want[0], 2) == 0)
			score += 5; /* same first two characters only */

		/* region part, which can only match if one is wanted */
		if (want[1]) {
			if ((have[1] && g_ascii_strcasecmp(have[1], want[1]) == 0) ||
			    g_ascii_strcasecmp(voice_iso3166, want[1]) == 0)
				score += 2;
			else if (have[1] && g_ascii_strncasecmp(have[1], want[1], 2) == 0)
				score += 1; /* same first two characters only */
		}

		g_strfreev(have);
		g_strfreev(want);
	} else {
		score += 10;
	}

	DBG(DBG_MODNAME "lang_match_level({language=%s, iso639=%s, iso3166=%s}, lang=%s) = %d",
	    voice_lang, voice_iso639, voice_iso3166, lang, score);

	return score;
}
/**
 * @brief Compares two Baratinoo voices against what SPD asked for.
 * @param voice_a A voice info.
 * @param voice_b Another voice info.
 * @param lang The SPD language wanted.
 * @param voice_code The SPD voice type wanted.
 * @returns < 0 if @p voice_a suits best, > 0 if @p voice_b suits best, and 0
 *          if they suit equally. The further from 0, the larger the gap.
 *
 * The result is the difference of the language scores, adjusted by one point
 * for the voice having the wanted gender (when the genders differ) and by one
 * point for the voice being in the wanted age group.
 */
static int sort_voice(const void *voice_a, const void *voice_b, const char *lang, SPDVoiceType voice_code)
{
	const char *gender_a = VOICE_INFO_MEMBER(char *, voice_a, gender);
	const char *gender_b = VOICE_INFO_MEMBER(char *, voice_b, gender);
	int age_a = VOICE_INFO_MEMBER(int, voice_a, age);
	int age_b = VOICE_INFO_MEMBER(int, voice_b, age);
	int wants_child = (voice_code == SPD_CHILD_MALE ||
			   voice_code == SPD_CHILD_FEMALE);
	int result = 0;

	/* language */
	result -= lang_match_level(voice_a, lang);
	result += lang_match_level(voice_b, lang);

	/* gender, only relevant when it tells the two voices apart */
	if (strcmp(gender_a, gender_b) != 0) {
		const char *wanted;

		switch (voice_code) {
		case SPD_FEMALE1:
		case SPD_FEMALE2:
		case SPD_FEMALE3:
		case SPD_CHILD_FEMALE:
			wanted = "female";
			break;
		default:
			/* male types, and anything else */
			wanted = "male";
			break;
		}

		if (strcmp(wanted, gender_a) == 0)
			result--;
		if (strcmp(wanted, gender_b) == 0)
			result++;
	}

	/* age; an age of 0 is treated as unset */
	if (wants_child) {
		if (age_a && age_a <= 15)
			result--;
		if (age_b && age_b <= 15)
			result++;
	} else if (age_a && age_b) {
		/* most voices are expected to be adult ones, so the age is only
		 * taken into account when both voices have one */
		if (age_a > 15)
			result--;
		if (age_b > 15)
			result++;
	}

	DBG(DBG_MODNAME "Comparing %s <> %s gives %d",
	    VOICE_INFO_MEMBER(char*, voice_a, name),
	    VOICE_INFO_MEMBER(char*, voice_b, name),
	    result);

	return result;
}
/* Finds the Baratinoo voice suiting a language code and SD voice code best.
 *
 * Returns the index of that voice, or -1 if the engine has no voice at all.
 * Among equally suited voices, the "2" and "3" voice types pick the second
 * and third one when there are that many. */
static int baratinoo_find_voice(const Engine *engine, const char *lang, SPDVoiceType voice_code)
{
	BaratinooVoiceInfo best_info;
	int best = -1;
	int n_equal = 0; /* how many voices tied with the best one so far */
	int wanted_rank; /* rank of the voice we'd like among equal ones */
	int i;

	DBG(DBG_MODNAME "baratinoo_find_voice(lang=%s, voice_code=%d)",
	    lang, voice_code);

	if (voice_code == SPD_MALE3 || voice_code == SPD_FEMALE3)
		wanted_rank = 2;
	else if (voice_code == SPD_MALE2 || voice_code == SPD_FEMALE2)
		wanted_rank = 1;
	else
		wanted_rank = 0;

	for (i = 0; i < BCgetNumberOfVoices(engine->engine); i++) {
		BaratinooVoiceInfo candidate = BCgetVoiceInfo(engine->engine, i);
		int cmp;

		/* the first voice is the best one until proven otherwise */
		if (i == 0) {
			best = i;
			best_info = candidate;
			n_equal++;
			continue;
		}

		cmp = sort_voice(&best_info, &candidate, lang, voice_code);
		if (cmp < 0)
			continue; /* the current best suits better */

		/* a strictly better voice starts a new group of equals */
		if (cmp > 0)
			n_equal = 0;
		if (n_equal <= wanted_rank) {
			best = i;
			best_info = candidate;
		}
		n_equal++;
	}

	return best;
}
/* Selects the engine voice suiting a language code and SD voice code best.
 * The current voice is kept when no voice is found. */
static void baratinoo_set_language_and_voice(Engine *engine, const char *lang, SPDVoiceType voice_code)
{
	int found = baratinoo_find_voice(engine, lang, voice_code);

	if (found < 0) {
		DBG(DBG_MODNAME "No voice match found, not changing voice.");
		return;
	}

	DBG(DBG_MODNAME "Best voice match is %d.", found);
	engine->voice = found;
}
/* UPDATE_PARAMETER callback for the voice type: re-selects the voice for the
 * given type and the language currently in the message settings, which has
 * to be set. */
static void baratinoo_set_voice_type(SPDVoiceType voice)
{
	assert(msg_settings.voice.language);

	baratinoo_set_language_and_voice(&baratinoo_engine,
					 msg_settings.voice.language, voice);
}
/* UPDATE_PARAMETER callback for the language: re-selects the voice for the
 * given language and the voice type currently in the message settings. */
static void baratinoo_set_language(char *lang)
{
	baratinoo_set_language_and_voice(&baratinoo_engine, lang,
					 msg_settings.voice_type);
}
/* UPDATE_PARAMETER callback for the voice name: selects the first engine
 * voice whose name matches, ignoring ASCII case.  Nothing changes when the
 * name is NULL or matches no voice. */
static void baratinoo_set_synthesis_voice(char *synthesis_voice)
{
	Engine *self = &baratinoo_engine;
	int idx = 0;

	if (!synthesis_voice)
		return;

	while (idx < BCgetNumberOfVoices(self->engine)) {
		BaratinooVoiceInfo candidate = BCgetVoiceInfo(self->engine, idx);
		const char *candidate_name = VOICE_INFO_MEMBER(char*, &candidate, name);

		if (g_ascii_strcasecmp(synthesis_voice, candidate_name) == 0) {
			self->voice = idx;
			return;
		}
		idx++;
	}

	DBG(DBG_MODNAME "Failed to set synthesis voice to '%s': not found.",
	    synthesis_voice);
}
/* Engine callbacks */
/**
 * @brief Trace callback: prints Baratinoo messages on stderr
 * @param level Importance of the message.
 * @param engine_num ID of the engine the message comes from, 0 meaning the
 *                   library itself.
 * @param source Category of the message.
 * @param data Private data, not used.
 * @param format printf-like format of the message.
 * @param args Arguments for @p format.
 *
 * INIT, INFO and DEBUG messages are dropped unless debugging is enabled; any
 * other level is always printed.
 */
static void baratinoo_trace_cb(BaratinooTraceLevel level, int engine_num, const char *source, const void *data, const char *format, va_list args)
{
	const char *label;
	gboolean debug_only = FALSE;

	switch (level) {
	case BARATINOO_TRACE_ERROR:
		label = "ERROR";
		break;
	case BARATINOO_TRACE_WARNING:
		label = "WARNING";
		break;
	case BARATINOO_TRACE_INIT:
		label = "INIT";
		debug_only = TRUE;
		break;
	case BARATINOO_TRACE_INFO:
		label = "INFO";
		debug_only = TRUE;
		break;
	case BARATINOO_TRACE_DEBUG:
		label = "DEBUG";
		debug_only = TRUE;
		break;
	default:
		/* unknown levels are printed without a label */
		label = "";
		break;
	}

	if (debug_only && !Debug)
		return;

	if (engine_num != 0)
		fprintf(stderr, "Baratinoo engine #%d: ", engine_num);
	else
		fprintf(stderr, "Baratinoo library: ");
	fprintf(stderr, "%s: %s ", label, source);
	vfprintf(stderr, format, args);
	fprintf(stderr, "\n");
}
/**
 * @brief Audio callback, fed by the engine while it synthesizes
 * @param private_data The Engine structure registered with the callback.
 * @param address The audio samples.
 * @param length Size of @p address, in bytes.
 * @returns Non-zero to break out of the process loop, 0 to carry on.
 *
 * Wraps the samples into a mono 16 bits track at BaratinooSampleRate and
 * passes it to the audio output, unless a stop was requested.
 *
 * @see BCprocessLoop()
 */
static int baratinoo_output_signal(void *private_data, const void *address, int length)
{
	Engine *self = private_data;
	AudioTrack track;
	AudioFormat byte_order;

	/* A stop may come in the middle of an engine cycle: drop the audio
	 * right here so the speech ends as early as possible. */
	if (self->stop_requested) {
		DBG(DBG_MODNAME "Not playing message because it got stopped");
		return 1;
	}

#if defined(BYTE_ORDER) && (BYTE_ORDER == BIG_ENDIAN)
	byte_order = SPD_AUDIO_BE;
#else
	byte_order = SPD_AUDIO_LE;
#endif

	/* 16 bits PCM: a sample takes 2 bytes */
	track.bits = 16;
	track.num_channels = 1;
	track.sample_rate = BaratinooSampleRate;
	track.num_samples = length / 2;
	track.samples = (short *) address;

	DBG(DBG_MODNAME "Queueing %d samples", length / 2);
	module_tts_output_server(&track, byte_order);

	/* a stop may have been requested while the track was handed over */
	return self->stop_requested;
}
/* SSML conversion functions */
/* State shared by the markup parser callbacks while converting SSML to the
 * Baratinoo proprietary format. */
typedef struct {
	/* Engine state, read for the voice in effect outside of any element */
	const Engine *engine;
	/* Output: the proprietary-format text being built */
	GString *buffer;
	/* Voice ID stack for the current element */
	/* One entry is pushed per opened element and popped when it closes; at
	 * most 32 levels are tracked. */
	int voice_stack[32];
	/* Number of entries currently in use in voice_stack */
	unsigned int voice_stack_len;
} SsmlPraserState;
/* Records the voice in effect for a newly opened element, adding a language
 * change command for @p lang if appropriate.
 *
 * The element inherits the voice of its parent (top of the voice stack), or
 * the engine's voice when the stack is empty.  When @p lang is not NULL and
 * selects a different voice, a "\vox{N}" command is appended to the output
 * and that voice is the one recorded.  If the stack is full, nothing is
 * recorded and a warning is logged.
 *
 * The voice lookup goes through state->engine, the same engine the fallback
 * voice is read from, rather than through the global engine instance. */
static void ssml2baratinoo_push_lang(SsmlPraserState *state, const char *lang)
{
	int voice;

	if (state->voice_stack_len > 0)
		voice = state->voice_stack[state->voice_stack_len - 1];
	else
		voice = state->engine->voice;

	if (lang) {
		int new_voice;

		DBG(DBG_MODNAME "Processing xml:lang=\"%s\"", lang);
		new_voice = baratinoo_find_voice(state->engine, lang,
						 msg_settings.voice_type);
		/* only emit a command if the voice actually changes */
		if (new_voice >= 0 && new_voice != voice) {
			g_string_append_printf(state->buffer, "\\vox{%d}", new_voice);
			voice = new_voice;
		}
	}

	if (state->voice_stack_len >= G_N_ELEMENTS(state->voice_stack)) {
		DBG(DBG_MODNAME "WARNING: voice stack exhausted, expect incorrect voices.");
	} else {
		state->voice_stack[state->voice_stack_len++] = voice;
	}
}
/* Undoes a @c ssml2baratinoo_push_lang(): drops the top of the voice stack
 * and, if the entry below it holds a different voice, appends the "\vox{N}"
 * command switching back to it.  Does nothing on an empty stack. */
static void ssml2baratinoo_pop_lang(SsmlPraserState *state)
{
	int closed_voice;
	int parent_voice;

	if (state->voice_stack_len == 0)
		return;

	state->voice_stack_len--;
	closed_voice = state->voice_stack[state->voice_stack_len];

	/* nothing to switch back to when that was the outermost entry */
	if (state->voice_stack_len == 0)
		return;

	parent_voice = state->voice_stack[state->voice_stack_len - 1];
	if (parent_voice != closed_voice)
		g_string_append_printf(state->buffer, "\\vox{%d}", parent_voice);
}
/* Looks @p name up in @p names, a NULL-terminated array of strings that may
 * itself be NULL.
 * Returns the index of the first exact match, or -1 if there is none. */
static int attribute_index(const char **names, const char *name)
{
	const char **cursor;

	if (names == NULL)
		return -1;

	for (cursor = names; *cursor != NULL; cursor++) {
		if (strcmp(*cursor, name) == 0)
			return (int) (cursor - names);
	}

	return -1;
}
/* Markup element start callback.
 *
 * Records the voice of the element (see ssml2baratinoo_push_lang()), then
 * translates <mark>, <emphasis> and <say-as> to their proprietary
 * counterparts.  A missing attribute is output as an empty string.  Other
 * elements produce no output. */
static void ssml2baratinoo_start_element(GMarkupParseContext *ctx,
					 const gchar *element,
					 const gchar **attribute_names,
					 const gchar **attribute_values,
					 gpointer data, GError **error)
{
	SsmlPraserState *state = data;
	int lang_idx = attribute_index(attribute_names, "xml:lang");
	const gchar *lang = NULL;

	/* voice changes */
	if (lang_idx >= 0)
		lang = attribute_values[lang_idx];
	ssml2baratinoo_push_lang(state, lang);

	/* elements */
	if (strcmp(element, "mark") == 0) {
		int idx = attribute_index(attribute_names, "name");
		const gchar *mark_name = idx >= 0 ? attribute_values[idx] : "";

		g_string_append_printf(state->buffer, "\\mark{%s}", mark_name);
		return;
	}

	if (strcmp(element, "emphasis") == 0) {
		int idx = attribute_index(attribute_names, "level");
		const gchar *emph_level = idx >= 0 ? attribute_values[idx] : "";

		g_string_append_printf(state->buffer, "\\emph<{%s}", emph_level);
		return;
	}

	if (strcmp(element, "say-as") == 0) {
		int as_idx = attribute_index(attribute_names, "interpret-as");
		int fmt_idx = attribute_index(attribute_names, "format");
		int detail_idx = attribute_index(attribute_names, "detail");
		const gchar *as = "";
		const gchar *fmt = "";
		const gchar *detail = "";

		if (as_idx < 0) {
			/* without 'interpret-as' the other attributes are dropped too */
			DBG(DBG_MODNAME "Missing required 'interpret-as' attribute of '<say-as>' tag");
		} else {
			as = attribute_values[as_idx];
			if (fmt_idx >= 0) {
				fmt = attribute_values[fmt_idx];
				if (detail_idx >= 0)
					detail = attribute_values[detail_idx];
			} else if (detail_idx >= 0) {
				DBG(DBG_MODNAME "Ignoring 'detail' attribute of '<say-as>' tag because it is "
				    "not supported without a 'format' attribute");
			}
		}

		g_string_append_printf(state->buffer, "\\sayas<{%s %s %s}",
				       as, fmt, detail);
		return;
	}

	/* any other element is ignored */
	/* TODO: handle more elements */
}
/* Markup element end callback.
 *
 * Closes the proprietary counterpart of <emphasis> and <say-as>, then
 * restores the voice that was in effect before the element. */
static void ssml2baratinoo_end_element(GMarkupParseContext *ctx,
				       const gchar *element,
				       gpointer data, GError **error)
{
	SsmlPraserState *state = data;
	const char *closing = NULL;

	if (strcmp(element, "emphasis") == 0)
		closing = "\\emph>{}";
	else if (strcmp(element, "say-as") == 0)
		closing = "\\sayas>{}";

	if (closing != NULL)
		g_string_append(state->buffer, closing);

	ssml2baratinoo_pop_lang(state);
}
/* Markup text node callback.
*
* This not only converts to the proprietary format (by escaping things that
* would be interpreted by it), but also pre-processes the text for some
* features that are missing from Baratinoo.
*
* - Punctuation speaking
*
* As the engine doesn't support speaking of the punctuation itself, we alter
* the input to explicitly tell the engine to do it. It is kind of tricky,
* because we want to keep the punctuation meaning of the characters, e.g. how
* they affect speech as means of intonation and pauses.
*
* The approach here is that for every punctuation character included in the
* selected mode (none/some/most/all), we wrap it in "\sayas<{characters}" markup
* so that it is spoken by the engine. But in order to keep the punctuation
* meaning of the character, in case it has some, we duplicate it outside the
* markup with a heuristic on whether it will or not affect speech intonation
* and pauses, and whether or not the engine would speak the character itself
* already (as we definitely don't want to get duplicated speech for a
* character).
* This heuristic is as follows:
* - If the character is listed in BaratinooIntonationList and the next
* character is not punctuation or alphanumeric, duplicate the character.
* - Always append a space after a duplicated character, hoping the engine
* won't consider speaking it.
*
* This won't always give the same results as the engine would by itself, but
* it isn't really possible as the engine behavior is language-dependent in a
* non-obvious fashion. For example, a French voice will speak "1.2.3" as
* "Un. Deux. Trois", while an English one will speak it as "One dot two dot
* three": the dot here didn't have the same interpretation, and wasn't spoken
* the same (once altering the voice, the other spoken plain and simple).
*
* However, the heuristic here should be highly unlikely to lead to duplicate
* character speaking, and catch most of the intonation and pause cases.
*
* - Why is this done that way?
*
* Another, possibly more robust, approach could be using 2 passes in the
* engine itself, and relying on events to get information on how the engine
* interprets the input in the first (silent) pass, and alter it as needed for
* a second (spoken) pass. This wouldn't guarantee the altered input would be
* interpreted the same, but it would seem like a safe enough bet.
*
* However, the engine is too slow for this to be viable in a real-time
* processing environment for anything but tiny input. Even about 25 lines of
* IRC conversation can easily take several seconds to process in the first
* pass (even without doing any actual pre-processing on our end), delaying
* the actual speech by an unacceptable amount of time.
*
* Ideally, the engine will some day support speaking punctuation itself, and
* this part of the pre-processing could be dropped.
*/
static void ssml2baratinoo_text(GMarkupParseContext *ctx,
				const gchar *text, gsize len,
				gpointer data, GError **error)
{
	SsmlPraserState *state = data;
	const gchar *const end = text + len;
	const gchar *cur;
	/* The punctuation mode does not depend on the character, so work out
	 * once which of the per-character checks apply. */
	const gboolean speak_listed =
		(msg_settings.punctuation_mode == SPD_PUNCT_SOME ||
		 msg_settings.punctuation_mode == SPD_PUNCT_MOST);
	const gboolean speak_all =
		(msg_settings.punctuation_mode == SPD_PUNCT_ALL);
	const gboolean drop_listed =
		(msg_settings.punctuation_mode == SPD_PUNCT_NONE);

	for (cur = text; cur < end; cur = g_utf8_next_char(cur)) {
		gboolean spell_out, silence;
		const gchar *next;
		gunichar ch, following;

		if (*cur == '\\') {
			/* escape the \ by appending a comment so it won't be
			 * interpreted as a command */
			g_string_append(state->buffer, "\\\\{}");
			continue;
		}

		ch = g_utf8_get_char(cur);

		/* if punctuation mode is not NONE and the character
		 * should be spoken, manually wrap it with \sayas */
		spell_out = ((speak_listed &&
			      g_utf8_strchr(BaratinooPunctuationList, -1, ch)) ||
			     (speak_all && g_unichar_ispunct(ch)));
		/* in NONE mode, characters of BaratinooNoIntonationList are
		 * left out of the output altogether */
		silence = (drop_listed &&
			   g_utf8_strchr(BaratinooNoIntonationList, -1, ch));

		if (spell_out)
			g_string_append(state->buffer, "\\sayas<{characters}");
		if (!silence)
			g_string_append_unichar(state->buffer, ch);
		if (!spell_out)
			continue;

		g_string_append(state->buffer, "\\sayas>{}");

		/* if the character should influence intonation,
		 * add it back, but *only* if it wouldn't be spoken */
		if (!g_utf8_strchr(BaratinooIntonationList, -1, ch))
			continue;

		/* the end of the text node counts as a newline */
		next = g_utf8_next_char(cur);
		if (next < end)
			following = g_utf8_get_char(next);
		else
			following = '\n';

		if (g_unichar_isalnum(following) || g_unichar_ispunct(following))
			continue;

		g_string_append_unichar(state->buffer, ch);
		/* Append an extra space to try and make sure it's
		 * considered as punctuation and isn't spoken. */
		g_string_append_c(state->buffer, ' ');
	}
}
/**
 * @brief Converts SSML data to Baratinoo's proprietary format.
 * @param engine Engine stored in the parser state handed to the markup
 *               callbacks.
 * @param buf A buffer to write to; the converted text is appended to it.
 * @param data SSML data to convert.
 * @param size Length of @p data
 *
 * A parse failure is only logged; whatever the callbacks appended to @p buf
 * before the failure stays there.
 *
 * @warning Only a subset of the input SSML is currently translated, the rest
 *          being discarded.
 */
static void append_ssml_as_proprietary(const Engine *engine, GString *buf, const char *data, gsize size)
{
	/* FIXME: we could possibly use SSML mode, but the Baratinoo parser is
	 * very strict and *requires* "xmlns", "version" and "lang" attributes
	 * on the <speak> tag, which speech-dispatcher doesn't provide.
	 *
	 * Moreover, we need to add tags for volume/rate/pitch so we'd have to
	 * amend the data anyway. */
	static const GMarkupParser parser = {
		.start_element = ssml2baratinoo_start_element,
		.end_element = ssml2baratinoo_end_element,
		.text = ssml2baratinoo_text,
	};
	SsmlPraserState state = {
		.engine = engine,
		.buffer = buf,
		.voice_stack_len = 0,
	};
	GError *err = NULL;
	GMarkupParseContext *ctx;
	gboolean parsed;

	ctx = g_markup_parse_context_new(&parser, G_MARKUP_TREAT_CDATA_AS_TEXT,
					 &state, NULL);

	/* Only finish the parse if feeding the data succeeded, so that at
	 * most one error is ever stored in err. */
	parsed = g_markup_parse_context_parse(ctx, data, size, &err);
	if (parsed)
		parsed = g_markup_parse_context_end_parse(ctx, &err);

	if (!parsed) {
		DBG(DBG_MODNAME "Failed to convert SSML: %s", err->message);
		g_error_free(err);
	}

	g_markup_parse_context_free(ctx);
}