From 3f226faa0bc56cf96c67d039cb0caaf188c7428a Mon Sep 17 00:00:00 2001 From: Hussain Kadhem Date: Mon, 11 Apr 2022 19:58:47 +0100 Subject: [PATCH] Implemented a basic speech server for speech-dispatcher, based on the espeak server implementation. --- servers/native-speechd/Makefile | 38 ++ servers/native-speechd/tclspeechd.cpp | 469 +++++++++++++++++++++++ servers/speechd | 515 ++++++++++++++++++++++++++ 3 files changed, 1022 insertions(+) create mode 100644 servers/native-speechd/Makefile create mode 100644 servers/native-speechd/tclspeechd.cpp create mode 100755 servers/speechd diff --git a/servers/native-speechd/Makefile b/servers/native-speechd/Makefile new file mode 100644 index 0000000000..54a6488ff5 --- /dev/null +++ b/servers/native-speechd/Makefile @@ -0,0 +1,38 @@ +#$Id: Makefile 6198 2009-08-25 15:19:03Z tv.raman.tv $ + +INSTALL = install +PREFIX = /usr +LIBPARENTDIR = ${PREFIX}/share/emacs/site-lisp +LIBDIR =$(LIBPARENTDIR)/emacspeak/servers/native-speechd + +CXXFLAGS+= -g -O2 -fPIC -DPIC -pedantic -ansi \ +-Wall -Wno-long-long --std=c++11 + +ifeq ($(shell uname -s), Darwin) + + # Mac OS land. + LIBS= -framework tcl -lspeechd + TTS=tclspeechd.dylib +else + TCL_VERSION = 8.6 + TCL_INCLUDE= /usr/include/tcl$(TCL_VERSION) + LIBS= -ltcl$(TCL_VERSION) -lspeechd + TTS=tclspeechd.so + CXXFLAGS+=-I$(TCL_INCLUDE) +endif + +all: $(TTS) + +$(TTS): tclspeechd.o + $(CXX) $(LDFLAGS) -shared -o $@ $< $(LIBS) + +clean: + rm -f *.so *.o + +tidy: + clang-tidy -header-filter=.*-checks='*' -fix-errors tclspeechd.cpp -- -std=c++11 -I/usr/include/tcl8.6 + +install: $(TTS) + $(INSTALL) -d $(DESTDIR)$(LIBDIR) + $(INSTALL) $< $(DESTDIR)$(LIBDIR) + diff --git a/servers/native-speechd/tclspeechd.cpp b/servers/native-speechd/tclspeechd.cpp new file mode 100644 index 0000000000..a5797af8c5 --- /dev/null +++ b/servers/native-speechd/tclspeechd.cpp @@ -0,0 +1,469 @@ +// * Speech-Dispatcher server for emacspeak + +// +// +#include +#include +#include +#include +#include +#include +#include +#include +#include +using std::set; +using std::string; +using std::vector; + +#define PACKAGENAME "tts" +#define PACKAGEVERSION "1.0" +#define EXPORT + +//> +// +// +// + // + + return initLanguage(interp); +} + +int GetRate(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + int rc, rate, voice; + if (objc != 2) { + Tcl_AppendResult(interp, "Usage: getRate voiceCode ", TCL_STATIC); + return TCL_ERROR; + } + rc = Tcl_GetIntFromObj(interp, objv[1], &voice); + if (rc != TCL_OK) + return rc; + + rate = spd_get_voice_rate(conn); + + Tcl_SetObjResult(interp, Tcl_NewIntObj(rate)); + return TCL_OK; +} + +int SetRate(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + static int current_rate = -1; + int rc, rate, voice; + int success = 1; + if (objc != 3) { + Tcl_AppendResult(interp, "Usage: setRate voiceCode speechRate ", + TCL_STATIC); + return TCL_ERROR; + } + rc = Tcl_GetIntFromObj(interp, objv[1], &voice); + if (rc != TCL_OK) + return rc; + rc = Tcl_GetIntFromObj(interp, objv[2], &rate); + if (rc != TCL_OK) + return rc; + + if (rate != current_rate) { + success = (spd_set_voice_rate(conn, rate) != -1); + if (success) + current_rate = rate; + } + return success ? TCL_OK : TCL_ERROR; +} + +//> +//= str.size()) { + end = str.size(); + } + for (string::size_type i = start; i < end; ++i) { + if (c == str[i]) { + return i; + } + } + return string::npos; +} + +static bool closeTags(string &ssml) { + // check that a text (non whitespace) is present + int a_tag_count = 0; + bool a_text_is_present = false; + + for (auto tag = ssml.cbegin(); tag != ssml.cend(); ++tag) { + if (*tag == '<') { + a_tag_count++; + } + if ((a_tag_count == 0) && (*tag != ' ') && (*tag != '\n') && + (*tag != '\r') && (*tag != '\t')) { + a_text_is_present = true; + break; + } + if ((*tag == '>') && a_tag_count) { + a_tag_count--; + } + } + + if (a_text_is_present) { + string::size_type tag_pos = ssml.size(); + if (string::npos == tag_pos) { + fprintf(stderr, "Synthesizer argument of size (size_t)(-1), ignoring " + "last character\n"); + --tag_pos; + } + string::size_type prev_match = tag_pos; + while (string::npos != tag_pos) { + // look for a '<' + tag_pos = ssml.find_last_of('<', tag_pos); + if (string::npos != tag_pos) { + string::size_type end = findInRange(' ', ssml, tag_pos, prev_match); + if ((string::npos == end) && + (string::npos == findInRange('/', ssml, tag_pos, prev_match))) { + end = findInRange('>', ssml, tag_pos, prev_match); + } + if ((string::npos != end) && (tag_pos + 1 < end)) { + ssml.append("'); + } + prev_match = tag_pos; + tag_pos--; // Start search before previous tag to avoid infinite loop + } + } + } + return a_text_is_present; +} + +int Say(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + int i; + for (i = 1; i < objc; i++) { + char *a_text = (char *)Tcl_GetStringFromObj(objv[i], NULL); + if (a_text) { + string a_ssml = a_text; + if (closeTags(a_ssml)) { + a_ssml = "" + a_ssml + ""; + if (spd_say(conn, SPD_MESSAGE, a_ssml.c_str()) == -1) { + Tcl_AppendResult( + interp, "Could not synthesize string: ", a_ssml.c_str()); + return TCL_ERROR; + } + } + } + } + return TCL_OK; +} + +//> +// + +int SpeakingP(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + Tcl_SetObjResult(interp, Tcl_NewIntObj(0)); + return TCL_OK; +} + +int Pause(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + spd_pause(conn); + return TCL_OK; +} + +int Resume(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + spd_resume(conn); + return TCL_OK; +} + +//> +// +//< Punct + +int Punct(ClientData handle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + char *a_mode = (char *)Tcl_GetStringFromObj(objv[1], NULL); + static const char *current_mode = ""; + if (a_mode && strcmp(a_mode, current_mode)) { + SPDPunctuation a_type = SPD_PUNCT_NONE; + + if (strcmp(a_mode, "none") == 0) { + a_type = SPD_PUNCT_NONE; + current_mode = "none"; + } else if (strcmp(a_mode, "all") == 0) { + a_type = SPD_PUNCT_ALL; + current_mode = "all"; + } else if (strcmp(a_mode, "some") == 0) { + a_type = SPD_PUNCT_SOME; + current_mode = "some"; + } + + spd_set_punctuation(conn, a_type); + } + return TCL_OK; +} + +//> +// +// available_languages; + +static int SetLanguageHelper(Tcl_Interp *interp, size_t aIndex) { + int voice_status = spd_set_language(conn, (char *)available_languages[aIndex].c_str()); + if (voice_status == -1) { + Tcl_AppendResult(interp, "could not set voice"); + return TCL_ERROR; + } + return TCL_OK; +} + +int SetLanguage(ClientData eciHandle, Tcl_Interp *interp, int objc, + Tcl_Obj *CONST objv[]) { + unsigned long aIndex = 0; + + if (getLangIndex(interp, &aIndex)) { + return SetLanguageHelper(interp, aIndex); + } + // TODO: Error reporting for this + return TCL_OK; +} + +//> +// unique_languages; + int i = 0; + unsigned long ui = 0; + char *envDefaultLang = (char *)getenv("LANGUAGE"); + if (envDefaultLang == NULL) { + envDefaultLang = (char *)getenv("LANG"); + if (envDefaultLang == NULL) { + envDefaultLang = (char *)"en"; + } + } + string aDefaultLang = envDefaultLang; + size_t remove = std::min(aDefaultLang.find('.', 0), aDefaultLang.find(':', 0)); + + // Snip off everything following a period. So en-us.utf8 becomes en-us. + if (remove != string::npos) { + aDefaultLang.erase(aDefaultLang.begin() + remove, aDefaultLang.end()); + } + // And replace _ with -, E.G. en_US becomes en-US. + for (string::iterator it = aDefaultLang.begin(); it != aDefaultLang.end(); + it++) { + if (*it == '_') { + *it = '-'; + } + } + + SPDVoice **voices = spd_list_synthesis_voices(conn); + + for (i = 0; voices[i] != 0; i++) { + string voice_lang = voices[i]->language; + unique_languages.insert(voice_lang); + } + available_languages.assign(unique_languages.begin(), unique_languages.end()); + vector::iterator it; + size_t lang_count = available_languages.size(); + size_t english_index = lang_count; + size_t default_index = lang_count; + char buffer[256]; + for (ui = 0; ui < lang_count; ui++) { + const char *aLangCode = available_languages[ui].c_str(); + snprintf(buffer, sizeof(buffer), "%lu", ui); + Tcl_SetVar2(interp, "langalias", aLangCode, buffer, 0); + Tcl_SetVar2(interp, "langcode", buffer, aLangCode, 0); + if (default_index == lang_count) { + if (strcasecmp(aDefaultLang.c_str(), aLangCode) == 0) { + Tcl_SetVar2(interp, "langsynth", "current", buffer, 0); + Tcl_SetVar2(interp, "langcode", "current", (char *)aLangCode, 0); + default_index = ui; + } + } + if (strcmp(aLangCode, "en") == 0) { + english_index = ui; + } + } + if ((default_index == lang_count) && (english_index == lang_count)) { + fprintf(stderr, "Could not find your default language, and English\n"); + fprintf(stderr, "doesn't seem to be available either. Bailing now.\n"); + exit(1); + } + if (default_index == lang_count) { + default_index = english_index; + fprintf(stderr, "Couldn't find your default language, using English.\n"); + snprintf(buffer, sizeof(buffer), "%lu", english_index); + Tcl_SetVar2(interp, "langsynth", "current", buffer, 0); + Tcl_SetVar2(interp, "langcode", "current", "en", 0); + } + + if (TCL_OK != SetLanguageHelper(interp, default_index)) { + return TCL_ERROR; + } + // Presumably we have at least one language, namely English, + // so no chance of underflowing size_t with this subtraction: + snprintf(buffer, sizeof(buffer), "%lu", lang_count - 1); + Tcl_SetVar2(interp, "langsynth", "top", buffer, 0); + return TCL_OK; +} + +static int getLangIndex(Tcl_Interp *interp, unsigned long *theIndex) { + int aStatus = 0; + const char *aInfo = Tcl_GetVar2(interp, "langsynth", "current", 0); + char *end = NULL; + if (aInfo) { + *theIndex = strtoul(aInfo, &end, 10); + + if (end && !*end) { + if ((*theIndex > 0) && (*theIndex < available_languages.size())) { + aStatus = 1; + } + } + } + return aStatus; +} + +//> +// diff --git a/servers/speechd b/servers/speechd new file mode 100755 index 0000000000..1576eff5c0 --- /dev/null +++ b/servers/speechd @@ -0,0 +1,515 @@ +#!/usr/bin/tclsh +# Keywords: Emacspeak, speechd , TCL +#{{{ LCD Entry: + +# LCD Archive Entry: +# emacspeak| T. V. Raman |raman@cs.cornell.edu +# A speech interface to Emacs | +# $Date: 2006-08-11 21:11:17 +0200 (ven, 11 aoĆ» 2006) $ | +# $Revision: 4047 $ | +# Location undetermined +# + +#}}} +#{{{ Copyright: +#Copyright (C) 1995 -- 2017, T. V. Raman +#All Rights Reserved +# +# This file is not part of GNU Emacs, but the same permissions apply. +# +# GNU Emacs is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# GNU Emacs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GNU Emacs; see the file COPYING. If not, write to +# the Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,MA 02110-1301, USA. + +#}}} +#{{{source common code + +package require Tclx +set wd [file dirname $argv0] +source $wd/tts-lib.tcl + +#}}} + +#{{{ procedures + +# Language switching +# +# langsynth: current and max index into table of synth languages. + +# langsynth(current): current synthesis language, +# Gives the code of the current synth language. +# This variable is set by the application +# For example: langsynth(current)=3 +# means finnish is the current language + +# langsynth(top): max available index. +# For example, if there are three available languages: +# langsynth(top)=2 + +# voicename: name of the current voice for announcements +# This variable is set by tclspeechd + +# langcode: language identifier +# e.g. langcode(0)="fi" +# This variable is set by tclspeechd + +# langalias converts a code language ("en", "en_GB",...) to its index in the language table. +# e.g. langalias(fi)=3 could mean "fi_FI" will be used if "fi" is required. + +set langsynth(current) 0 +set langsynth(top) 0 +set voicename "default" +set langcode(0) "en-us" +set langcode(current) "en-us" +set mswindows [expr { $tcl_platform(platform) == "windows" } ] + +# select the next synth language +proc set_next_lang {say_it} { + global langsynth + global langalias + global voicename + global langcode + + set index 0 + while { $index <= $langsynth(top) } { + if { $index == $langsynth(current) } { + break + } + incr index + } + + if { $index >= $langsynth(top) } { + set index 0 + } else { + incr index + } + + set langsynth(current) $index + set langcode(current) $langcode($index) + + setLanguage $langsynth(current) +puts stderr "Language: $langsynth(current) Voice: $voicename" + if { [info exists say_it]} { + tts_say "$voicename" + } +} + +# select the previous synth language +proc set_previous_lang {say_it} { + global langsynth + global langalias + global voicename + global langcode + + set index 0 + while { $index <= $langsynth(top) } { + if { $index == $langsynth(current) } { + break + } + incr index + } + + if { $index <= 0 } { + set index $langsynth(top) + } else { + incr index -1 + } + + set langsynth(current) $index + set langcode(current) $langcode($index) + setLanguage $langsynth(current) +puts stderr "Language: $langsynth(current) Voice: $voicename" + if { [info exists say_it]} { + tts_say "$voicename " + } +} + +# select a new synth language +# set_lang "en" +proc set_lang {{name "en"} {say_it "nil"}} { + global langsynth + global langalias + global langcode +global voicename + if { ![info exists langalias($name)]} { + return + } + + if { $langalias($name) == $langsynth(current) } { + return + } + + set langsynth(current) $langalias($name) + set langcode(current) $langcode($langalias($name)) + setLanguage $langsynth(current) + + if { $say_it == "t"} { + tts_say "$voicename" + } +} + +# set_preferred_lang "en" "en_GB" +proc set_preferred_lang {alias lang} { + global langsynth + global langalias + + if { ![info exists langalias($lang)]} { + return + } + set langalias($alias) $langalias($lang) +} + +#debug +proc list_lang {} { + global langcode + echo [ array get langcode ] +} + +proc list_langalias {} { + global langalias + echo [ array get langalias ] +} + + +proc version {} { + q " speechd [ttsVersion]" + d +} + +proc tts_set_punctuations {mode} { + global tts + + set tts(punctuations) $mode + punct $mode + service + return "" +} + +proc tts_set_speech_rate {rate} { + global tts + + set factor $tts(char_factor) + set tts(speech_rate) $rate + setRate 0 $rate + service + return "" +} + +proc tts_set_character_scale {factor} { + global tts + + set tts(say_rate) [round \ + [expr $tts(speech_rate) * $factor ]] + set tts(char_factor) $factor + service + return "" +} + +proc tts_say {text} { + global tts + global langcode + + service + set la $langcode(current) + + set prefix "" + regsub -all {\[\*\]} $text { } text + synth " $prefix $text" + service + return "" +} + +proc l {text} { + global tts + global langcode + + set la $langcode(current) + set prefix "" + if {[regexp {[A-Z]} $text]} { + # Use a relative pitch adjustment. +70% seems goodafter some testing. + set prefix "$prefix " + } + set tts(not_stopped) 1 + # TBD: say-as, format attribute: instead of characters/glyphs, define "word" + synth "$prefix $text" + service + return "" +} + +proc d {} { + service + speech_task +} + +proc tts_resume {} { + resume + return "" +} +proc tts_pause {} { + pause + return "" +} + +proc s {} { + global tts + + + if {$tts(not_stopped) == 1} { + + + set tts(not_stopped) 0 + stop + queue_clear + } else { + puts stderr StopNoOp + } +} + + + +proc t {{pitch 440} {duration 50}} { + global tts queue + if {$tts(beep)} { + b $pitch $duration + return "" + } + service +} + +proc sh {{duration 50}} { + global tts queue + + + set silence "" + set queue($tts(q_tail)) [list t $silence] + incr tts(q_tail) + service + return "" +} + +# Caps: this driver currently offers either +# - announcing each capitals (tts_split_caps) +# - or raising pitch (tts_capitalize) +# - or beeping (tts_allcaps_beep) +# +proc tts_split_caps {flag} { + global tts + if { $flag == 1 } { + caps "spelling" + } else { + if {$tts(capitalize) == 0 } { + caps "none" + } + } + service + return "" +} +proc tts_reset {} { + global tts + #synth -reset + + queue_clear + synth "Resetting engine to factory defaults." +} + +proc r {rate} { + global queue tts + + set queue($tts(q_tail)) [list r $rate] + incr tts(q_tail) + return "" +} + +proc useStereoOutput {} { + global tts + + + setOutput buffer +} + +#}}} +#{{{ speech task + +proc trackIndex {index} { + global tts + + set tts(last_index) $index +} + +proc stdin_readable_handler {} { + global stdin_is_readable + global timer_or_stdin_breaks + set stdin_is_readable 1 + set timer_or_stdin_breaks 1 +} +if $mswindows { fileevent stdin readable stdin_readable_handler } + +proc timer_handler {} { + global timer_ticks + global timer_or_stdin_breaks + after 200 timer_handler + set timer_ticks 1 + incr timer_or_stdin_breaks +} +if $mswindows { after 0 timer_handler } + +proc service {} { + global tts + global stdin_is_readable # used only by mswindows + + set talking [speakingP] + set stdin_is_readable 0 + while {$talking == 1} { + if $::mswindows { + # need to workaround lack of "select stdin" on windows + # set status to 1 if input is available at stdin + # if input is not available, timer will cause an exit from vwait + set status -1 + vwait timer_or_stdin_breaks + if { $stdin_is_readable > 0 } { + set status 1 + } + } else { + set status [lsearch [select [list stdin] {} {} 0.02] stdin] + } + + if { $status >= 0} { + set tts(talking?) 0 + set talking 0 + break + } else { + set talking [speakingP] + } + } + return $talking +} + +proc speech_task {} { + global queue tts + global langcode + + set tts(talking?) 1 + set tts(not_stopped) 1 + set length [queue_length] + set la $langcode(current) + + #set prefix "" + set prefix "" + loop index 0 $length { + + set event [queue_remove] + set event_type [lindex $event 0] + switch -exact -- $event_type { + s { + + set text [clean [lindex $event 1]] + synth " $prefix $text" + set retval [service] + set prefix "" + } + c { + set text [lindex $event 1] +set prefix "$text" + synth "$text" + set retval "" + } + a { + + set sound [lindex $event 1] + exec $tts(play) $sound >/dev/null & + } + b { + + if {$tts(beep)} { + lvarpop event + eval beep $event + } + } + r { + +# The first argument to setRate is ignored. + setRate 0 [lindex $event 1] + } + } + if {$tts(talking?) == 0} {break;} + } + + + set tts(talking?) 0 + service + return "" +} + +#}}} +#{{{clean + +#preprocess element before sending it out: +proc clean {element} { + global queue tts + + +# The text conversion is expected to be done by speech-dispatcher. +# For example, the * symbol will be said according to the selected language. +# + return $element +} + +#}}} +#{{{ Initialize and set state. + +#do not die if you see a control-c +signal ignore {sigint} + +# Set input encoding to utf-8 +fconfigure stdin -encoding utf-8 + +#initialize speech-dispatcher +tts_initialize +beep_initialize +set tts(input) stdin +if {[info exists server_p]} { + set tts(input) sock0 +} +set servers [file normalize [file dirname $argv0]] +set tclTTS $servers/native-speechd +load $tclTTS/tclspeechd[info sharedlibextension] +if {[info exists ::env(PULSE_SERVER)] && [file exists /usr/bin/paplay]} { +# WSLg and other systems with remote sound + set tts(play) /usr/bin/paplay +} elseif {[file exists /proc/asound]} { + set tts(play) /usr/bin/aplay +} +synth "speechd [ttsVersion]" +service + +#Start the main command loop: + +if $mswindows { + # there is a bug on windows, interactive -on does not work + # https://sourceforge.net/p/tclx/bugs/81/ + commandloop -interactive off +} else { + commandloop +} + +#}}} +#{{{ Emacs local variables + +### Local variables: +### mode: tcl +### voice-lock-mode: t +### folded-file: t +### End: + +#}}}