|
| 1 | +"""Speech synthesis (text-to-speech, TTS) via Amazon Polly |
| 2 | +
|
| 3 | +Text is synthesized to an MP3 file by Amazon Polly and played back with VLC on a background worker thread. |
| 4 | +
|
| 5 | +""" |
| 6 | +from boto3 import Session |
| 7 | +from botocore.exceptions import BotoCoreError, ClientError |
| 8 | +from contextlib import closing |
| 9 | +import os |
| 10 | +import sys |
| 11 | +from contextlib import suppress |
| 12 | +from threading import Thread |
| 13 | +from queue import LifoQueue, Empty |
| 14 | +from time import sleep |
| 15 | +from tempfile import gettempdir |
| 16 | + |
| 17 | +from stuff import vlc |
| 18 | + |
class SpeechSynthesizer:
    """Speak text aloud using Amazon Polly for synthesis and VLC for playback.

    A daemon worker thread consumes a one-slot LIFO queue, so at most one
    pending utterance (the most recently requested one) is kept while the
    worker is busy.
    """

    def __init__(self, profile_name="mylespolly", region_name="eu-west-1",
                 voice_id="Salli"):
        """Create the Polly client and start the background worker thread.

        Args:
            profile_name: AWS credentials profile handed to ``Session``.
            region_name: AWS region used for the Polly client.
            voice_id: Polly ``VoiceId`` used for every utterance.
        """
        # Queue holding the last speech utterance
        self._speak_queue = LifoQueue(1)
        self._voice_id = voice_id

        self._session = Session(profile_name=profile_name)
        self._polly = self._session.client("polly", region_name=region_name)

        # Daemon thread: it must not keep the interpreter alive on shutdown.
        self._thread = Thread(target=self.run, args=())
        self._thread.daemon = True
        self._thread.start()

    def request(self, text):
        """Replace any pending utterance with ``text`` without blocking.

        Args:
            text: The text to be spoken next.
        """
        # Drop the pending item (if any) so the single slot is free again.
        with suppress(Empty):
            self._speak_queue.get_nowait()
        self._speak_queue.put_nowait(text)

    def run(self):
        """Continuously process the queue and trigger speech outputs.

        This loop never returns. A failed utterance is reported on stderr and
        skipped. ``sys.exit`` is deliberately not used: in a non-main thread
        it would only terminate this worker, silently leaving every later
        request unspoken.
        """
        while True:
            text = self._speak_queue.get(True, None)
            print(text)

            output = self._synthesize_to_file(text)
            if output is None:
                continue
            self._play(output)

    def _synthesize_to_file(self, text):
        """Synthesize ``text`` to an MP3 temp file; return its path, or None on failure."""
        try:
            response = self._polly.synthesize_speech(
                Text=text, OutputFormat="mp3", VoiceId=self._voice_id
            )
        except (BotoCoreError, ClientError) as error:
            print(error, file=sys.stderr)
            return None

        if "AudioStream" not in response:
            # The response didn't contain audio data
            print("Could not stream audio", file=sys.stderr)
            return None

        output = os.path.join(gettempdir(), "speech.mp3")
        print(output)
        # Note: Closing the stream is important as the service throttles on the
        # number of parallel connections; contextlib.closing guarantees close()
        # is called when the with block ends.
        with closing(response["AudioStream"]) as stream:
            try:
                # Write the audio to the file as a binary stream
                with open(output, "wb") as file:
                    file.write(stream.read())
            except IOError as error:
                # Could not write to file
                print(error, file=sys.stderr)
                return None
        return output

    def _play(self, path):
        """Play the audio file at ``path`` with VLC and block until playback stops."""
        # see https://wiki.videolan.org/Python_bindings
        # see https://www.olivieraubert.net/vlc/python-ctypes/doc/index.html
        player = vlc.MediaPlayer(path)
        sleep(0.1)
        player.play()
        # Presumably gives VLC time to start so is_playing() reports playback
        # before polling begins.
        sleep(0.1)
        while player.is_playing():
            # Sleep between polls instead of spinning on a CPU core.
            sleep(0.05)
| 81 | +# os.remove(output) |
| 82 | + |
| 83 | + |
| 84 | +## alternative: |
| 85 | +#from pygame import mixer |
| 86 | +#mixer.init(frequency=22050, size=-16, channels=2, buffer=4096) |
| 87 | +#mixer.music.load(output) |
| 88 | +#mixer.music.play() |
| 89 | +#while mixer.music.get_busy(): |
| 90 | +# pass |
| 91 | +#mixer.quit() |
0 commit comments