Skip to content

Commit

Permalink
Return audio response as mp3
Browse files Browse the repository at this point in the history
  • Loading branch information
tronikos committed Jan 6, 2023
1 parent fca6adb commit dc27db4
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 24 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pytest

# Run command line interactive tool
python -m pip install click
python demo.py
python demo.py --display --audio_out

# Build package
python -m pip install build
Expand Down
12 changes: 5 additions & 7 deletions browser_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,16 @@
import tempfile
import webbrowser

ASSISTANT_HTML_FILE = 'google-assistant-sdk-screen-out.html'


class SystemBrowser(object):
def __init__(self):
self.tempdir = tempfile.mkdtemp()
self.filename = os.path.join(self.tempdir, ASSISTANT_HTML_FILE)

def display(self, html):
with open(self.filename, 'wb') as f:
f.write(html)
webbrowser.open(self.filename, new=0)
def display(self, contents, filename):
full_filename = os.path.join(self.tempdir, filename)
with open(full_filename, 'wb') as f:
f.write(contents)
webbrowser.open(full_filename, new=0)


system_browser = SystemBrowser()
16 changes: 10 additions & 6 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,17 @@
help='Language code of the Assistant')
@click.option('--display', is_flag=True, default=False,
help='Enable visual display of Assistant responses in HTML.')
@click.option('--audio_out', is_flag=True, default=False,
help='Enable audio response.')
@click.option('--verbose', '-v', is_flag=True, default=False,
help='Verbose logging.')
@click.option('--grpc-deadline', default=DEFAULT_GRPC_DEADLINE,
metavar='<grpc deadline>', show_default=True,
help='gRPC deadline in seconds')
def main(api_endpoint, credentials,
device_model_id, device_id, lang, display, verbose,
device_model_id, device_id, lang, display, audio_out, verbose,
grpc_deadline, *args, **kwargs):
system_browser = browser_helpers.system_browser
# Setup logging.
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

Expand All @@ -71,17 +74,18 @@ def main(api_endpoint, credentials,
'new OAuth 2.0 credentials.')
return

with TextAssistant(credentials, lang, device_model_id, device_id, display,
with TextAssistant(credentials, lang, device_model_id, device_id, display, audio_out,
grpc_deadline, api_endpoint) as assistant:
while True:
query = click.prompt('')
click.echo('<you> %s' % query)
response_text, response_html = assistant.assist(text_query=query)
if display and response_html:
system_browser = browser_helpers.system_browser
system_browser.display(response_html)
response_text, response_html, audio_response = assistant.assist(text_query=query)
if response_text:
click.echo('<@assistant> %s' % response_text)
if response_html:
system_browser.display(response_html, 'google-assistant-sdk-screen-out.html')
if audio_response:
system_browser.display(audio_response, 'google-assistant-sdk-audio-out.mp3')


if __name__ == '__main__':
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "gassist-text"
version = "0.0.7"
version = "0.0.8"
authors = [
{ name="tronikos", email="tronikos@gmail.com" },
]
Expand Down
24 changes: 15 additions & 9 deletions src/gassist_text/textinput.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
# - Added default values
# - Moved creation of the authorized gRPC channel in the constructor
# - Parse HTML response
# - Return audio response as mp3
# - Extracted command line tool to demo.py

import google.auth.transport.grpc
Expand Down Expand Up @@ -55,19 +56,22 @@ class TextAssistant(object):
device_model_id: identifier of the device model.
device_id: identifier of the registered device instance.
display: enable visual display of assistant response.
audio_out: enable audio response.
deadline_sec: gRPC deadline in seconds for Google Assistant API call.
api_endpoint: Address of Google Assistant API service.
"""

def __init__(self, credentials, language_code='en-US', device_model_id='default', device_id='default',
display=False, deadline_sec=DEFAULT_GRPC_DEADLINE, api_endpoint=ASSISTANT_API_ENDPOINT):
display=False, audio_out=False,
deadline_sec=DEFAULT_GRPC_DEADLINE, api_endpoint=ASSISTANT_API_ENDPOINT):
self.language_code = language_code
self.device_model_id = device_model_id
self.device_id = device_id
self.conversation_state = None
# Force reset of first conversation.
self.is_new_conversation = True
self.display = display
self.audio_out = audio_out
# Create an authorized gRPC channel.
channel = google.auth.transport.grpc.secure_authorized_channel(
credentials, google.auth.transport.requests.Request(), api_endpoint)
Expand All @@ -84,14 +88,13 @@ def __exit__(self, etype, e, traceback):
return False

def assist(self, text_query):
"""Send a text request to the Assistant and playback the response.
"""
"""Send a text request to the Assistant and return the response as a tuple of: [text, html, audio]."""
def iter_assist_requests():
config = embedded_assistant_pb2.AssistConfig(
audio_out_config=embedded_assistant_pb2.AudioOutConfig(
encoding='LINEAR16',
sample_rate_hertz=16000,
volume_percentage=0,
encoding='MP3',
sample_rate_hertz=24000,
volume_percentage=100,
),
dialog_state_in=embedded_assistant_pb2.DialogStateIn(
language_code=self.language_code,
Expand All @@ -113,18 +116,21 @@ def iter_assist_requests():

text_response = None
html_response = None
audio_response = b''
for resp in self.assistant.Assist(iter_assist_requests(),
self.deadline):
assistant_helpers.log_assist_response_without_audio(resp)
if resp.screen_out.data:
if self.display:
html_response = resp.screen_out.data
soup = BeautifulSoup(resp.screen_out.data, "html.parser")
divs = soup.find_all("div", class_="show_text_content")
text_response = '\n'.join(map(lambda div : div.text, divs))
divs = soup.find_all("div", id="assistant-card-content")
text_response = '\n'.join(map(lambda div : div.text, divs)).strip()
if resp.dialog_state_out.conversation_state:
conversation_state = resp.dialog_state_out.conversation_state
self.conversation_state = conversation_state
if resp.dialog_state_out.supplemental_display_text:
text_response = resp.dialog_state_out.supplemental_display_text
return text_response, html_response
if self.audio_out and resp.audio_out.audio_data:
audio_response += resp.audio_out.audio_data
return text_response, html_response, audio_response

0 comments on commit dc27db4

Please sign in to comment.