### Polly

In [1]:
import boto3

In [2]:
# Amazon Polly client, used below to synthesize speech from text.
polly = boto3.client(service_name='polly')

In [3]:
# Sample passage that is synthesized to speech, transcribed back, and then translated.
# Adjacent string literals are concatenated by Python, so this is a single string.
text = (
    "Hello, my name is Polly. "
    "I am a text to speech service provided by AWS. "
    "I can speak in 29 different languages. "
    "I can also speak in 60 different voices. "
    "I am very happy to meet you."
)

In [4]:
# Request parameters for the speech synthesis call, gathered in one place.
synthesis_params = {
    'Text': text,
    'VoiceId': 'Matthew',
    'Engine': 'neural',
    'OutputFormat': 'mp3',
}

# The returned dict carries the audio under the 'AudioStream' key (read in the next cell).
response = polly.synthesize_speech(**synthesis_params)

In [5]:
# Save the synthesized audio to a local MP3 file.
# 'AudioStream' is a streaming body backed by an open HTTP connection, so it is
# closed explicitly once the bytes have been read (even if writing the file fails).
audio_stream = response['AudioStream']
try:
    with open('polly.mp3', 'wb') as f:
        f.write(audio_stream.read())
finally:
    audio_stream.close()

### Transcribe

In [6]:
s3 = boto3.client('s3')

# Bucket that will hold the audio file for Amazon Transcribe.
bucket_name = 'polly-transcribe-translate-982347'
create_bucket_args = {'Bucket': bucket_name}

# Outside us-east-1, S3 rejects CreateBucket unless the request names the region
# explicitly via a LocationConstraint; us-east-1 must NOT send one.
region = s3.meta.region_name
if region not in (None, 'us-east-1', 'aws-global'):
    create_bucket_args['CreateBucketConfiguration'] = {'LocationConstraint': region}

try:
    create_bucket_response = s3.create_bucket(**create_bucket_args)
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the notebook: the bucket from a previous run is still ours, reuse it.
    create_bucket_response = None
    print('Bucket already exists and is owned by this account; reusing it.')

# Last expression of the cell: display the API response (nothing is shown when reused).
create_bucket_response

{'ResponseMetadata': {'RequestId': 'XE83NW64GQ90B9M7',
  'HostId': 'vAxxRVrnwQSuOEKgvRt35yZ+pX25w9wNGVMJfewFGobyQoEVOLgh1RKv1NWuNy3xRkY/PFPwVhE=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'vAxxRVrnwQSuOEKgvRt35yZ+pX25w9wNGVMJfewFGobyQoEVOLgh1RKv1NWuNy3xRkY/PFPwVhE=',
   'x-amz-request-id': 'XE83NW64GQ90B9M7',
   'date': 'Thu, 28 Sep 2023 20:31:31 GMT',
   'location': '/polly-transcribe-translate-982347',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'Location': '/polly-transcribe-translate-982347'}

In [7]:
# Upload the local MP3 to the bucket so Amazon Transcribe can read it from S3.
s3.upload_file(
    Filename='polly.mp3',
    Bucket='polly-transcribe-translate-982347',
    Key='polly.mp3',
)

In [8]:
# Amazon Transcribe client, used below to start, poll, and delete the transcription job.
transcribe = boto3.client(service_name='transcribe')


In [9]:
# Name of the transcription job; later cells use it to poll and delete the job.
job_name = 'TranscriptionJob'

# S3 location of the audio uploaded in the previous step.
media_source = {'MediaFileUri': 's3://polly-transcribe-translate-982347/polly.mp3'}

# Start the job; the call is the cell's last expression so its response is displayed.
transcribe.start_transcription_job(
    TranscriptionJobName=job_name,
    Media=media_source,
    MediaFormat='mp3',
    LanguageCode='en-US',
)

{'TranscriptionJob': {'TranscriptionJobName': 'TranscriptionJob',
  'TranscriptionJobStatus': 'IN_PROGRESS',
  'LanguageCode': 'en-US',
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 's3://polly-transcribe-translate-982347/polly.mp3'},
  'StartTime': datetime.datetime(2023, 9, 28, 16, 36, 19, 772000, tzinfo=tzlocal()),
  'CreationTime': datetime.datetime(2023, 9, 28, 16, 36, 19, 734000, tzinfo=tzlocal())},
 'ResponseMetadata': {'RequestId': 'f6222381-302e-44a8-8d2e-1b24c6e616b0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f6222381-302e-44a8-8d2e-1b24c6e616b0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '283',
   'date': 'Thu, 28 Sep 2023 20:36:19 GMT'},
  'RetryAttempts': 0}}

In [10]:
import time

# Poll Amazon Transcribe until the job reaches a terminal state.
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 600  # upper bound so a stuck job cannot hang the notebook forever

deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    status = transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_state = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_state in ('COMPLETED', 'FAILED'):
        break
    if time.monotonic() >= deadline:
        raise TimeoutError(
            'Transcription job {!r} did not finish within {} seconds (last status: {})'.format(
                job_name, MAX_WAIT_SECONDS, job_state))
    print('Still transcribing...')
    time.sleep(POLL_INTERVAL_SECONDS)

# A failed job has no transcript to download, so stop here with the service's reason
# instead of letting a later cell fail on a missing 'Transcript' key.
if job_state == 'FAILED':
    raise RuntimeError(
        'Transcription job {!r} failed: {}'.format(
            job_name,
            status['TranscriptionJob'].get('FailureReason', 'no failure reason reported')))

# NOTE(review): the full response includes a pre-signed TranscriptFileUri that embeds a
# temporary security token; clear this cell's output before sharing the notebook.
print(status)

{'TranscriptionJob': {'TranscriptionJobName': 'TranscriptionJob', 'TranscriptionJobStatus': 'COMPLETED', 'LanguageCode': 'en-US', 'MediaSampleRateHertz': 24000, 'MediaFormat': 'mp3', 'Media': {'MediaFileUri': 's3://polly-transcribe-translate-982347/polly.mp3'}, 'Transcript': {'TranscriptFileUri': 'https://s3.us-east-1.amazonaws.com/aws-transcribe-us-east-1-prod/975022442119/TranscriptionJob/67bef544-0fb2-43ed-a4c7-5878f44e2401/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjELX%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQD9mOUN8O4wV%2BUB9UJYYb7UdIxf9K52SBo3dCKnzxtjyQIhAKqyE4lW2KjzVnK6whgPeoR0WGRlFRANnQ2x0Afyb88vKrsFCK3%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQBBoMMjc2NjU2NDMzMTUzIgx%2BXVSA87JlOn2RGJQqjwXlJe3LIdlOhkIi%2FghvA9Xfc2jnmpb433JBaX3%2FLQpKCfDv4%2BO9ux79vLLViOgX6%2FDj%2Fxg%2BHlJEnxfDcbJl0nMu00%2BBW0tIjZujEHoTLtNuSkwYRMy0WwgLAqheDI4IVX7H9njdsr%2FTEizIlbP3slyhibJqaY7sU5QI5vIq72fWPCXBnsHNdSmJ5LzZyVmKULwcaYtZTH%2BaJuzZtcrj0Ub%2F1LgCicBhJtrxXoKA0ywwqghJN98ChILb%2FGcWJyXUAU1V0qP

In [12]:
import requests

# Download the transcript JSON from the pre-signed URL returned by Transcribe.
# The timeout keeps the cell from hanging on a stalled connection.
response = requests.get(
    status['TranscriptionJob']['Transcript']['TranscriptFileUri'],
    timeout=30,
)
# Fail loudly on an HTTP error (e.g. an expired pre-signed URL) rather than
# saving an error document as if it were the transcript.
response.raise_for_status()

In [13]:
# Persist the downloaded response body to disk unchanged (binary mode, raw bytes).
with open('transcript.json', mode='wb') as transcript_file:
    transcript_file.write(response.content)

In [16]:
import json

# Parse the saved transcript. JSON is read explicitly as UTF-8 so the result does not
# depend on the platform's default text encoding.
with open('transcript.json', 'r', encoding='utf-8') as f:
    transcript = json.load(f)

# Display the text of the first transcript entry.
transcript['results']['transcripts'][0]['transcript']

'Hello, my name is Polly. I am a text to speech service provided by Aws. I can speak in 29 different languages. I can also speak in 60 different voices. I am very happy to meet you.'

In [17]:
# Delete the transcription job now that its transcript has been saved to transcript.json.
transcribe.delete_transcription_job(TranscriptionJobName=job_name)

{'ResponseMetadata': {'RequestId': '02e6a6f4-48a1-41b6-90df-c00ef90625e0',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '02e6a6f4-48a1-41b6-90df-c00ef90625e0',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 28 Sep 2023 20:42:17 GMT'},
  'RetryAttempts': 0}}

### Translate

In [19]:
# Amazon Translate client, used below to translate the transcribed text.
translate = boto3.client(service_name='translate')

In [20]:
# Text of the first transcript entry produced by the transcription step.
source_text = transcript['results']['transcripts'][0]['transcript']

# Translate it from English ('en') to Spanish ('es').
response = translate.translate_text(
    Text=source_text,
    SourceLanguageCode='en',
    TargetLanguageCode='es',
)

In [21]:
# Display the translated text from the translate_text response (target language 'es').
response['TranslatedText']

'Hola, me llamo Polly. Soy un servicio de conversión de texto a voz proporcionado por Aws. Puedo hablar en 29 idiomas diferentes. También puedo hablar en 60 voces diferentes. Estoy muy contenta de conocerte.'