-
Notifications
You must be signed in to change notification settings - Fork 0
/
synthesis.py
159 lines (136 loc) · 5.62 KB
/
synthesis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#!/usr/bin/env python
# coding: utf-8
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
# Source: https://github.com/Azure-Samples/cognitive-services-speech-sdk/tree/master/samples/batch-synthesis
import argparse
import json
import logging
import os
import sys
import time
# from pathlib import Path
from dotenv import load_dotenv
import requests
# Send log records of level DEBUG and above to stdout, prefixed with a timestamp.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG,
                    format="[%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p %Z")
logger = logging.getLogger(__name__)

# Your Speech resource key and region.
# This example requires environment variables named "SPEECH_KEY" and
# "SPEECH_LOCATION"; "SPEECH_REGION" is accepted as a fallback name for the
# region. load_dotenv reads ./variables.env before the lookups below.
load_dotenv('./variables.env')
SUBSCRIPTION_KEY = os.environ.get('SPEECH_KEY')
# Fall back to SPEECH_REGION so that either variable name yields a usable host
# instead of the literal "None" being formatted into the request URLs.
SERVICE_REGION = os.environ.get('SPEECH_LOCATION') or os.environ.get('SPEECH_REGION')
NAME = "Simple synthesis"
DESCRIPTION = "Simple synthesis description"

# The service host suffix.
# For azure.cn the host suffix is "customvoice.api.speech.azure.cn"
SERVICE_HOST = "customvoice.api.speech.microsoft.com"
def submit_synthesis(voice=None, timeout=30):
    """Submit the text in ./inputs/sample.txt as a batch synthesis job.

    Args:
        voice: Voice name placed in the request's ``synthesisConfig``.
        timeout: Seconds ``requests`` waits on the service before raising,
            so an unresponsive endpoint cannot block the caller forever.

    Returns:
        The ``id`` field of the service's JSON response, or ``None`` when the
        service answers with an HTTP status of 400 or above (the response
        body is logged in that case).
    """
    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY,
        'Content-Type': 'application/json'
    }

    # Decode explicitly as UTF-8 so the text is read the same way on every
    # platform instead of depending on the locale's default encoding.
    # NOTE(review): assumes the input file is UTF-8 encoded - confirm.
    with open('./inputs/sample.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    payload = {
        'displayName': NAME,
        'description': DESCRIPTION,
        "textType": "PlainText",
        'synthesisConfig': {
            "voice": voice,
        },
        # Replace with your custom voice name and deployment ID if you want to use custom voice.
        # Multiple voices are supported, the mixture of custom voices and platform voices is allowed.
        # Invalid voice name or deployment ID will be rejected.
        'customVoices': {
            # "YOUR_CUSTOM_VOICE_NAME": "YOUR_CUSTOM_VOICE_ID"
        },
        "inputs": [
            {
                "text": text
            },
        ],
        "properties": {
            "outputFormat": "audio-24khz-160kbitrate-mono-mp3",
            # "destinationContainerUrl": "<blob container url with SAS token>"
        },
    }

    response = requests.post(url, json.dumps(payload), headers=header, timeout=timeout)
    if response.status_code >= 400:
        logger.error(f'Failed to submit batch synthesis job: {response.text}')
        return None

    # Parse the body once and reuse it for both the log line and the result.
    job_id = response.json()["id"]
    logger.info('Batch synthesis job submitted successfully')
    logger.info(f'Job ID: {job_id}')
    return job_id
def get_synthesis(job_id, timeout=30):
    """Fetch a batch synthesis job and return its status.

    Args:
        job_id: Identifier of the job, appended to the batch synthesis URL.
        timeout: Seconds ``requests`` waits on the service before raising,
            so a stalled connection cannot block a polling caller forever.

    Returns:
        The ``status`` field of the job's JSON description, or ``None`` when
        the service answers with an HTTP status of 400 or above (the response
        body is logged in that case).
    """
    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis/{job_id}'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }

    response = requests.get(url, headers=header, timeout=timeout)
    if response.status_code >= 400:
        logger.error(f'Failed to get batch synthesis job: {response.text}')
        return None

    # Parse the body once and reuse it for both the log line and the result.
    job = response.json()
    logger.info('Get batch synthesis job successfully')
    logger.info(job)
    return job['status']
def download_results(job_id, timeout=30):
    """Download a job's result archive to ./outputs/results.zip.

    The job description is fetched first; the archive is then streamed from
    the URL found under ``outputs.result`` in that description.

    Args:
        job_id: Identifier of the job, appended to the batch synthesis URL.
        timeout: Seconds ``requests`` waits on each HTTP call before raising.

    Returns:
        ``None``. Every failure path logs an error and leaves any existing
        ./outputs/results.zip untouched.
    """
    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis/{job_id}'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }

    response = requests.get(url, headers=header, timeout=timeout)
    if response.status_code >= 400:
        logger.error(f'Failed to download results: {response.text}')
        return

    # Tolerate a missing or null "outputs" object instead of raising KeyError.
    download_link = (response.json().get('outputs') or {}).get('result')
    if not download_link:
        logger.error(f'Failed to download results: job {job_id} has no result link')
        return

    # Create an outputs directory (no error if it already exists).
    os.makedirs('./outputs', exist_ok=True)

    # The context manager releases the streamed connection when done.
    with requests.get(download_link, stream=True, timeout=timeout) as download:
        # Check the status before writing, so an error body is never saved
        # as results.zip and reported as a successful download.
        if download.status_code >= 400:
            logger.error(f'Failed to download results: {download.text}')
            return
        with open('./outputs/results.zip', 'wb') as fd:
            for chunk in download.iter_content(chunk_size=64 * 1024):
                fd.write(chunk)
    logger.info('Downloaded results successfully.')
def list_synthesis_jobs(skip: int = 0, top: int = 100, timeout: float = 30):
    """Log one page of the batch synthesis jobs in the subscription.

    Args:
        skip: Value sent as the ``skip`` query parameter.
        top: Value sent as the ``top`` query parameter.
        timeout: Seconds ``requests`` waits on the service before raising.

    Returns:
        ``None``. The job count and the full JSON response are logged on
        success; the response body is logged as an error on an HTTP status
        of 400 or above.
    """
    url = f'https://{SERVICE_REGION}.{SERVICE_HOST}/api/texttospeech/3.1-preview1/batchsynthesis'
    header = {
        'Ocp-Apim-Subscription-Key': SUBSCRIPTION_KEY
    }

    # Let requests build (and URL-encode) the query string.
    response = requests.get(url, headers=header, params={'skip': skip, 'top': top}, timeout=timeout)
    if response.status_code >= 400:
        logger.error(f'Failed to list batch synthesis jobs: {response.text}')
        return

    # Parse the body once and reuse it for both log lines.
    listing = response.json()
    logger.info(f'List batch synthesis jobs successfully, got {len(listing["values"])} jobs')
    logger.info(listing)
def main():
    """Submit a synthesis job for ``--voice``, poll it, and download the result.

    The job status is polled every five seconds until it is ``Succeeded``
    (the results are then downloaded) or ``Failed``. Polling also stops after
    several consecutive status checks that return no status at all.
    """
    parser = argparse.ArgumentParser(description='Long audio tool to submit voice synthesis requests.')
    # A required option never falls back to a default, so none is declared.
    parser.add_argument('--voice', required=True, help='Specified voice.')
    args = parser.parse_args()

    # Submit job
    try:
        job_id = submit_synthesis(voice=args.voice)
    except Exception:
        # Top-level boundary: log with traceback and stop, rather than
        # continuing with an unbound job_id.
        logger.exception('Failed to submit batch synthesis job')
        return
    if job_id is None:
        return

    # Check on job
    max_failed_checks = 5
    failed_checks = 0
    while True:
        status = get_synthesis(job_id)
        if status == 'Succeeded':
            logger.info('batch synthesis job succeeded')
            # Download results
            download_results(job_id)
            return
        if status == 'Failed':
            logger.error('batch synthesis job failed')
            return
        if status is None:
            # get_synthesis returns None when the status request itself was
            # rejected; retry a few times, then stop instead of polling forever.
            failed_checks += 1
            if failed_checks >= max_failed_checks:
                logger.error(f'giving up after {failed_checks} consecutive failed status checks')
                return
        else:
            failed_checks = 0
            logger.info(f'batch synthesis job is still running, status [{status}]')
        time.sleep(5)


if __name__ == '__main__':
    main()