# 3. Generate Audio (HD Voice)

https://learn.microsoft.com/en-us/azure/ai-services/speech-service/high-definition-voices

In [1]:
# Load variables from a .env file into the process environment; the
# SPEECH_KEY / SPEECH_REGION lookups further down read them via os.getenv.
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import json

# Decode explicitly as UTF-8 (the JSON interchange encoding) so the Korean
# text does not depend on the platform's default locale encoding.
with open("KFE_paper_sample_conversation.json", "r", encoding="utf-8") as f:
    jsfile = f.read()

# Parsed conversation: a list of turns, each read later through its
# 'speaker' and 'text' keys.
conversation = json.loads(jsfile)
conversation

[{'speaker': 'Host (Jane)', 'text': '안녕하세요, 여러분. 오늘은 핵융합 에너지를 이야기해볼게요.'},
 {'speaker': 'Guest', 'text': '안녕하세요, 김범석입니다. 초대해 주셔서 감사합니다.'},
 {'speaker': 'Host (Jane)', 'text': '김 박사님, K-DEMO 프로젝트에 대해 좀 더 알려주세요.'},
 {'speaker': 'Guest', 'text': '네, K-DEMO는 한국의 핵융합 데모 프로젝트입니다.'},
 {'speaker': 'Host (Jane)', 'text': '핵융합에서 방사능 관리가 왜 그렇게 중요할까요?'},
 {'speaker': 'Guest', 'text': '방사능은 안전과 폐기물 관리에 큰 영향을 미치죠.'},
 {'speaker': 'Host (Jane)', 'text': '그렇군요. 방사능 붕괴열이란 무엇인가요?'},
 {'speaker': 'Guest', 'text': '붕괴열은 반응 후에도 남아있는 열이에요. 뜨거운 스토브 같죠.'},
 {'speaker': 'Host (Jane)', 'text': '그 열을 어떻게 식히나요?'},
 {'speaker': 'Guest', 'text': '냉각 시스템으로 열을 식혀야 해요. 시간이 중요하죠.'},
 {'speaker': 'Host (Jane)', 'text': '냉각 시스템 설계에서 중요한 점은 무엇인가요?'},
 {'speaker': 'Guest', 'text': '열을 효과적으로 분산시키는 게 핵심입니다.'},
 {'speaker': 'Host (Jane)', 'text': 'K-DEMO의 냉각 시스템은 어떻게 설계되었나요?'},
 {'speaker': 'Guest', 'text': '물 냉각 세라믹 브리더 블랭킷을 사용합니다.'},
 {'speaker': 'Host (Jane)', 'text': '방사성 폐기물 관리는 어떻게 하나요?'},
 {'speaker': 'Guest', 'text': '

## HD Options

The default [temperature parameter](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/high-definition-voices#how-to-use-azure-ai-speech-hd-voices) is 1.0.

```
<voice name='en-us-Andrew2:DragonHDLatestNeural' parameters='temperature=0.1'>저도 감사합니다, 제인님. 다음에 또 뵙길 바랍니다.</voice>
```

In [3]:
def generate_ssml(host_voice, guest_voice, temperature=None, turns=None):
    """Build a two-voice SSML document from a list of conversation turns.

    Args:
        host_voice: Voice name used for turns whose 'speaker' is 'Host (Jane)'.
        guest_voice: Voice name used for every other turn.
        temperature: Optional HD-voice temperature. When given, each <voice>
            element gets ``parameters='temperature=<value>'``; when None the
            attribute is omitted and the service default applies.
        turns: Optional iterable of dicts with 'speaker' and 'text' keys.
            Defaults to the notebook-level ``conversation``.

    Returns:
        The SSML document as a string, one <voice> element per turn.
    """
    from xml.sax.saxutils import escape

    if turns is None:
        # Fall back to the conversation loaded earlier in the notebook.
        turns = conversation

    voice_params = "" if temperature is None else f" parameters='temperature={temperature}'"

    parts = [
        "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='https://www.w3.org/2001/mstts' xml:lang='en-US'>"
    ]
    for turn in turns:
        voice = host_voice if turn['speaker'] == 'Host (Jane)' else guest_voice
        # Attribute values are single-quoted, so also escape apostrophes there.
        voice_attr = escape(voice, {"'": "&apos;"})
        # Escape &, < and > so arbitrary dialogue text cannot break the XML.
        spoken = escape(turn['text'])
        parts.append(f"<voice name='{voice_attr}'{voice_params}>{spoken}</voice>")
    parts.append("</speak>")

    return "\n".join(parts)

In [4]:
import azure.cognitiveservices.speech as speechsdk
import os
# Azure Speech credentials are read from the environment.
speech_key = os.getenv('SPEECH_KEY')
service_region = os.getenv('SPEECH_REGION')

# Fail fast with a readable message instead of an opaque SDK error later on.
_missing_speech_vars = [
    var_name
    for var_name, var_value in (('SPEECH_KEY', speech_key), ('SPEECH_REGION', service_region))
    if not var_value
]
if _missing_speech_vars:
    raise RuntimeError(
        "Missing environment variable(s): " + ", ".join(_missing_speech_vars)
        + ". Set them in the environment or in the .env file."
    )

speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

In [5]:
import random
import string

def generate_random_filename(length=8):
    """Return a string of ``length`` randomly chosen lowercase ASCII letters.

    No directory or extension is added; callers append those themselves.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def generate_podcast(ssml, filename=None):
    """Synthesize ``ssml`` to a WAV file and print the outcome.

    Args:
        ssml: SSML document string passed to the speech synthesizer.
        filename: Output path. When None, a random ``./<name>.wav`` path in
            the current directory is used.

    Returns:
        None. Success or cancellation details are reported via ``print``.
    """
    # Pick the output path: caller-supplied, or a random name when omitted.
    wav_path = filename if filename is not None else "./" + generate_random_filename() + ".wav"

    file_sink = speechsdk.audio.AudioOutputConfig(filename=wav_path)

    # Synthesizer bound to the notebook-level speech_config, writing to the file.
    synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=file_sink)
    outcome = synthesizer.speak_ssml_async(ssml).get()

    if outcome.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
        print("Speech synthesis was successful. Audio was written to '{}'".format(wav_path))
        return

    if outcome.reason == speechsdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        print("Speech synthesis canceled: {}".format(details.reason))
        # Error text is only reported for error cancellations that carry details.
        if details.reason == speechsdk.CancellationReason.Error and details.error_details:
            print("Error details: {}".format(details.error_details))
        print("Did you update the subscription info?")

In [6]:
# HD voices: generate_ssml() uses host_voice for 'Host (Jane)' turns and
# guest_voice for every other speaker.
host_voice = 'en-us-Ava:DragonHDLatestNeural'
guest_voice = 'en-us-Andrew2:DragonHDLatestNeural'


In [7]:
# Presumably the intended HD-voice temperature.
# NOTE(review): `temp` is not passed to generate_ssml() below, so it has no
# effect on the generated SSML.
temp = 0.9
# Build the two-voice SSML, then synthesize it to the named WAV file.
ssml = generate_ssml(host_voice, guest_voice)
generate_podcast(ssml, "KFE_paper_sample_audio_hd.wav")

Speech synthesis was successful. Audio was written to 'KFE_paper_sample_audio_hd.wav'


In [None]:
# Non-HD voice
#host_voice = "ko-KR-SunHiNeural"
#guest_voice = "ko-KR-InJoonNeural"