In [1]:
import os
from dotenv import load_dotenv
from openai import AsyncAzureOpenAI

# Load variables from a local .env file (if one is present) so the
# os.getenv() lookups in the next cell can see them.
load_dotenv()

True

In [2]:
# Azure OpenAI credentials, read from the process environment.
# Either value is None when the corresponding variable is not set.
API_KEY = os.environ.get("API_KEY")
API_BASE = os.environ.get("API_BASE")

In [3]:
# Async Azure OpenAI client used by the audio helpers in this notebook.
# To target the non-Azure service instead, build
# AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY")) here.
AZURE_API_VERSION = "2025-03-01-preview"

azure = AsyncAzureOpenAI(
    api_key=API_KEY,
    azure_endpoint=API_BASE,
    api_version=AZURE_API_VERSION,
)

In [4]:
async def generate_audio_file(input, output_path, voice_name="coral", instructions=None,
                              model="gpt-4o-mini-tts"):
    """Synthesize speech for ``input`` via the Azure OpenAI client and save it as MP3.

    Args:
        input: Text to speak. The name shadows the builtin but is kept so
            existing keyword callers (``input=...``) continue to work.
        output_path: Path the streamed MP3 audio is written to.
        voice_name: Voice identifier forwarded as ``voice``.
        instructions: Optional free-text speaking-style guidance. Left out of
            the request entirely when None.
        model: Model name sent with the request (with Azure this is typically
            the deployment name -- confirm against your resource).

    Raises:
        Whatever the SDK raises for a failed request; nothing is caught here.
    """
    request = {
        "model": model,
        "voice": voice_name,
        "input": input,
        "response_format": "mp3",
    }
    # The SDK expects a string or an omitted argument for `instructions`;
    # forwarding None would place an explicit null in the request body.
    if instructions is not None:
        request["instructions"] = instructions

    async with azure.audio.speech.with_streaming_response.create(**request) as response:
        await response.stream_to_file(output_path)

async def generate_srt_file(input_file_path, output_path, language="en", model="whisper"):
    """Transcribe an audio file to SRT subtitles and write them to ``output_path``.

    A failed transcription request is reported with ``print`` and no output
    file is written (best-effort). Local file errors -- an unreadable input
    or an unwritable output path -- propagate to the caller instead of being
    reported as API errors.

    Args:
        input_file_path: Path of the audio file to transcribe (opened in
            binary mode and uploaded as-is).
        output_path: Path of the UTF-8 ``.srt`` file to create/overwrite.
        language: Language code of the audio (ISO code).
        model: Model name sent with the request (with Azure this is typically
            the deployment name -- confirm against your resource).

    Raises:
        OSError: If the input cannot be opened or the output cannot be written.
    """
    with open(input_file_path, "rb") as audio_file:
        try:
            transcription = await azure.audio.transcriptions.create(
                file=audio_file,  # a flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, webm
                language=language,
                model=model,
                response_format="srt",  # also text, srt, verbose_json, or vtt
            )
        except Exception as e:
            # Only the remote call is guarded, so the message below is accurate.
            print(f"An API error occurred: {e}")
            return

    # The audio handle is closed by now; write the subtitles outside the
    # try block so disk errors are not mislabelled as API errors.
    with open(output_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(transcription)

In [None]:
# max 2k tokens for input

input = """Thank you for reaching out, and I'm truly sorry about the unexpected charge on your bill. I completely understand how frustrating this must be, especially after your stay.\n\nAfter reviewing your reservation, I can confirm that this was an error on our part. I'll be issuing a full refund right away, and you should see the amount credited to your payment method within a few business days.\n\nI appreciate your understanding and patience, and I'm here if you need any further assistance. Thank you for allowing us to resolve this for you."""
voice = "coral"
instructions = """Voice Affect: Calm, composed, and reassuring. Competent and in control, instilling trust.\n\nTone: Sincere, empathetic, with genuine concern for the customer and understanding of the situation.\n\nPacing: Slower during the apology to allow for clarity and processing. Faster when offering solutions to signal action and resolution.\n\nEmotions: Calm reassurance, empathy, and gratitude.\n\nPronunciation: Clear, precise: Ensures clarity, especially with key details. Focus on key words like \"refund\" and \"patience.\" \n\nPauses: Before and after the apology to give space for processing the apology."""

mp3_output_path = "output.mp3"

await generate_audio_file(
    input=input,
    output_path=mp3_output_path,
    voice_name=voice,
    instructions=instructions
)

await generate_srt_file(
    input_file_path=mp3_output_path,
    output_path="output.srt",
    language="en"
)
