In [None]:
# OpenAI API test (non-streaming)
from openai import OpenAI
from IPython.display import Audio, display

# Initialize the client
client = OpenAI(api_key="dummy-key", base_url="http://localhost:8000/v1")

# Generate audio
response = client.audio.speech.create(
    model="tts-1",
    voice="echo",
    input="""
    以下是一些中英文对照的话语。 
    1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 
    2. 你好呀，最近怎么样？Hello there, how have you been recently? 
    3. 别放弃，你能做到的！Don't give up, you can do it! 
    4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.
    """,
    response_format="wav",
)

# Get audio binary data
audio_data = response.content  # response.content is of type bytes

# Display and play in the Notebook
display(Audio(audio_data, autoplay=False))

In [None]:
# Test using the requests module, streaming mode
import requests
from IPython.display import Audio, display
import io

payload = {
    "model": "tts-1",
    "input": """
    以下是一些中英文对照的话语。 
    1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 
    2. 你好呀，最近怎么样？Hello there, how have you been recently? 
    3. 别放弃，你能做到的！Don't give up, you can do it! 
    4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.
    """,
    "voice": "echo",
    "response_format": "wav",
    "stream": True,
}

try:
    response = requests.post(
        "http://localhost:8000/v1/audio/speech", json=payload, stream=True
    )
    response.raise_for_status()  # Check the status code

    audio_buffer = io.BytesIO()
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            audio_buffer.write(chunk)

    audio_buffer.seek(0)
    display(Audio(audio_buffer.getvalue(), autoplay=False))
    print("Audio has been loaded into the Notebook and can be played manually")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {str(e)}")
    if hasattr(e.response, "text"):
        print(f"Error details: {e.response.text}")

In [None]:
import subprocess

# Use pipeline to implement streaming playback, WAV format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "wav", "stream": true}\' '
    "-s | mpv --no-video -"
)
subprocess.run(cmd, shell=True, check=True)

In [None]:
import subprocess

# Use pipeline to implement streaming playback, MP3 format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "mp3", "stream": true}\' '
    "-s | mpv --no-video -"
)
subprocess.run(cmd, shell=True, check=True)

In [1]:
import subprocess

# Use pipeline to implement streaming playback, OGG format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "ogg", "stream": true}\' '
    "-s | mpv --no-video -"
)
subprocess.run(cmd, shell=True, check=True)

[file] Reading from stdin...
● Audio  --aid=1  (vorbis 2ch 48000 Hz 112 kbps)
[lavf] Linearizing discontinuity: 0.000000 -> 0.197333
[lavf] Linearizing discontinuity: 0.197333 -> 0.385333
[lavf] Linearizing discontinuity: 0.385333 -> 0.573333
[lavf] Linearizing discontinuity: 0.573333 -> 0.761333
[lavf] Linearizing discontinuity: 0.761333 -> 0.944000
[lavf] Linearizing discontinuity: 0.944000 -> 1.132000
[lavf] Linearizing discontinuity: 1.132000 -> 1.320000
[lavf] Linearizing discontinuity: 1.320000 -> 1.521333
[lavf] Linearizing discontinuity: 1.521333 -> 1.709333
[lavf] Linearizing discontinuity: 1.706667 -> 1.709333
AO: [pipewire] 48000Hz stereo 2ch floatp
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/868KB
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/867KB
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/865KB
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/856KB
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/854KB
A: 00:00:00 / 00:00:20 (0%) Cache: 20s/853KB
A: 00:00:00 / 00:00:20 (0%) Cache: 19s/850KB
A: 00:00:00 / 00

CompletedProcess(args='curl -X POST "http://localhost:8000/v1/audio/speech" -H "Content-Type: application/json" -d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "ogg", "stream": true}\' -s | mpv --no-video -', returncode=0)