In [1]:
import asyncio
import os
from typing import Annotated, Union
from pathlib import Path

import nest_asyncio
import uvicorn
from fastapi import FastAPI, Request, WebSocket
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles

from autogen.agentchat.realtime_agent import FunctionObserver, RealtimeAgent, WebsocketAudioAdapter



In [2]:
# Runtime settings, read from the process environment.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PORT = int(os.environ.get("PORT", 5050))

# Stop early when no (or an empty) API key is configured.
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
    raise ValueError("Missing the OpenAI API key. Please set it in the .env file.")

# LLM settings passed as `llm_config` to the RealtimeAgent built in the
# websocket handler.
llm_config = dict(
    timeout=600,
    cache_seed=45,  # change the seed for different trials
    config_list=[
        dict(model="gpt-4o-realtime-preview-2024-10-01", api_key=OPENAI_API_KEY),
    ],
    temperature=0.8,
)

In [3]:
# Patch asyncio to allow nested event loops. Presumably needed because the
# notebook kernel already runs a loop and uvicorn.run() (called in a later
# cell) starts its own — NOTE(review): confirm against the nest_asyncio docs.
nest_asyncio.apply()

In [4]:
# Web application that serves the demo page and the audio websocket.
app = FastAPI()

# Asset folders are resolved relative to the current working directory,
# which is assumed to be the directory the notebook lives in.
notebook_path = os.getcwd()
_assets_root = Path(notebook_path) / "agentchat_realtime_websocket"

# Files under <assets>/static are served at the /static URL prefix.
_static_files = StaticFiles(directory=_assets_root / "static")
app.mount("/static", _static_files, name="static")

# Templates for HTML responses
templates = Jinja2Templates(directory=_assets_root / "templates")

@app.get("/", response_class=JSONResponse)
async def index_page():
    """Return a small JSON status message showing the server is up."""
    status = {"message": "Websocket Audio Stream Server is running!"}
    return status

@app.get("/start-chat/", response_class=HTMLResponse)
async def start_chat(request: Request):
    """Render ``chat.html``, the HTML page for the audio chat.

    The template context carries the incoming request and, under ``"port"``,
    the server's configured ``PORT``.
    """
    context = {"request": request, "port": PORT}
    return templates.TemplateResponse("chat.html", context)

@app.websocket("/media-stream")
async def handle_media_stream(websocket: WebSocket):
    """Accept a websocket connection and run a RealtimeAgent session over it.

    A new WebsocketAudioAdapter and RealtimeAgent are created for every
    connection, a ``get_weather`` function is registered on the agent, and
    the coroutine then awaits the agent's ``run()``.
    """
    await websocket.accept()

    instructions = "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?"
    realtime_agent = RealtimeAgent(
        name="Weather Bot",
        system_message=instructions,
        llm_config=llm_config,
        audio_adapter=WebsocketAudioAdapter(websocket),
    )

    # Demo function with canned answers: cloudy for Seattle, sunny elsewhere.
    @realtime_agent.register_handover(name="get_weather", description="Get the current weather")
    def get_weather(location: Annotated[str, "city"]) -> str:
        if location == "Seattle":
            return "The weather is cloudy."
        return "The weather is sunny."

    await realtime_agent.run()


# Start the server from inside the notebook. The call keeps the cell busy
# while the server runs (the log below says "Press CTRL+C to quit").
# NOTE(review): host="0.0.0.0" listens on all network interfaces; consider
# "127.0.0.1" if only local access is intended.
uvicorn.run(app, host="0.0.0.0", port=PORT)

INFO:     Started server process [60435]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:5050 (Press CTRL+C to quit)


INFO:     127.0.0.1:51198 - "GET /start-chat HTTP/1.1" 307 Temporary Redirect
INFO:     127.0.0.1:51198 - "GET /start-chat/ HTTP/1.1" 200 OK
INFO:     127.0.0.1:51198 - "GET /static/wavtools.js HTTP/1.1" 200 OK
INFO:     127.0.0.1:51204 - "GET /static/main.js HTTP/1.1" 200 OK
INFO:     127.0.0.1:51204 - "GET /static/Audio.js HTTP/1.1" 200 OK


INFO:     ('127.0.0.1', 51216) - "WebSocket /media-stream" [accepted]
INFO:     connection open


INFO:     127.0.0.1:51204 - "GET /favicon.ico HTTP/1.1" 404 Not Found
Sending session update: {"type": "session.update", "session": {"turn_detection": {"type": "server_vad"}, "voice": "alloy", "instructions": "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?", "modalities": ["audio"], "temperature": 0.8}}
Sending session update finished
Received event: session.created {'type': 'session.created', 'event_id': 'event_AgIuWHBuzW59zezATXxJ5', 'session': {'id': 'sess_AgIuVZh1p6dyoyNEqVSuQ', 'object': 'realtime.session', 'model': 'gpt-4o-realtime-preview-2024-10-01', 'expires_at': 1734647631, 'modalities': ['audio', 'text'], 'instructions': "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality sho

INFO:     ('127.0.0.1', 43670) - "WebSocket /media-stream" [accepted]
INFO:     connection open


Sending session update: {"type": "session.update", "session": {"turn_detection": {"type": "server_vad"}, "voice": "alloy", "instructions": "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?", "modalities": ["audio"], "temperature": 0.8}}
Sending session update finished
Received event: session.created {'type': 'session.created', 'event_id': 'event_AgIutGsw5qf3WwveWikTy', 'session': {'id': 'sess_AgIut2eUGPpxXodLrAE93', 'object': 'realtime.session', 'model': 'gpt-4o-realtime-preview-2024-10-01', 'expires_at': 1734647655, 'modalities': ['text', 'audio'], 'instructions': "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacti

INFO:     connection closed
INFO:     ('127.0.0.1', 37832) - "WebSocket /media-stream" [accepted]
INFO:     connection open


Sending session update: {"type": "session.update", "session": {"turn_detection": {"type": "server_vad"}, "voice": "alloy", "instructions": "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?", "modalities": ["audio"], "temperature": 0.8}}
Sending session update finished
Received event: session.created {'type': 'session.created', 'event_id': 'event_AgIv4LEN3eKdUMGopSU1q', 'session': {'id': 'sess_AgIv3xr2ZDNfzTCZ2GADs', 'object': 'realtime.session', 'model': 'gpt-4o-realtime-preview-2024-10-01', 'expires_at': 1734647665, 'modalities': ['audio', 'text'], 'instructions': "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacti

INFO:     ('127.0.0.1', 33534) - "WebSocket /media-stream" [accepted]
INFO:     connection open


Sending session update: {"type": "session.update", "session": {"turn_detection": {"type": "server_vad"}, "voice": "alloy", "instructions": "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?", "modalities": ["audio"], "temperature": 0.8}}
Sending session update finished
Received event: session.created {'type': 'session.created', 'event_id': 'event_AgIwz80goWYq1dsKVkpcN', 'session': {'id': 'sess_AgIwzi90bTt5bge8Xp4qA', 'object': 'realtime.session', 'model': 'gpt-4o-realtime-preview-2024-10-01', 'expires_at': 1734647785, 'modalities': ['text', 'audio'], 'instructions': "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacti

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/home/davorin/work/airt/ag2-develop/.env/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 793, in __aexit__
    await _wait(self._tasks)
  File "/home/davorin/work/airt/ag2-develop/.env/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 758, in _wait
    await waiter
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/futures.py", line 287, in __await__
    yield self  # This tells Task to wait for completion.
    ^^^^^^^^^^
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/tasks.py", line 339, in __wakeup
    future.result()
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/futures.py", line 198, in result
    raise exc
asyncio.exceptions.CancelledError: Cancelled by cancel scope 75421d523150

During handling of the above exception, another exception occurred:

  + Exception Group Traceback (most recent call last):
  |   File "/home/davorin/work/airt/ag2-develop/

Error in _read_from_client: no close frame received or sent
Error in _read_from_client: sent 1011 (internal error) keepalive ping timeout; no close frame received
Error in _read_from_client: sent 1011 (internal error) keepalive ping timeout; no close frame received


INFO:     connection closed


Received event: input_audio_buffer.speech_started {'type': 'input_audio_buffer.speech_started', 'event_id': 'event_AgIxUU4lXFZQSCMoSwaQ3', 'audio_start_ms': 36192, 'item_id': 'item_AgIxULDzyUMGwF3VS9VEL'}
Speech started detected.
Interrupting response with id: item_AgIx2OMWb2RceSUnfQlLw
Handling speech started event.
Received event: error {'type': 'error', 'event_id': 'event_AgIxUJgyio4YmMMwUjU47', 'error': {'type': 'invalid_request_error', 'code': None, 'message': 'Audio content of 7400ms is already shorter than 29716ms', 'param': None, 'event_id': None}}
Received event: input_audio_buffer.speech_stopped {'type': 'input_audio_buffer.speech_stopped', 'event_id': 'event_AgIxWBkcvhdAAsEhV3TMT', 'audio_end_ms': 37888, 'item_id': 'item_AgIxULDzyUMGwF3VS9VEL'}
Received event: input_audio_buffer.committed {'type': 'input_audio_buffer.committed', 'event_id': 'event_AgIxWaFseiJpm1CGfHJca', 'previous_item_id': 'item_AgIx2OMWb2RceSUnfQlLw', 'item_id': 'item_AgIxULDzyUMGwF3VS9VEL'}
Received event

INFO:     connection closed


INFO:     127.0.0.1:33524 - "GET /static/Audio.js HTTP/1.1" 304 Not Modified


INFO:     ('127.0.0.1', 49718) - "WebSocket /media-stream" [accepted]
INFO:     connection open


Sending session update: {"type": "session.update", "session": {"turn_detection": {"type": "server_vad"}, "voice": "alloy", "instructions": "Hello there! I am an AI voice assistant powered by Autogen and the OpenAI Realtime API. You can ask me about weather, jokes, or anything you can imagine. Start by saying How can I help you?", "modalities": ["audio"], "temperature": 0.8}}
Sending session update finished
Received event: session.created {'type': 'session.created', 'event_id': 'event_AgIxqzPdEpGeINATkNRdN', 'session': {'id': 'sess_AgIxqOTBa59WVMwlzwwSx', 'object': 'realtime.session', 'model': 'gpt-4o-realtime-preview-2024-10-01', 'expires_at': 1734647838, 'modalities': ['audio', 'text'], 'instructions': "Your knowledge cutoff is 2023-10. You are a helpful, witty, and friendly AI. Act like a human, but remember that you aren't a human and that you can't do human things in the real world. Your voice and personality should be warm and engaging, with a lively and playful tone. If interacti

ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/home/davorin/work/airt/ag2-develop/.env/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 793, in __aexit__
    await _wait(self._tasks)
  File "/home/davorin/work/airt/ag2-develop/.env/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 758, in _wait
    await waiter
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/futures.py", line 287, in __await__
    yield self  # This tells Task to wait for completion.
    ^^^^^^^^^^
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/tasks.py", line 339, in __wakeup
    future.result()
  File "/home/davorin/anaconda3/lib/python3.11/asyncio/futures.py", line 198, in result
    raise exc
asyncio.exceptions.CancelledError: Cancelled by cancel scope 75421c0a0c90

During handling of the above exception, another exception occurred:

  + Exception Group Traceback (most recent call last):
  |   File "/home/davorin/work/airt/ag2-develop/