# Union Summit Product Workshop 2026 - Part 2

In this notebook, we're going to deploy a text-to-speech app using Microsoft's VibeVoice.

## Setup

In [None]:
import os

# Clone the `flytified` branch of the VibeVoice fork into /content/VibeVoice.
# The existence check skips the clone when the path is already there, so the
# cell can be re-run without git failing on a non-empty target directory.
# NOTE(review): a partially failed clone also leaves the path in place and
# would be skipped on re-run — delete the directory manually in that case.
if not os.path.exists('/content/VibeVoice'):
  !git clone https://github.com/kumare3/VibeVoice /content/VibeVoice -b flytified

In [None]:
# Switch the kernel's working directory to the cloned repository; the shell
# commands in the following cells are run from here.
%cd /content/VibeVoice

In [None]:
# Sync the project environment with uv, then install the project itself in
# editable mode.
!uv sync
!uv pip install -e .
# Pin the Flyte SDK and FastAPI to the same versions that the app image
# installs in the "Define the app" cell.
!uv pip install flyte==2.0.0b48 fastapi==0.128.0

In [None]:
# Generate the Flyte CLI/SDK config: workshop endpoint, headless auth,
# flytesnacks/development as project/domain, and the remote image builder.
# NOTE(review): the sanity-check cell loads /content/VibeVoice/.flyte/config.yaml,
# so this presumably has to run with /content/VibeVoice as the working
# directory — confirm.
!flyte create config \
--endpoint https://demo.hosted.unionai.cloud \
--auth-type headless \
--project flytesnacks \
--domain development \
--builder remote

## Authentication Sanity Check

In [None]:
import flyte
import pathlib
import logging


# Throwaway task environment used only for the connectivity check. It has its
# own name (rather than `env`) so that re-running this cell can never rebind
# the `env` app environment that the "Define the app" and "Serve the app"
# cells depend on.
sanity_env = flyte.TaskEnvironment("workshop-env")


@sanity_env.task
def hello(x: int) -> int:
    """Return ``x + 1``; a trivial task whose only job is to prove a run works."""
    return x + 1


# Initialise the SDK from the config file under the repository checkout.
# DEBUG logging is verbose; presumably chosen to make auth problems visible
# during the workshop.
flyte.init_from_config(
    "/content/VibeVoice/.flyte/config.yaml",
    root_dir=pathlib.Path("/content/VibeVoice"),
    log_level=logging.DEBUG,
)

# Launch the task, show the run URL right away, then wait for the run to
# finish before reading its outputs.
run = flyte.run(hello, x=1)
print(run.url)
run.wait()
print(f"outputs: {run.outputs()}")

## Define the app

In [None]:
from pathlib import Path

import flyte
import flyte.app
from flyte.app.extras import FastAPIAppEnvironment

# Import the existing FastAPI app from demo/web
from demo.web.app import app

# Container image for the app, built up one layer at a time on top of a Debian
# base with Python 3.12. Local paths are resolved against the repository
# checkout in `root`.
# NOTE(review): the base is plain Debian; CUDA support for GPU inference is
# presumably provided by the installed Python packages — confirm.
root = "/content/VibeVoice"
image = flyte.Image.from_debian_base(
    name="vibevoice-realtime",
    python_version=(3, 12),
)
# System packages: ffmpeg for audio processing, git for model downloads.
image = image.with_apt_packages("ffmpeg", "git")
# Project requirements come from the repository's pyproject.toml.
image = image.with_uv_project(
    Path(root, "pyproject.toml"),
    pre=True,
    project_install_mode="install_project",
)
# Same Flyte/FastAPI pins as the notebook's own install cell.
image = image.with_pip_packages("flyte==2.0.0b48", "fastapi==0.128.0")
# Create the voices directory, then copy the local demo folder into the image.
image = image.with_commands(commands=["mkdir -p demo/voices"])
image = image.with_source_folder(Path(root, "demo"), "demo")

# App environment: ties the imported FastAPI `app` to the image built above,
# its hardware request, scaling range, console links and runtime settings.
env = FastAPIAppEnvironment(
    name="vibevoice-app",
    description="High-quality streaming text-to-speech service powered by VibeVoice Realtime model.",
    app=app,
    image=image,
    # One GPU is requested; MODEL_DEVICE below is set to "cuda" to match.
    resources=flyte.Resources(cpu=4, memory="16Gi", gpu=1, disk="20Gi", shm="20Gi"),
    # Replica range; presumably (min, max), i.e. may scale down to zero — confirm.
    scaling=flyte.app.Scaling(replicas=(0, 1)),
    # Relative links: the JSON info route defined below and the /docs page.
    links=[
        flyte.app.Link(path=link_path, title=link_title, is_relative=True)
        for link_path, link_title in (
            ("/info", "Info JSON"),
            ("/docs", "Open API playground"),
        )
    ],
    # Settings passed to the app as environment variables.
    env_vars={
        # HuggingFace model ID
        "MODEL_PATH": "microsoft/VibeVoice-Realtime-0.5b",
        # Run inference on the GPU
        "MODEL_DEVICE": "cuda",
        # Default voice preset
        "VOICE_PRESET": "en-WHTest_man",
        # Presumably a numeric logging level (10 is DEBUG in Python's logging) — confirm.
        "LOG_LEVEL": "10",
    },
)

@env.app.get("/info")
async def app_info() -> dict:
    """
    Describe the VibeVoice service as a JSON-serialisable dict.

    Returns:
        A dict with the service name, version and model ID, the ``endpoint``
        attribute of ``env`` (or "Not deployed" when that attribute is
        missing), a list of feature descriptions, and a mapping of route
        names to paths.
    """
    feature_list = [
        "Real-time streaming text-to-speech",
        "WebSocket audio streaming",
        "Multiple voice presets",
        "Configurable CFG scale",
        "Adjustable inference steps",
        "Interactive web UI",
    ]
    route_map = {
        "index": "/",
        "websocket_stream": "/stream",
        "config": "/config",
        "info": "/info",
    }
    # NOTE(review): `env` is the base environment, while the serve cell deploys
    # a clone with a different name — confirm this reports the served endpoint.
    endpoint = getattr(env, "endpoint", "Not deployed")
    return {
        "service": "VibeVoice Realtime TTS",
        "version": "0.0.1",
        "model": "microsoft/VibeVoice-Realtime-0.5b",
        "endpoint": endpoint,
        "features": feature_list,
        "routes": route_map,
    }

## Serve the app

In [None]:
# Deployment banner. The lines are collected in one list and emitted with a
# single print call; the output is line-for-line the same as printing each
# entry separately. Emoji are written as real Unicode characters.
banner_rule = "=" * 70
banner_lines = [
    banner_rule,
    "🎤 VibeVoice Realtime TTS Service",
    banner_rule,
    "\nModel: microsoft/VibeVoice-Realtime-0.5b",
    "Features: High-quality streaming text-to-speech with multiple voices",
    "\n📍 Available Endpoints:",
    "   GET  /                        - Web UI (index.html)",
    "   WS   /stream                  - WebSocket audio streaming",
    "   GET  /config                  - Get available voice presets",
    "   GET  /info                    - Service information",
    "\n" + banner_rule,
    "\nDeploying to Flyte...\n",
]
print("\n".join(banner_lines))

app_suffix = "workshop"  # 👈 put your name here

# Build the serve context first, then clone the base environment under a
# suffixed name, and finally serve that clone.
serve_context = flyte.with_servecontext(
    interactive_mode=True,
    # version="0.0.0",
)
named_env = env.clone_with(name=f"vibevoice-app-{app_suffix}")
flyte_app = serve_context.serve(named_env)

# URL of the served app.
print(flyte_app.url)