# Union Summit Product Workshop 2026 - Part 1

In this notebook, we're going to:
- Build and run a Flyte 2 workflow that extracts text from a PDF and visualizes it.
- Build and serve a Flyte 2 app that exposes the PDF extraction workflow as a user-friendly UI.

## Setup

In [None]:
try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False


if IN_COLAB:
    !git clone https://github.com/unionai/summit-workshop-2026
    %cd /content/summit-workshop-2026

In [None]:
%%bash
# Install the exact Flyte SDK (a 2.0.0 beta) and FastAPI versions this workshop pins.
uv pip install flyte==2.0.0b48 fastapi==0.128.0

In [None]:
# Write a Flyte CLI/SDK config that points at the workshop's hosted endpoint, uses
# headless auth, targets the `flytesnacks` project / `development` domain, and
# selects the remote image builder.
# NOTE(review): `--force` presumably overwrites an existing config file — confirm.
!flyte create config \
--endpoint https://demo.hosted.unionai.cloud \
--auth-type headless \
--project flytesnacks \
--domain development \
--builder remote \
--force

## Hello world task

In [7]:
import flyte
import pathlib

# Task environment used to register the `hello` task in this cell.
env = flyte.TaskEnvironment("workshop-env")


@env.task
def hello(x: int) -> int:
    """Return the successor of ``x`` (i.e. ``x + 1``)."""
    incremented = x + 1
    return incremented


# Initialize the SDK from the Flyte config file (see `flyte create config` in Setup).
flyte.init_from_config()
# Submit the task, print the run's URL, then block until the run finishes.
run = flyte.run(hello, x=1)
print(run.url)
run.wait()
# Show the run's outputs once it has completed.
print(f"outputs: {run.outputs()}")

https://demo.hosted.unionai.cloud/v2/domain/development/project/flytesnacks/runs/r8vhb2w99pjn2tkwg2hj


outputs: (2,)


## Configure Task Environment

In [None]:
import tempfile
from dataclasses import dataclass

import flyte
import flyte.report
from flyte.io import File

# Define the task environment with required packages.
# NOTE: this rebinds `env` from the hello-world cell; every `@env.task` below
# is registered against this "pdf_wordcloud" environment.
env = flyte.TaskEnvironment(
    name="pdf_wordcloud",
    # Container image: Debian base plus the pip packages the tasks import at runtime.
    image=flyte.Image.from_debian_base().with_pip_packages(
        "httpx",  # HTTP client used by `download_pdf`
        "pymupdf>=1.24.0",  # For PDF text extraction
        "wordcloud>=1.9.0",  # For wordcloud generation
        "matplotlib>=3.7.0",  # For plotting
    ),
    # Resources for the environment: 4 CPUs and 4Gi of memory.
    resources=flyte.Resources(cpu="4", memory="4Gi")
)


@dataclass
class PipelineOutput:
    """
    Output of the PDF wordcloud pipeline.

    Attributes:
        summary: One-line summary string returned by `generate_report`.
        extracted_text: File holding the full text extracted from the PDF.
        wordcloud_image: File holding the rendered wordcloud image.
    """
    summary: str
    extracted_text: File
    wordcloud_image: File

## Task 1: `download_pdf`

In [None]:
@env.task
async def download_pdf(url: str) -> File:
    """
    Download a PDF file from a URL and hand it back as a Flyte File.

    Args:
        url: The URL of the PDF file to download.

    Returns:
        A File created from a local temporary copy of the downloaded PDF.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    import httpx

    print(f"Downloading PDF from: {url}")

    # Browser-like request headers sent with the download request.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "application/pdf,*/*",
        "Accept-Language": "en-US,en;q=0.9",
    }

    async with httpx.AsyncClient(follow_redirects=True, timeout=60.0, headers=headers) as client:
        response = await client.get(url)
        response.raise_for_status()

    # delete=False keeps the file on disk after the handle is closed so that it
    # can be uploaded by path afterwards.
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(response.content)
        tmp_path = tmp.name

    # Upload only AFTER the `with` block has closed the handle: closing flushes
    # Python's write buffer, so the file on disk is guaranteed to be complete.
    return await File.from_local(local_path=tmp_path)

## Task 2: `extract_text`

In [24]:
@env.task
async def extract_text(pdf_file: File) -> dict:
    """
    Extract all text from a PDF file.

    Args:
        pdf_file: The PDF to process; it is downloaded to a local path before parsing.

    Returns:
        A dictionary with three keys:
          - "pages": one dict per page with "page_number" (1-based), "text"
            (watermark-free) and "char_count" (length of that cleaned text).
          - "metadata": title, author, subject, keywords and page_count.
          - "full_text": the cleaned page texts joined by blank lines.
    """
    import pymupdf

    # Site watermark strings removed from the extracted text. The longer phrase
    # comes first so that no dangling "Property of " is left behind.
    watermarks = ("Property of AmericanRhetoric.com", "AmericanRhetoric.com")

    def strip_watermarks(text: str) -> str:
        """Remove every watermark string from ``text``."""
        for mark in watermarks:
            text = text.replace(mark, "")
        return text

    # Materialize the file locally and open it with PyMuPDF.
    path = await pdf_file.download()
    doc = pymupdf.open(path, filetype="pdf")
    try:
        # Extract document metadata
        metadata = {
            "title": doc.metadata.get("title", ""),
            "author": doc.metadata.get("author", ""),
            "subject": doc.metadata.get("subject", ""),
            "keywords": doc.metadata.get("keywords", ""),
            "page_count": len(doc),
        }

        print(f"Processing PDF with {len(doc)} pages")

        pages = []
        for page_num, page in enumerate(doc):
            # Clean each page with the same rules as the full text so the
            # per-page view and the aggregate never disagree.
            page_text = strip_watermarks(page.get_text("text"))
            pages.append({
                "page_number": page_num + 1,
                "text": page_text,
                "char_count": len(page_text),
            })
    finally:
        # Release the document even if extraction fails part-way through.
        doc.close()

    # The watermark strings contain no newline, so cleaning page by page and
    # then joining yields the same full text as cleaning the joined text.
    result = {
        "pages": pages,
        "metadata": metadata,
        "full_text": "\n\n".join(entry["text"] for entry in pages),
    }

    print(f"Extracted {len(result['full_text'])} characters total")
    return result

## Task 3: `generate_wordcloud`

In [25]:
@env.task
async def generate_wordcloud(text: str) -> File:
    """
    Generate a wordcloud image from the given text.

    Args:
        text: The text to create a wordcloud from.

    Returns:
        A File created from the rendered PNG image of the wordcloud.
    """
    import os
    import tempfile

    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    print("Generating wordcloud...")

    # Create the wordcloud
    wordcloud = WordCloud(
        width=1200,
        height=600,
        background_color="white",
        colormap="viridis",
        max_words=200,
        min_font_size=10,
        max_font_size=150,
    ).generate(text)

    # Render the wordcloud onto a borderless matplotlib figure.
    fig, ax = plt.subplots(figsize=(16, 8))
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")

    # Write into a fresh temporary directory instead of the current working
    # directory: it is always writable and cannot collide with another run.
    # The "wordcloud.png" basename is kept unchanged.
    image_path = os.path.join(tempfile.mkdtemp(), "wordcloud.png")
    try:
        fig.savefig(image_path, format="png", dpi=150, bbox_inches="tight", pad_inches=0.1)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)

    # Output a Flyte File object
    return await File.from_local(local_path=image_path)

## Task 4: `generate_report`

In [26]:
@env.task(report=True)
async def generate_report(
    extracted_data: dict,
    wordcloud_image: File,
    source_url: str,
) -> str:
    """
    Generate a Flyte report displaying the extracted text and wordcloud.

    Args:
        extracted_data: Dictionary with "metadata", "pages" and "full_text"
            entries; missing entries fall back to empty values.
        wordcloud_image: File holding the PNG wordcloud; it is read and embedded
            in the report as a base64 data URI.
        source_url: The original URL of the PDF.

    Returns:
        A summary string of the extraction (word count and page count).
    """
    import html
    import base64

    metadata = extracted_data.get("metadata", {})
    pages = extracted_data.get("pages", [])
    full_text = extracted_data.get("full_text", "")

    # Encode wordcloud image to base64 for embedding
    async with wordcloud_image.open("rb") as f:
        image_bytes = await f.read()
    wordcloud_b64 = base64.b64encode(image_bytes).decode("utf-8")

    # Calculate statistics
    total_chars = len(full_text)
    word_count = len(full_text.split())

    # Shorten long URLs for display; only add the ellipsis when text was cut.
    max_url_chars = 80
    if len(source_url) > max_url_chars:
        display_url = f"{source_url[:max_url_chars]}..."
    else:
        display_url = source_url

    # Build the HTML report. Literal CSS braces are doubled because this is an
    # f-string; all document-derived values are HTML-escaped before insertion.
    report_html = f"""
    <style>
        .container {{
            font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            color: #eee;
            min-height: 100vh;
        }}
        .header {{
            background: linear-gradient(90deg, #0f3460, #533483);
            padding: 30px;
            border-radius: 12px;
            margin-bottom: 30px;
            box-shadow: 0 8px 32px rgba(0,0,0,0.3);
        }}
        .header h1 {{
            margin: 0 0 15px 0;
            font-size: 2.2em;
            background: linear-gradient(90deg, #e94560, #ff6b6b);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }}
        .summary-stats {{
            display: flex;
            gap: 20px;
            flex-wrap: wrap;
            margin-bottom: 30px;
        }}
        .stat-card {{
            background: rgba(233, 69, 96, 0.1);
            border: 1px solid rgba(233, 69, 96, 0.3);
            padding: 20px;
            border-radius: 10px;
            text-align: center;
            min-width: 150px;
        }}
        .stat-value {{
            font-size: 2.5em;
            font-weight: 700;
            color: #e94560;
        }}
        .stat-label {{
            color: #888;
            font-size: 0.9em;
            margin-top: 5px;
        }}
        .wordcloud-section {{
            background: rgba(255,255,255,0.05);
            border-radius: 12px;
            padding: 20px;
            margin-bottom: 30px;
            text-align: center;
        }}
        .wordcloud-section h2 {{
            color: #e94560;
            margin-top: 0;
        }}
        .wordcloud-img {{
            max-width: 100%;
            border-radius: 8px;
            box-shadow: 0 4px 20px rgba(0,0,0,0.3);
        }}
        .metadata {{
            background: rgba(255,255,255,0.05);
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 30px;
            border-left: 4px solid #e94560;
        }}
        .metadata h2 {{
            color: #e94560;
            margin-top: 0;
        }}
        .metadata-grid {{
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px;
        }}
        .metadata-item {{
            background: rgba(0,0,0,0.2);
            padding: 10px 15px;
            border-radius: 6px;
        }}
        .metadata-label {{
            color: #888;
            font-size: 0.85em;
            text-transform: uppercase;
        }}
        .metadata-value {{
            color: #fff;
            font-weight: 500;
            margin-top: 5px;
        }}
        .page-section {{
            background: rgba(255,255,255,0.03);
            border-radius: 12px;
            margin-bottom: 25px;
            overflow: hidden;
            border: 1px solid rgba(255,255,255,0.1);
        }}
        .page-header {{
            background: linear-gradient(90deg, #533483, #0f3460);
            padding: 15px 20px;
            font-weight: 600;
            font-size: 1.1em;
        }}
        .page-content {{
            padding: 20px;
        }}
        .text-box {{
            background: rgba(0,0,0,0.3);
            padding: 15px;
            border-radius: 8px;
            font-family: 'Fira Code', 'Monaco', monospace;
            font-size: 0.9em;
            line-height: 1.6;
            white-space: pre-wrap;
            word-break: break-word;
            max-height: 400px;
            overflow-y: auto;
            border: 1px solid rgba(255,255,255,0.1);
        }}
        .source-link {{
            color: #4ecdc4;
            word-break: break-all;
        }}
        .badge {{
            display: inline-block;
            padding: 4px 10px;
            border-radius: 12px;
            font-size: 0.75em;
            font-weight: 600;
            background: #0f3460;
            margin-left: 10px;
        }}
    </style>

    <div class="container">
        <div class="header">
            <h1>📄 PDF Text Extraction & Word Cloud Report</h1>
            <p>Source: <a href="{html.escape(source_url)}" class="source-link" target="_blank">{html.escape(display_url)}</a></p>
        </div>

        <div class="summary-stats">
            <div class="stat-card">
                <div class="stat-value">{metadata.get("page_count", 0)}</div>
                <div class="stat-label">Pages</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{total_chars:,}</div>
                <div class="stat-label">Characters</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{word_count:,}</div>
                <div class="stat-label">Words</div>
            </div>
        </div>

        <div class="wordcloud-section">
            <h2>☁️ Word Cloud</h2>
            <img src="data:image/png;base64,{wordcloud_b64}" class="wordcloud-img" alt="Word Cloud">
        </div>

        <div class="metadata">
            <h2>📋 Document Metadata</h2>
            <div class="metadata-grid">
    """

    # One grid item per non-empty metadata entry.
    for key, value in metadata.items():
        if value:
            report_html += f"""
                <div class="metadata-item">
                    <div class="metadata-label">{html.escape(str(key))}</div>
                    <div class="metadata-value">{html.escape(str(value))}</div>
                </div>
            """

    # Close the metadata grid and its section.
    report_html += """
            </div>
        </div>
    """

    # Add page sections
    for page in pages:
        page_num = page.get("page_number", 0)
        page_text = page.get("text", "")
        char_count = page.get("char_count", 0)

        report_html += f"""
        <div class="page-section">
            <div class="page-header">
                📖 Page {page_num}
                <span class="badge">{char_count:,} chars</span>
            </div>
            <div class="page-content">
        """

        # Pages without any visible text get a placeholder instead of an empty box.
        if page_text.strip():
            report_html += f"""
                <div class="text-box">{html.escape(page_text)}</div>
            """
        else:
            report_html += """
                <p style="color: #888; font-style: italic;">No text found on this page.</p>
            """

        report_html += """
            </div>
        </div>
        """

    # Close the outer container opened at the top of the report.
    report_html += "</div>"

    await flyte.report.log.aio(report_html, do_flush=True)

    summary = f"Extracted {word_count:,} words from {metadata.get('page_count', 0)} pages."
    return summary

## Task 5: `pdf_wordcloud_pipeline`

In [None]:
@env.task
async def pdf_wordcloud_pipeline(pdf_url: str) -> PipelineOutput:
    """
    Orchestrate the end-to-end flow: download, extract, wordcloud, report.

    Args:
        pdf_url: URL of the PDF to process.

    Returns:
        PipelineOutput bundling the report summary, a File with the extracted
        text, and the File with the wordcloud image.
    """
    import os

    print("Starting PDF text extraction and wordcloud pipeline...")

    # Steps 1-2: fetch the PDF, then pull its text and metadata out.
    downloaded_pdf = await download_pdf(pdf_url)
    extraction = await extract_text(downloaded_pdf)

    # Step 3: render the wordcloud from the full document text.
    cloud_image = await generate_wordcloud(extraction["full_text"])

    # Step 4: publish the report; it returns the one-line summary.
    report_summary = await generate_report(extraction, cloud_image, pdf_url)

    # Step 5: persist the extracted text locally and turn it into a File.
    text_path = os.path.join(tempfile.gettempdir(), "extracted_text.txt")
    with open(text_path, "w", encoding="utf-8") as text_handle:
        text_handle.write(extraction["full_text"])
    text_file = await File.from_local(local_path=text_path)

    print(f"Pipeline complete: {report_summary}")
    return PipelineOutput(
        summary=report_summary,
        extracted_text=text_file,
        wordcloud_image=cloud_image,
    )

## Run the pipeline

In [None]:
# Run the pipeline with a sample PDF URL

# Example: Process a public PDF
pdf_url = "https://www.americanrhetoric.com/speeches/PDFFiles/Eleanor%20Roosevelt%20-%20The%20Struggle%20for%20Human%20Rights.pdf"
# Submit the top-level pipeline task with the sample URL as its input.
run = flyte.run(pdf_wordcloud_pipeline, pdf_url=pdf_url)

print(f"Run Name: {run.name}")
print(f"Run URL: {run.url}")
# Block until the run finishes, then display its first output as the cell result.
run.wait()
run.outputs()[0]

## Deploy the pipeline

In [None]:
import pdf_extraction

# Deploy the task environment exposed by the `pdf_extraction` module.
# NOTE(review): presumably this module ships with the workshop repo and mirrors
# the tasks defined above; the app below looks the task up by name — confirm.
deployments = flyte.deploy(pdf_extraction.env)
print(f"Deployments: {deployments}")

You can find the deployed task here: https://demo.hosted.unionai.cloud/v2/domain/development/project/flytesnacks/tasks

## App Server: a simple PDF extraction service

In [None]:
# Install the app's local dependencies; `python-multipart` is what FastAPI uses
# to parse form submissions such as the `Form(...)` field of `/process`.
!uv pip install fastapi==0.128.0 python-multipart

In [None]:
import os
from contextlib import asynccontextmanager

from fastapi import FastAPI, Form, HTTPException
from fastapi.responses import HTMLResponse, RedirectResponse, StreamingResponse
from starlette import status

import flyte.remote as remote
import flyte.app
from flyte.app.extras import FastAPIAppEnvironment, FastAPIPassthroughAuthMiddleware
from flyte.models import ActionPhase


@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI lifespan hook: set up Flyte passthrough auth before serving.

    Raises:
        RuntimeError: If the FLYTE_ENDPOINT environment variable is missing or empty.
    """
    flyte_endpoint = os.environ.get("FLYTE_ENDPOINT")
    if not flyte_endpoint:
        raise RuntimeError("FLYTE_ENDPOINT environment variable not set")

    # Project and domain come from the environment and may be None when unset.
    execution_project = os.environ.get("FLYTE_INTERNAL_EXECUTION_PROJECT")
    execution_domain = os.environ.get("FLYTE_INTERNAL_EXECUTION_DOMAIN")
    await flyte.init_passthrough.aio(
        endpoint=flyte_endpoint,
        project=execution_project,
        domain=execution_domain,
    )

    # Nothing to tear down on shutdown; just hand control to the application.
    yield


# The FastAPI application; `lifespan` initializes Flyte before requests are served.
pdf_app = FastAPI(
    title="PDF Wordcloud Generator",
    description="A web app that extracts text from PDFs and generates word clouds",
    version="1.0.0",
    lifespan=lifespan,
)

# Apply passthrough auth middleware to every route except the `/health` probe.
pdf_app.add_middleware(FastAPIPassthroughAuthMiddleware, excluded_paths={"/health"})


# Page skeleton rendered with `str.format`. It has exactly two placeholders,
# `{pdf_url}` (pre-filled form value) and `{status_section}` (optional status
# card); every literal CSS brace is doubled so that `format` leaves it alone.
# The emoji in the heading and the button are stored as real Unicode characters.
HTML_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
    <title>PDF Wordcloud Generator</title>
    <style>
        * {{ box-sizing: border-box; }}
        body {{
            font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
            color: #eee;
            min-height: 100vh;
            margin: 0;
            padding: 40px 20px;
        }}
        .container {{
            max-width: 800px;
            margin: 0 auto;
        }}
        h1 {{
            text-align: center;
            font-size: 2.5em;
            background: linear-gradient(90deg, #e94560, #ff6b6b);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
            margin-bottom: 10px;
        }}
        .subtitle {{
            text-align: center;
            color: #888;
            margin-bottom: 40px;
        }}
        .card {{
            background: rgba(255,255,255,0.05);
            border: 1px solid rgba(255,255,255,0.1);
            border-radius: 12px;
            padding: 30px;
            margin-bottom: 30px;
        }}
        .form-group {{
            margin-bottom: 20px;
        }}
        label {{
            display: block;
            margin-bottom: 8px;
            font-weight: 500;
            color: #e94560;
        }}
        input[type="text"] {{
            width: 100%;
            padding: 15px;
            border: 1px solid rgba(255,255,255,0.2);
            border-radius: 8px;
            background: rgba(0,0,0,0.3);
            color: #fff;
            font-size: 1em;
        }}
        input[type="text"]:focus {{
            outline: none;
            border-color: #e94560;
        }}
        button {{
            background: linear-gradient(90deg, #e94560, #ff6b6b);
            color: white;
            padding: 15px 30px;
            border: none;
            border-radius: 8px;
            font-size: 1.1em;
            font-weight: 600;
            cursor: pointer;
            width: 100%;
            transition: transform 0.2s, box-shadow 0.2s;
        }}
        button:hover {{
            transform: translateY(-2px);
            box-shadow: 0 8px 20px rgba(233, 69, 96, 0.3);
        }}
        .status {{
            padding: 20px;
            border-radius: 8px;
            margin-top: 20px;
        }}
        .status.running {{
            background: rgba(78, 205, 196, 0.1);
            border: 1px solid rgba(78, 205, 196, 0.3);
        }}
        .status.succeeded {{
            background: rgba(46, 204, 113, 0.1);
            border: 1px solid rgba(46, 204, 113, 0.3);
        }}
        .status.failed {{
            background: rgba(231, 76, 60, 0.1);
            border: 1px solid rgba(231, 76, 60, 0.3);
        }}
        .download-links {{
            display: flex;
            gap: 15px;
            margin-top: 20px;
        }}
        .download-btn {{
            flex: 1;
            text-align: center;
            padding: 15px;
            background: rgba(78, 205, 196, 0.1);
            border: 1px solid rgba(78, 205, 196, 0.3);
            border-radius: 8px;
            color: #4ecdc4;
            text-decoration: none;
            font-weight: 500;
            transition: background 0.2s;
        }}
        .download-btn:hover {{
            background: rgba(78, 205, 196, 0.2);
        }}
        .run-link {{
            color: #4ecdc4;
            text-decoration: none;
        }}
        .run-link:hover {{
            text-decoration: underline;
        }}
        .refresh-note {{
            text-align: center;
            color: #888;
            font-size: 0.9em;
            margin-top: 15px;
        }}
    </style>
</head>
<body>
    <div class="container">
        <h1>📄 PDF Wordcloud Generator</h1>
        <p class="subtitle">Extract text from PDFs and generate beautiful word clouds</p>

        <div class="card">
            <form action="/process" method="post">
                <div class="form-group">
                    <label for="pdf_url">PDF URL</label>
                    <input type="text" id="pdf_url" name="pdf_url"
                           placeholder="https://example.com/document.pdf"
                           value="{pdf_url}" required>
                </div>
                <button type="submit">🚀 Generate Wordcloud</button>
            </form>
        </div>

        {status_section}
    </div>
</body>
</html>
"""


@pdf_app.get("/health")
async def health_check():
    """Report that the service is up; always answers with a fixed payload."""
    payload = dict(status="healthy")
    return payload


# Sample PDF pre-filled into the form when the page is opened without a `pdf_url`.
DEFAULT_URL = "https://www.americanrhetoric.com/speeches/PDFFiles/Eleanor%20Roosevelt%20-%20The%20Struggle%20for%20Human%20Rights.pdf"


@pdf_app.get("/", response_class=HTMLResponse)
async def home(run_name: str = None, pdf_url: str = DEFAULT_URL):
    """
    Home page with the PDF URL input form and an optional run status card.

    Args:
        run_name: Name of a previously started run; when given, its phase is
            looked up and a status card is rendered (with the wordcloud and
            download links when the run succeeded).
        pdf_url: URL to pre-fill into the form.

    Returns:
        HTMLResponse with the rendered page. Lookup failures are reported
        inside the page rather than raised.
    """
    import base64
    from html import escape
    from urllib.parse import quote

    status_section = ""

    if run_name:
        # `run_name` arrives in the query string and is therefore untrusted:
        # escape it for HTML text and percent-encode it for URL path segments.
        safe_run_name = escape(run_name)
        run_path = quote(run_name, safe="")
        try:
            # Fetch the run status
            run = await remote.Run.get.aio(name=run_name)
            run_url = escape(str(run.url))

            if run.phase == ActionPhase.SUCCEEDED:
                # Get outputs and provide download links
                outputs = await run.outputs.aio()
                pipeline_output: PipelineOutput = outputs[0]
                async with pipeline_output.wordcloud_image.open("rb") as f:
                    image_bytes = (await f.read()).to_bytes()

                # Encode image bytes to base64 for inline display
                image_base64 = base64.b64encode(image_bytes).decode('utf-8')

                status_section = f"""
                <div class="card">
                    <h2 style="color: #2ecc71; margin-top: 0;">✅ Processing Complete!</h2>
                    <p>Run: <a href="{run_url}" class="run-link" target="_blank">{safe_run_name}</a></p>
                    <p><strong>Summary:</strong> {escape(str(pipeline_output.summary))}</p>
                    <div style="margin: 20px 0;">
                        <img src="data:image/png;base64,{image_base64}" alt="Wordcloud" style="max-width: 100%; height: auto; border-radius: 8px; box-shadow: 0 4px 6px rgba(0,0,0,0.3);" />
                    </div>
                    <div class="download-links">
                        <a href="/download/wordcloud/{run_path}" class="download-btn">
                            🖼️ Download Wordcloud Image
                        </a>
                        <a href="/download/text/{run_path}" class="download-btn">
                            📝 Download Extracted Text
                        </a>
                    </div>
                </div>
                """
            elif run.phase in (
                ActionPhase.RUNNING,
                ActionPhase.QUEUED,
                ActionPhase.WAITING_FOR_RESOURCES,
                ActionPhase.INITIALIZING,
            ):
                # Still in progress: show the phase and reload the page every 5 s.
                status_section = f"""
                <div class="card">
                    <div class="status running">
                        <h3 style="margin-top: 0;">⏳ Processing...</h3>
                        <p>Status: <strong>{run.phase.name}</strong></p>
                        <p>Run: <a href="{run_url}" class="run-link" target="_blank">{safe_run_name}</a></p>
                    </div>
                    <p class="refresh-note">🔄 Page will auto-refresh in 5 seconds...</p>
                </div>
                <script>
                    setTimeout(function() {{
                        window.location.reload();
                    }}, 5000);
                </script>
                """
            else:
                # Any other phase is presented as a failure.
                status_section = f"""
                <div class="card">
                    <div class="status failed">
                        <h3 style="margin-top: 0;">❌ Processing Failed</h3>
                        <p>Status: <strong>{run.phase.name}</strong></p>
                        <p>Run: <a href="{run_url}" class="run-link" target="_blank">{safe_run_name}</a></p>
                    </div>
                </div>
                """
        except Exception as e:
            # Best-effort page: surface the lookup error in the UI instead of a 500.
            status_section = f"""
            <div class="card">
                <div class="status failed">
                    <h3 style="margin-top: 0;">❌ Error</h3>
                    <p>Could not fetch run status: {escape(str(e))}</p>
                </div>
            </div>
            """

    # `pdf_url` is user-controlled and lands inside a double-quoted attribute,
    # so it must be escaped (quotes included) to prevent HTML injection.
    page = HTML_TEMPLATE.format(pdf_url=escape(pdf_url), status_section=status_section)
    return HTMLResponse(content=page)


@pdf_app.post("/process")
async def process_pdf(pdf_url: str = Form(...)):
    """
    Start processing a PDF URL.

    Args:
        pdf_url: The PDF URL submitted through the form.

    Returns:
        A 303 redirect to the home page carrying the new run's name and the
        submitted URL as query parameters, for status tracking.

    Raises:
        HTTPException: 500 if the task cannot be fetched or the run cannot be started.
    """
    from urllib.parse import urlencode

    try:
        # Fetch the task and run it
        task = remote.Task.get(
            project=os.getenv("FLYTE_INTERNAL_EXECUTION_PROJECT", "flytesnacks"),
            domain=os.getenv("FLYTE_INTERNAL_EXECUTION_DOMAIN", "development"),
            name="pdf_wordcloud.pdf_wordcloud_pipeline",
            auto_version="latest",
        )
        run = await flyte.run.aio(task, pdf_url=pdf_url)

        # URL-encode the parameters: a raw PDF URL may contain `&`, `#`, `?` or
        # percent-escapes (e.g. `%20`) that would otherwise be split off or
        # decoded when the home page parses the query string.
        query = urlencode({"run_name": run.name, "pdf_url": pdf_url})

        # Redirect to home page with run name for status tracking
        return RedirectResponse(
            url=f"/?{query}",
            status_code=status.HTTP_303_SEE_OTHER,
        )
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to start processing: {str(e)}",
        ) from e


async def _stream_run_output(run_name: str, field: str, media_type: str, filename: str):
    """
    Build a streaming download response for one File field of a run's output.

    Args:
        run_name: Name of the run whose first output is a PipelineOutput.
        field: Attribute of the PipelineOutput that holds the File to stream.
        media_type: Content type reported to the client.
        filename: Download filename advertised via Content-Disposition.

    Returns:
        A StreamingResponse that yields the file in 8192-byte reads.
    """
    run = await remote.Run.get.aio(name=run_name)
    outputs = await run.outputs.aio()
    pipeline_output: PipelineOutput = outputs[0]
    # Resolve the attribute up front so a bad field fails before streaming starts.
    output_file = getattr(pipeline_output, field)

    async def stream_file():
        async with output_file.open("rb") as f:
            while chunk := await f.read(8192):
                yield chunk.to_bytes()

    return StreamingResponse(
        stream_file(),
        media_type=media_type,
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )


@pdf_app.get("/download/wordcloud/{run_name}")
async def download_wordcloud(run_name: str):
    """
    Download the wordcloud image from a completed run.

    Raises:
        HTTPException: 404 if the run or its outputs cannot be retrieved.
    """
    try:
        return await _stream_run_output(
            run_name,
            field="wordcloud_image",
            media_type="image/png",
            filename="wordcloud.png",
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Could not download wordcloud: {str(e)}",
        ) from e


@pdf_app.get("/download/text/{run_name}")
async def download_text(run_name: str):
    """
    Download the extracted text from a completed run.

    Raises:
        HTTPException: 404 if the run or its outputs cannot be retrieved.
    """
    try:
        return await _stream_run_output(
            run_name,
            field="extracted_text",
            media_type="text/plain",
            filename="extracted_text.txt",
        )
    except Exception as e:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Could not download text: {str(e)}",
        ) from e


# Define the FastAPI App Environment
# Container image for the app: Debian base plus the web stack and the same
# PDF/wordcloud packages the task environment installs.
app_image = flyte.Image.from_debian_base().with_pip_packages(
    "fastapi",
    "uvicorn",
    "python-multipart",
    "httpx",
    "pymupdf>=1.24.0",
    "wordcloud>=1.9.0",
    "matplotlib>=3.7.0",
)

pdf_app_env = FastAPIAppEnvironment(
    name="pdf-wordcloud-app",
    app=pdf_app,
    description="A web app that extracts text from PDFs and generates word clouds",
    image=app_image,
    resources=flyte.Resources(cpu="2", memory="2Gi"),
    # NOTE(review): presumably disables platform-level auth in front of the app;
    # requests still pass through the passthrough-auth middleware — confirm.
    requires_auth=False,
    env_vars={
        # Read by `lifespan` at startup; `_U_EP_OVERRIDE` takes precedence when set locally.
        "FLYTE_ENDPOINT": os.environ.get("_U_EP_OVERRIDE", "demo.hosted.unionai.cloud"),
    },
    depends_on=[env],  # Depends on the task environment
    # NOTE(review): presumably (min, max) replicas, i.e. scale between 0 and 1 — confirm.
    scaling=flyte.app.Scaling(replicas=(0, 1)),
)

## Serve the App

In [None]:
# Deploy and serve the PDF Wordcloud app

# The suffix is appended to the app name, so each attendee serves a distinctly named app.
app_suffix = "workshop"  # 👈 put your name here
# Serve a clone of the app environment under the suffixed name.
served_app = flyte.serve(pdf_app_env.clone_with(name=f"pdf-wordcloud-app-{app_suffix}"))

print(f"App URL: {served_app.url}")
print(f"App Endpoint: {served_app.endpoint}")

App URL: https://demo.hosted.unionai.cloud/v2/domain/development/project/flytesnacks/apps/pdf-wordcloud-app-workshop
App Endpoint: https://tight-shadow-a05f1.apps.demo.hosted.unionai.cloud


## Conclusion

🎉 Congrats! In this notebook, you:

- Ran your first set of Flyte tasks, which extract text from a PDF and create
  a wordcloud out of it.
- Served a Flyte app, which provides a user-friendly UI for the PDF extraction
  pipeline.