# PDF Slides -> Text to Speech (TTS) -> Video Workflow

This notebook converts a **PDF (one page = one slide)** into a narrated video.

**Workflow (what the code does):**
- Input: PDF file
- Render: Create exactly one image per PDF page
- Extract text: Read per-page text via PyMuPDF; if too short/empty, use OCR as fallback
- Summarize: The LLM generates speaker narration for exactly one slide at a time
- Speak: Create per-slide audio via OpenAI TTS
- Video: Combine slide images + per-slide audio into a single MP4.


## Libraries and API keys

In [None]:
# Libraries
import os
import re
from pathlib import Path
from dotenv import find_dotenv, load_dotenv
from moviepy import AudioFileClip, ImageClip, concatenate_videoclips

# Load the .env file when one can be located; otherwise tell the user what to create
dotenv_path = find_dotenv(usecwd=True)
if not dotenv_path:
    print("No .env file found (please create one with your OPENAI_API_KEY).")
else:
    load_dotenv(dotenv_path)
    print(f"Loaded .env from: {dotenv_path}")

# Report only whether the key is present, never its value
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
print(f"OPENAI_API_KEY set: {bool(OPENAI_API_KEY)}")

## Configuration

In [None]:
# --- CONFIG ---
# Input PDF, resulting video file, and working directory for intermediate assets
INPUT_PDF_PATH = "data/Week_08_SP_Slides_4_5_6.pdf"
OUTPUT_VIDEO = "final_video.mp4"
TEMP_DIR = "temp_assets"

# Narration language code: EN, IT, FR, DE, ES
OUTPUT_LANG = "EN"

# How many minutes of narration to aim for on each PDF page
TARGET_SPOKEN_MINUTES_PER_PAGE = 1

# Narration depth: "summary" or "detailed"
DETAIL_STYLE = "summary"

# Assumed speaking rate, used to turn minutes into a word budget
SPEECH_WPM = 150

# Seconds each slide is shown when no API key is set (silent video)
DEFAULT_PAGE_SECONDS = 5

# Page rendering
PDF_DPI = 200
IMAGE_FORMAT = "jpg"  # jpg or png
IMAGE_QUALITY = 92  # only applies to jpg
CLEAN_OUTPUT_DIRS = True

# Slides to process (1-indexed, inclusive). SLIDE_END = None means "until end"
SLIDE_START = 1
SLIDE_END = None

# OCR is tried when the extracted page text has fewer characters than the minimum
OCR_FALLBACK = True
OCR_MIN_TEXT_CHARS = 30

# Text-to-speech
TTS_MODEL = "gpt-4o-mini-tts"
TTS_VOICE = "alloy"  # alloy, echo, fable, onyx, nova, shimmer

# Write the narration actually used for each slide to disk
SAVE_SLIDE_TEXT = True

# --- Script source selection (yes/no switches) ---
# "yes": prefer a non-empty user-corrected narration file when one exists.
# "no": always use the LLM narration.
USE_USER_CORRECTED_TEXT = "no"  # yes/no

# "no": reuse LLM scripts already on disk (no re-generation).
# "yes": generate again; existing originals are kept and new text is versioned.
FORCE_REGENERATE_LLM_TEXT = "no"  # yes/no

# "yes": if a user-corrected script is missing, seed one from the LLM script.
# Only takes effect while USE_USER_CORRECTED_TEXT is "yes".
CREATE_USER_CORRECTION_TEMPLATES = "yes"  # yes/no

# Sub-folders of TEMP_DIR that hold the narration scripts
LLM_SCRIPTS_SUBDIR = "scripts_llm_original"
USER_SCRIPTS_SUBDIR = "scripts_user_corrected"
USED_SCRIPTS_SUBDIR = "scripts_used"

# Language code -> language name used in the LLM prompt
LANG_MAP = {
    "EN": "English",
    "DE": "German",
    "FR": "French",
    "IT": "Italian",
    "ES": "Spanish",
}
# Language code -> Tesseract language identifier used for OCR
TESSERACT_LANG_MAP = {
    "EN": "eng",
    "DE": "deu",
    "FR": "fra",
    "IT": "ita",
    "ES": "spa",
}

# Audio sample rate (used for silent fallback audio)
AUDIO_MP3_FPS = 44100

# MP4 encoding (slides + audio)
VIDEO_FPS = 24
VIDEO_CODEC = "libx264"
VIDEO_AUDIO_CODEC = "aac"
VIDEO_TARGET_W = 1280
VIDEO_TARGET_H = 720
# ffmpeg filter chain: fit inside the target frame, pad to the exact size, square pixels
VIDEO_FFMPEG_VF = ",".join(
    [
        f"scale={VIDEO_TARGET_W}:{VIDEO_TARGET_H}:force_original_aspect_ratio=decrease",
        f"pad={VIDEO_TARGET_W}:{VIDEO_TARGET_H}:(ow-iw)/2:(oh-ih)/2",
        "setsar=1",
    ]
)
# Extra ffmpeg arguments, listed as flag/value pairs
VIDEO_FFMPEG_PARAMS = [
    "-movflags", "+faststart",
    "-vf", VIDEO_FFMPEG_VF,
    "-pix_fmt", "yuv420p",
    "-profile:v", "baseline",
    "-level", "3.1",
]

# Run the workflow automatically in the last cell of the notebook
RUN_WORKFLOW = True

# Ensure temp dir exists
Path(TEMP_DIR).mkdir(parents=True, exist_ok=True)

# OpenAI is optional: without an API key, `client` stays None and USE_OPENAI is False
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
USE_OPENAI = bool(OPENAI_API_KEY)

client = None
if USE_OPENAI:
    # Guard only the import, so the "import failed" hint is not shown for
    # unrelated errors raised while constructing the client.
    try:
        from openai import OpenAI  # openai>=1.x
    except ImportError as exc:
        raise RuntimeError(
            "OpenAI client import failed. Install/upgrade with: pip install -U openai"
        ) from exc

    client = OpenAI(api_key=OPENAI_API_KEY)

## Helper functions

In [None]:
def normalize_lang(lang: str) -> str:
    """Return the canonical language code (a key of LANG_MAP) for *lang*.

    Accepts a code such as 'en' or one of the known language-name aliases,
    ignoring case and surrounding whitespace.

    Raises:
        ValueError: if *lang* is not a non-empty string or is not supported.
    """
    if not (isinstance(lang, str) and lang.strip()):
        raise ValueError("OUTPUT_LANG must be a non-empty string (e.g. 'EN').")

    alias_to_code = {
        "GERMAN": "DE",
        "DEUTSCH": "DE",
        "ENGLISH": "EN",
        "ITALIAN": "IT",
        "ITALIANO": "IT",
        "FRENCH": "FR",
        "FRANCAIS": "FR",
        "FRANÇAIS": "FR",
        "SPANISH": "ES",
        "ESPANOL": "ES",
        "ESPAÑOL": "ES",
    }
    upper = lang.strip().upper()
    # Anything that is not an alias is treated as a candidate code as-is
    code = alias_to_code.get(upper, upper)

    if code in LANG_MAP:
        return code

    supported = ", ".join(sorted(LANG_MAP))
    raise ValueError(f"Unsupported OUTPUT_LANG='{lang}'. Supported: {supported}")


def parse_yes_no(value, default: bool = False) -> bool:
    """Interpret a yes/no style setting as a boolean.

    Booleans are returned unchanged. None and unrecognized values yield
    *default*. Everything else is converted to a string and compared
    case-insensitively against the known yes/no spellings.
    """
    if isinstance(value, bool):
        return value
    if value is None:
        return default

    truthy = frozenset(("y", "yes", "true", "1", "on"))
    falsy = frozenset(("n", "no", "false", "0", "off"))

    token = str(value).strip().lower()
    if token in truthy:
        return True
    return False if token in falsy else default


def clamp_minutes(minutes) -> float:
    """Validate the per-page narration length and return it as a float.

    Args:
        minutes: Anything `float()` accepts (number or numeric string).

    Returns:
        The value as a positive, finite float.

    Raises:
        ValueError: if the value is not numeric, not > 0, or not finite.
    """
    import math

    try:
        value = float(minutes)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(
            "TARGET_SPOKEN_MINUTES_PER_PAGE must be a number (e.g. 1 or 5)."
        ) from exc

    if value <= 0:
        raise ValueError("TARGET_SPOKEN_MINUTES_PER_PAGE must be > 0")

    # NaN and +inf slip past the "<= 0" check but cannot be turned into a word budget
    if not math.isfinite(value):
        raise ValueError("TARGET_SPOKEN_MINUTES_PER_PAGE must be a finite number")

    return value


def max_words_for_minutes(minutes: float, wpm: int = 150) -> int:
    """Return the word budget for *minutes* of speech at *wpm*, never below 30."""
    budget = int(round(minutes * wpm))
    return budget if budget > 30 else 30


def truncate_to_word_limit(text: str, max_words: int) -> str:
    """Cut *text* to its first *max_words* whitespace-separated words.

    Text within the limit is returned unchanged. Truncated text is re-joined
    with single spaces and gets a trailing ellipsis character.
    """
    tokens = text.split()
    if len(tokens) > max_words:
        return " ".join(tokens[:max_words]).rstrip() + "…"
    return text


def clean_page_text(page_text: str) -> str:
    """Tidy extracted page text so page markers like 'Page 3' do not confuse the model.

    Blank lines and short page-number lines ('Page 3', 'Page 3 of 10', '3/10')
    are dropped, runs of spaces/tabs collapse to one space, and the result is
    stripped. Non-string input yields an empty string.
    """
    if not isinstance(page_text, str):
        return ""

    def _is_page_marker(line: str) -> bool:
        # Length caps keep the check limited to very short header/footer artifacts
        if len(line) <= 20 and re.fullmatch(r"(?i)page\s*\d+(\s*of\s*\d+)?", line):
            return True
        return bool(len(line) <= 10 and re.fullmatch(r"\d+\s*/\s*\d+", line))

    stripped_lines = (raw.strip() for raw in page_text.splitlines())
    kept = [line for line in stripped_lines if line and not _is_page_marker(line)]

    joined = "\n".join(kept)
    joined = re.sub(r"[ \t]+", " ", joined)
    return re.sub(r"\n{3,}", "\n\n", joined).strip()


def sanitize_script_output(text: str) -> str:
    """Strip common 'meta' prefixes and multi-slide markers from generated narration.

    Removes a leading 'Slide N:' / 'Page N of M:' label, a leading
    'Speaker notes:' / 'Narration:' / 'Script:' label, lines consisting only of
    'continued', and lines that start with 'next slide'. Non-string input
    yields an empty string.
    """
    if not isinstance(text, str):
        return ""

    # Applied in this order; each pattern is replaced with the empty string
    removal_patterns = (
        r"(?is)^(slide|page)\s*\d+(\s*(/|of)\s*\d+)?\s*:\s*",
        r"(?is)^(speaker\s*notes?|narration|script)\s*:\s*",
        r"(?im)^\s*(continued|continue)\s*\.?\s*$",
        r"(?im)^\s*(next\s+slide|on\s+the\s+next\s+slide)\b.*$",
    )

    result = text.strip()
    for pattern in removal_patterns:
        result = re.sub(pattern, "", result)

    # Removing whole lines can leave long runs of blank lines behind
    return re.sub(r"\n{3,}", "\n\n", result).strip()


def normalize_detail_style(style: str) -> str:
    """Map a detail-style setting onto 'summary' or 'detailed'.

    Missing, blank, or non-string input falls back to 'summary'.

    Raises:
        ValueError: if a non-blank string is not a recognized style name.
    """
    if not (isinstance(style, str) and style.strip()):
        return "summary"

    synonyms = {
        "summary": "summary",
        "summarize": "summary",
        "short": "summary",
        "brief": "summary",
        "detailed": "detailed",
        "detail": "detailed",
        "long": "detailed",
    }
    canonical = synonyms.get(style.strip().lower())
    if canonical is None:
        raise ValueError("DETAIL_STYLE must be one of: 'summary', 'detailed'")
    return canonical


def clamp_slide_range(start: int | None, end: int | None, total_pages: int) -> tuple[int, int]:
    if start is None:
        start = 1
    if end is None:
        end = total_pages

    if start < 1 or end < 1 or start > end:
        raise ValueError(f"Invalid slide range: start={start}, end={end}")
    if start > total_pages:
        raise ValueError(f"SLIDE_START={start} exceeds total pages={total_pages}")
    end = min(end, total_pages)
    return start, end


def clear_dir_by_patterns(dir_path: Path, patterns: list[str]) -> None:
    """Delete the files in *dir_path* that match any of the glob *patterns*.

    Does nothing when the directory does not exist. Only regular files are
    removed; entries that cannot be deleted are skipped (best-effort cleanup).
    """
    if not dir_path.exists():
        return

    matches = (hit for pattern in patterns for hit in dir_path.glob(pattern))
    for hit in matches:
        try:
            if hit.is_file():
                hit.unlink()
        except OSError:
            # Best effort: a file that cannot be removed is left in place
            pass


def safe_write_text_no_overwrite(path: Path, text: str, encoding: str = "utf-8") -> Path:
    """Write *text* to *path* without ever replacing an existing file.

    A missing target is written directly. An existing target with identical
    content is left as it is. Otherwise the text goes to the first free
    versioned sibling (name__v001, name__v002, ...).

    Returns:
        The path that holds *text* after the call.

    Raises:
        RuntimeError: if versions __v001 through __v999 all exist.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    if not path.exists():
        path.write_text(text, encoding=encoding)
        return path

    try:
        if path.read_text(encoding=encoding) == text:
            return path
    except OSError:
        # An unreadable target is treated like one with different content
        pass

    versioned = (
        path.with_name(f"{path.stem}__v{number:03d}{path.suffix}")
        for number in range(1, 1000)
    )
    free_slot = next((cand for cand in versioned if not cand.exists()), None)
    if free_slot is None:
        raise RuntimeError(f"Too many versions for {path.name}")

    free_slot.write_text(text, encoding=encoding)
    return free_slot


def read_text_or_empty(path: Path, encoding: str = "utf-8") -> str:
    """Return the text content of *path*, or '' if it is not a readable regular file."""
    try:
        is_regular_file = path.exists() and path.is_file()
        return path.read_text(encoding=encoding) if is_regular_file else ""
    except OSError:
        return ""


def render_pdf_pages_to_images_pymupdf(
    pdf_path: str,
    out_dir: str,
    dpi: int = 200,
    slide_start: int | None = None,
    slide_end: int | None = None,
    image_format: str = "jpg",
    jpg_quality: int = 92,
) -> list[Path]:
    """Render exactly one image per PDF page using PyMuPDF (fitz).

    Args:
        pdf_path: PDF to render.
        out_dir: Directory for the images (created if missing). When
            CLEAN_OUTPUT_DIRS is set, earlier slide/page images in it are
            deleted first.
        dpi: Render resolution.
        slide_start: First page to render (1-indexed); None means page 1.
        slide_end: Last page to render (inclusive); None means the last page.
        image_format: 'jpg' or 'png' (case-insensitive).
        jpg_quality: JPEG quality; only used for 'jpg'.

    Returns:
        Paths of the written images, named slide_NNN.<ext> after the
        1-indexed page number, in page order.

    Raises:
        RuntimeError: if PyMuPDF is unavailable or no image was produced.
        ValueError: if the image format or slide range is invalid.
    """
    try:
        import fitz  # PyMuPDF
    except Exception as exc:
        raise RuntimeError(
            "PyMuPDF is required for rendering. Install with: pip install pymupdf"
        ) from exc

    # Validate before touching the disk, so a bad format cannot wipe old
    # images or leave a half-rendered page behind.
    fmt = image_format.lower()
    if fmt not in {"jpg", "png"}:
        raise ValueError("IMAGE_FORMAT must be 'jpg' or 'png'")

    pdf_path = str(Path(pdf_path).resolve())
    out_dir_path = Path(out_dir).resolve()
    out_dir_path.mkdir(parents=True, exist_ok=True)

    if CLEAN_OUTPUT_DIRS:
        clear_dir_by_patterns(
            out_dir_path,
            [
                "slide_*.jpg",
                "slide_*.png",
                "page_*.jpg",
                "page_*.png",
                "Slide*.JPG",
                "Slide*.PNG",
            ],
        )

    # Scale factor relative to the 72-dpi base resolution of PDF pages
    zoom = dpi / 72.0
    matrix = fitz.Matrix(zoom, zoom)

    paths: list[Path] = []
    doc = fitz.open(pdf_path)
    try:
        # Inside the try block so the document is closed even when the
        # requested range is invalid.
        start, end = clamp_slide_range(slide_start, slide_end, doc.page_count)

        for page_no in range(start - 1, end):
            page = doc.load_page(page_no)
            pix = page.get_pixmap(matrix=matrix, alpha=False)

            out_path = out_dir_path / f"slide_{page_no + 1:03d}.{fmt}"
            if fmt == "png":
                pix.save(str(out_path))
            else:
                pix.save(str(out_path), output="jpeg", jpg_quality=int(jpg_quality))

            paths.append(out_path)
    finally:
        doc.close()

    if not paths:
        raise RuntimeError("No images produced from PDF.")

    return paths


def get_pdf_page_texts_pymupdf(pdf_path: str, slide_start: int, slide_end: int) -> list[str]:
    """Extract text per PDF page via PyMuPDF. Often better than PyPDF2.

    Args:
        pdf_path: PDF to read.
        slide_start: First page (1-indexed); None means page 1.
        slide_end: Last page (inclusive); None means the last page.

    Returns:
        One cleaned text string per page in the resolved range, in page order.

    Raises:
        RuntimeError: if PyMuPDF is unavailable.
        ValueError: if the slide range is invalid.
    """
    try:
        import fitz  # PyMuPDF
    except Exception as exc:
        raise RuntimeError(
            "PyMuPDF is required for text extraction. Install with: pip install pymupdf"
        ) from exc

    pdf_path = str(Path(pdf_path).resolve())

    texts: list[str] = []
    doc = fitz.open(pdf_path)
    try:
        # Inside the try block so the document is closed even when the
        # requested range is invalid.
        start, end = clamp_slide_range(slide_start, slide_end, doc.page_count)

        for page_no in range(start - 1, end):
            page = doc.load_page(page_no)
            text = page.get_text("text") or ""
            texts.append(clean_page_text(text))
    finally:
        doc.close()

    return texts


def ocr_image_text(image_path: Path, output_lang: str) -> str:
    """OCR an image with pytesseract. Requires the Tesseract binary installed on the machine.

    Args:
        image_path: Image file to recognize.
        output_lang: Language setting; mapped through TESSERACT_LANG_MAP to
            pick the Tesseract language (falls back to 'eng').

    Returns:
        The recognized text after `clean_page_text` cleanup.

    Raises:
        RuntimeError: if the OCR dependencies are missing or OCR fails.
        ValueError: if *output_lang* is not a supported language.
    """
    try:
        import pytesseract
        from PIL import Image
    except Exception as exc:
        raise RuntimeError(
            "OCR dependencies missing. Install with: pip install pytesseract pillow"
        ) from exc

    lang_code = normalize_lang(output_lang)
    tesseract_lang = TESSERACT_LANG_MAP.get(lang_code, "eng")

    try:
        # Context manager releases the image file handle once OCR is done
        with Image.open(image_path) as img:
            text = pytesseract.image_to_string(img, lang=tesseract_lang) or ""
        return clean_page_text(text)
    except Exception as exc:
        raise RuntimeError(
            "OCR failed. Ensure Tesseract OCR is installed and available on PATH. "
            "On Windows you can install it and (optionally) set pytesseract.pytesseract.tesseract_cmd."
        ) from exc


def get_slide_texts_with_ocr_fallback(
    pdf_path: str,
    slide_images: list[Path],
    slide_start: int,
    slide_end: int,
    output_lang: str,
) -> list[str]:
    """Return one text per slide image, using OCR where the PDF text is too short.

    The PDF text layer is extracted first. With OCR_FALLBACK enabled, any page
    whose stripped text has fewer than OCR_MIN_TEXT_CHARS characters is run
    through OCR, and the longer of the two results is kept. OCR errors are
    printed as warnings and do not abort the run.
    """
    page_texts = get_pdf_page_texts_pymupdf(pdf_path, slide_start=slide_start, slide_end=slide_end)

    # Safety: pad or trim so there is exactly one text per image
    image_count = len(slide_images)
    shortfall = image_count - len(page_texts)
    if shortfall > 0:
        page_texts.extend([""] * shortfall)
    elif shortfall < 0:
        page_texts = page_texts[:image_count]

    if not OCR_FALLBACK:
        return page_texts

    results: list[str] = []
    for img_path, text in zip(slide_images, page_texts):
        if len(text.strip()) >= OCR_MIN_TEXT_CHARS:
            results.append(text)
            continue

        # Text layer is empty/too short: try OCR on the rendered image
        try:
            ocr_text = ocr_image_text(img_path, output_lang=output_lang)
        except Exception as exc:
            print(f"OCR warning for {img_path.name}: {exc}")
            ocr_text = ""

        ocr_is_longer = len(ocr_text.strip()) > len(text.strip())
        results.append(ocr_text if ocr_is_longer else text)

    return results


def _norm_compact(s: str) -> str:
    s = s or ""
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "", s)
    return s


def detect_slide_headline(slide_text: str) -> str | None:
    """Best-effort headline detection from extracted slide text (first non-bullet short line)."""
    if not isinstance(slide_text, str):
        return None

    lines = [ln.strip() for ln in slide_text.splitlines() if ln.strip()]
    if not lines:
        return None

    for ln in lines[:3]:
        if len(ln) > 90:
            continue
        if ln.startswith(("-", "•", "*")):
            continue
        if re.match(r"^\d+[.)]\s+", ln):
            continue
        if len(ln) < 4:
            continue
        return ln

    return None


def remove_headline_line(slide_text: str, headline: str | None) -> str:
    """Drop the first line of *slide_text* that matches *headline*, then clean the rest.

    Lines are compared via `_norm_compact`. Without a usable headline the
    text is returned as it was (or '' when it is empty/None).
    """
    if not headline or not isinstance(slide_text, str):
        return slide_text or ""

    wanted = _norm_compact(headline)
    if not wanted:
        return slide_text

    rows = slide_text.splitlines()
    first_hit = next(
        (idx for idx, row in enumerate(rows) if _norm_compact(row) == wanted),
        None,
    )
    if first_hit is not None:
        del rows[first_hit]

    return clean_page_text("\n".join(rows))


def remove_headline_echo(script: str, headline: str | None) -> str:
    """Remove a leading repetition of the slide headline from generated narration.

    Two forms are handled: a first line that is just the headline, and a
    leading 'Headline: ...' / 'Headline - ...' prefix.

    Args:
        script: Narration text.
        headline: Detected slide headline, or None.

    Returns:
        The stripped narration without the echoed headline; the input (or '')
        when there is no headline or *script* is not a string.
    """
    if not headline or not isinstance(script, str):
        return script or ""

    s = script.strip()

    # Drop a first line that is just the headline. A headline without any
    # letters/digits normalizes to '' and must not match an equally "empty" line.
    target = _norm_compact(headline)
    lines = s.splitlines()
    if lines and target and _norm_compact(lines[0]) == target:
        s = "\n".join(lines[1:]).lstrip()

    # Drop leading patterns like 'Headline: ...' or 'Headline - ...'.
    # A plain hyphen only counts as a separator when whitespace (or the end)
    # follows it, so a compound such as 'Cost-effective' after headline 'Cost'
    # is left intact.
    s = re.sub(
        rf"(?is)^\s*{re.escape(headline)}\s*(?:[:–—]+|-+(?=\s|$))\s*",
        "",
        s,
    ).strip()

    return s


def dedupe_sentences(text: str) -> str:
    """Remove repeated sentences, keeping the first occurrence of each.

    Whitespace is collapsed to single spaces, the text is split after '.', '!'
    or '?', and sentences are compared via `_norm_compact`. Sentences whose
    comparison key is empty are dropped. Non-string input yields ''.
    """
    if not isinstance(text, str):
        return ""

    flat = re.sub(r"\s+", " ", text.strip())
    if not flat:
        return ""

    # Insertion-ordered mapping: comparison key -> first sentence seen with it
    first_by_key: dict[str, str] = {}
    for chunk in re.split(r"(?<=[.!?])\s+", flat):
        sentence = chunk.strip()
        fingerprint = _norm_compact(sentence)
        if fingerprint and fingerprint not in first_by_key:
            first_by_key[fingerprint] = sentence

    return " ".join(first_by_key.values()).strip()


def generate_script(
    slide_text: str,
    output_lang: str,
    minutes_per_page: float,
    detail_style: str,
    model: str = "gpt-4o",
) -> str:
    """Summarize ONE slide into narration text, used for TTS/video (single slide only).

    Args:
        slide_text: Extracted text of a single slide.
        output_lang: Narration language (code or alias accepted by `normalize_lang`).
        minutes_per_page: Target spoken duration; converted to a word limit
            using SPEECH_WPM.
        detail_style: 'summary' or 'detailed' (or an accepted synonym).
        model: Chat model used to write the narration.

    Returns:
        The narration after cleanup (meta prefixes, headline echo and repeated
        sentences removed), truncated to the word limit.

    Raises:
        RuntimeError: if the OpenAI client is not initialized.
        ValueError: if language, minutes, or style are invalid.
    """
    if client is None:
        raise RuntimeError("OpenAI client not initialized (OPENAI_API_KEY missing).")

    lang_code = normalize_lang(output_lang)
    lang_name = LANG_MAP[lang_code]

    minutes_per_page = clamp_minutes(minutes_per_page)
    detail_style = normalize_detail_style(detail_style)
    max_words = max_words_for_minutes(minutes_per_page, wpm=SPEECH_WPM)

    # Send the headline separately from the body so the model can be told not to repeat it
    slide_text = clean_page_text(slide_text)
    headline = detect_slide_headline(slide_text)
    slide_body = remove_headline_line(slide_text, headline)
    if not slide_body.strip():
        # The headline was the only content: fall back to the full text
        slide_body = slide_text
    if not slide_body.strip():
        slide_body = "(No extractable text found on this slide.)"

    if detail_style == "summary":
        style_hint = "Summarize the key message and keep it essential."
    else:
        style_hint = "Explain clearly with a bit more context."

    response = client.chat.completions.create(
        model=model,
        temperature=0.2,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a professional narrator. "
                    "You will receive the extracted content for EXACTLY ONE slide (one PDF page). "
                    f"Write the speaker narration in {lang_name} ({lang_code}). "
                    "IMPORTANT: Output ONLY the narration for THIS single slide. "
                    "Do NOT include page/slide numbers, headings, or any meta text. "
                    "Avoid repetition: do NOT repeat the slide headline/title verbatim, and do NOT start the narration by restating the headline. "
                    "Do NOT say phrases like 'This slide is about ...' or 'The title is ...'. Start directly with the explanation. "
                    "Do NOT mention other slides/pages, and do NOT add 'next slide' or continuations. "
                    "Return plain narration text (full sentences), suitable for TTS. "
                    f"Target duration: about {minutes_per_page:g} minute(s). "
                    f"Hard limit: max {max_words} words. "
                    f"Style: {detail_style}. "
                    f"Guidance: {style_hint}"
                ),
            },
            {
                "role": "user",
                "content": (
                    "SLIDE HEADLINE (do NOT repeat verbatim in narration):\n"
                    + (headline or "(unknown)")
                    + "\n\nSLIDE BODY CONTENT (single slide only):\n"
                    + slide_body
                    + "\n\nReturn ONLY the narration text for this slide."
                ),
            },
        ],
    )

    # The prompt asks for clean output, but the result is post-processed
    # anyway in case the model adds labels, echoes the headline, or repeats itself.
    script = response.choices[0].message.content or ""
    script = sanitize_script_output(script)
    script = remove_headline_echo(script, headline)
    script = dedupe_sentences(script)
    return truncate_to_word_limit(script, max_words=max_words)


def generate_audio(text: str, filepath: str) -> None:
    """Synthesize *text* with OpenAI TTS and stream the audio into *filepath*.

    Uses the TTS_MODEL and TTS_VOICE settings.

    Raises:
        RuntimeError: if the OpenAI client is not initialized.
    """
    if client is None:
        raise RuntimeError("OpenAI client not initialized (OPENAI_API_KEY missing).")

    speech_request = client.audio.speech.with_streaming_response.create(
        model=TTS_MODEL,
        voice=TTS_VOICE,
        input=text,
    )
    with speech_request as response:
        response.stream_to_file(filepath)


def process_workflow(
    output_lang: str = OUTPUT_LANG,
    minutes_per_page: float = TARGET_SPOKEN_MINUTES_PER_PAGE,
    detail_style: str = DETAIL_STYLE,
) -> None:
    """Run the full PDF -> narration -> audio -> video pipeline.

    Renders the configured slide range of INPUT_PDF_PATH to images and
    extracts the text of each page (with OCR fallback). With an OpenAI key,
    it picks or generates a narration script per slide and synthesizes audio
    for it. The slides are then concatenated into OUTPUT_VIDEO. Without a
    key, the result is a silent video showing each slide for
    DEFAULT_PAGE_SECONDS.

    Args:
        output_lang: Narration language (code or accepted alias).
        minutes_per_page: Target narration length per slide in minutes.
        detail_style: 'summary' or 'detailed'.

    Raises:
        FileNotFoundError: if INPUT_PDF_PATH does not exist.
        ValueError: if language, minutes, style, or slide range are invalid.
    """
    if not Path(INPUT_PDF_PATH).exists():
        raise FileNotFoundError(f"PDF not found: {INPUT_PDF_PATH}")

    output_lang = normalize_lang(output_lang)
    minutes_per_page = clamp_minutes(minutes_per_page)
    detail_style = normalize_detail_style(detail_style)

    use_user_corrected = parse_yes_no(USE_USER_CORRECTED_TEXT, default=False)
    force_regen_llm = parse_yes_no(FORCE_REGENERATE_LLM_TEXT, default=False)
    create_user_templates = parse_yes_no(CREATE_USER_CORRECTION_TEMPLATES, default=True)

    pages_dir = (Path(TEMP_DIR) / "pages").resolve()
    scripts_llm_dir = (Path(TEMP_DIR) / LLM_SCRIPTS_SUBDIR).resolve()
    scripts_user_dir = (Path(TEMP_DIR) / USER_SCRIPTS_SUBDIR).resolve()
    scripts_used_dir = (Path(TEMP_DIR) / USED_SCRIPTS_SUBDIR).resolve()

    for folder in (pages_dir, scripts_llm_dir, scripts_user_dir, scripts_used_dir):
        folder.mkdir(parents=True, exist_ok=True)

    # Safe cleanup: only clear "used" scripts (never delete LLM originals or user edits)
    if CLEAN_OUTPUT_DIRS:
        clear_dir_by_patterns(scripts_used_dir, ["slide_*.txt"])

    slide_images = render_pdf_pages_to_images_pymupdf(
        INPUT_PDF_PATH,
        str(pages_dir),
        dpi=PDF_DPI,
        slide_start=SLIDE_START,
        slide_end=SLIDE_END,
        image_format=IMAGE_FORMAT,
        jpg_quality=IMAGE_QUALITY,
    )

    # Resolve the actual clamped range for text extraction
    try:
        import fitz

        doc = fitz.open(str(Path(INPUT_PDF_PATH).resolve()))
        try:
            total_pages = doc.page_count
        finally:
            doc.close()
    except Exception:
        total_pages = None

    if total_pages is not None:
        start, end = clamp_slide_range(SLIDE_START, SLIDE_END, total_pages)
    else:
        # Page count unavailable: derive the range from the rendered images.
        # SLIDE_START may be None, which means "first page".
        start = 1 if SLIDE_START is None else SLIDE_START
        end = start + len(slide_images) - 1

    slide_texts = get_slide_texts_with_ocr_fallback(
        INPUT_PDF_PATH,
        slide_images=slide_images,
        slide_start=start,
        slide_end=end,
        output_lang=output_lang,
    )

    if not USE_OPENAI:
        print(
            "Note: OPENAI_API_KEY is not set. Rendering a silent video "
            f"({DEFAULT_PAGE_SECONDS}s per slide)."
        )
    else:
        max_words = max_words_for_minutes(minutes_per_page, wpm=SPEECH_WPM)
        print(
            f"Output language: {output_lang} ({LANG_MAP[output_lang]}), "
            f"target: {minutes_per_page:g} min/slide (~{max_words} words max), "
            f"style: {detail_style}, "
            f"use_user_corrected_text: {use_user_corrected}"
        )
        llm_folder_short = (Path(TEMP_DIR) / LLM_SCRIPTS_SUBDIR).as_posix()
        user_folder_short = (Path(TEMP_DIR) / USER_SCRIPTS_SUBDIR).as_posix()
        print(f"LLM originals folder: {llm_folder_short}")
        print(f"User-corrected folder: {user_folder_short}")

    clips = []
    audio_clips = []
    final_video = None
    try:
        for i, img_path in enumerate(slide_images):
            slide_number = start + i
            print(f"Processing slide {slide_number} ({i + 1}/{len(slide_images)})...")

            # Defaults describe a silent slide; replaced below when audio is produced
            audio_clip = None
            duration = DEFAULT_PAGE_SECONDS

            if USE_OPENAI:
                llm_script_target = scripts_llm_dir / f"slide_{slide_number:03d}.txt"
                user_script_path = scripts_user_dir / f"slide_{slide_number:03d}.txt"
                used_script_path = scripts_used_dir / f"slide_{slide_number:03d}.txt"

                user_text = read_text_or_empty(user_script_path)
                if use_user_corrected and user_text.strip():
                    script = user_text.strip()
                    script_source = "USER_CORRECTED"
                else:
                    existing_llm = read_text_or_empty(llm_script_target)
                    if existing_llm.strip() and not force_regen_llm:
                        script = existing_llm.strip()
                        script_source = "LLM_ORIGINAL(existing)"
                    else:
                        script = generate_script(
                            slide_texts[i],
                            output_lang,
                            minutes_per_page,
                            detail_style,
                        )
                        saved_path = safe_write_text_no_overwrite(llm_script_target, script)
                        if saved_path != llm_script_target:
                            print(f"  LLM original preserved (versioned): {saved_path.name}")
                        script_source = "LLM_ORIGINAL(new)"

                    if use_user_corrected and create_user_templates and not user_script_path.exists():
                        try:
                            user_script_path.write_text(script, encoding="utf-8")
                            print(f"  Created user-correction template: {user_script_path.name}")
                        except OSError as exc:
                            print(f"  Warning: could not create user template: {exc}")

                if SAVE_SLIDE_TEXT:
                    used_script_path.write_text(script, encoding="utf-8")

                print(f"  Script source: {script_source}")

                if script.strip():
                    audio_path = str((Path(TEMP_DIR) / f"audio_{slide_number:03d}.mp3").resolve())
                    generate_audio(script, audio_path)

                    audio_clip = AudioFileClip(audio_path)
                    audio_clips.append(audio_clip)
                    duration = audio_clip.duration
                else:
                    # Cleanup can leave nothing to speak; show the slide silently
                    # instead of sending empty input to the TTS request.
                    print(
                        "  Warning: narration is empty; showing slide silently "
                        f"for {DEFAULT_PAGE_SECONDS}s."
                    )

            # This is the slide image shown in the final video
            slide_clip = ImageClip(str(img_path)).with_duration(duration)
            if audio_clip is not None:
                slide_clip = slide_clip.with_audio(audio_clip)

            clips.append(slide_clip)

        print("Rendering video...")
        final_video = concatenate_videoclips(clips, method="compose")
        final_video.write_videofile(
            OUTPUT_VIDEO,
            fps=VIDEO_FPS,
            codec=VIDEO_CODEC,
            audio_codec=VIDEO_AUDIO_CODEC,
            ffmpeg_params=VIDEO_FFMPEG_PARAMS,
        )
    finally:
        # Close every clip on success and on failure; a close error must not
        # mask the original exception.
        for resource in (final_video, *clips, *audio_clips):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as exc:
                print(f"  Warning: could not close clip: {exc}")

    print("Done.")


## Run workflow

In [None]:
# Run complete workflow with the configured defaults.
# Skipped when RUN_WORKFLOW is False, so the cells above can be executed
# without starting a render.
if RUN_WORKFLOW:
    process_workflow()