In [None]:
import subprocess, textwrap, operator, base64
from pathlib import Path
from typing import TypedDict

from langchain.chat_models import init_chat_model
from langgraph.graph import END, START, StateGraph
from langgraph.types import Send
from openai import OpenAI
from typing_extensions import Annotated

# Chat model shared by the summarising and thumbnail-prompt nodes in this notebook.
llm = init_chat_model(model="openai:gpt-5-nano")


class State(TypedDict):
    """Schema of the state dictionary handed to ``StateGraph`` for this pipeline."""
    # Path of the input video; read by ``extract_audio``.
    video_file: str
    # Path of the audio file written by ``extract_audio`` and read by ``transcribe_audio``.
    audio_file: str
    # Transcript stored by ``transcribe_audio``.
    transcription: str
    # The three list fields carry ``operator.add`` as annotation metadata.
    # NOTE(review): presumably LangGraph uses it as the reducer that concatenates
    # the single-item lists returned by the fanned-out nodes -- confirm.
    # One entry per transcript chunk, produced by ``summarize_chunk``.
    summaries: Annotated[list[str], operator.add]
    # One image prompt per thumbnail concept, produced by ``generate_thumbnails``.
    thumbnail_prompts: Annotated[list[str], operator.add]
    # File names of the images written by ``generate_thumbnails``.
    thumbnail_sketches: Annotated[list[str], operator.add]
    # Combined summary produced by ``mega_summary``.
    final_summary: str

In [None]:
def extract_audio(state: State):
    """Extract a sped-up MP3 audio track from the input video with ffmpeg.

    Args:
        state: Graph state; only ``state["video_file"]`` is read.

    Returns:
        A state update ``{"audio_file": <path>}`` where the path is the video
        path with its final extension replaced by ``.mp3``.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Swap only the final extension. A plain str.replace("mp4", "mp3") would
    # also rewrite "mp4" inside directory or file names, and would leave
    # non-mp4 inputs unchanged so that the output path equals the input path.
    output_file = str(Path(state["video_file"]).with_suffix(".mp3"))
    command = [
        "ffmpeg",
        "-i",
        state["video_file"],
        "-filter:a",
        "atempo=2.0",  # audio tempo filter set to 2.0
        "-y",  # overwrite an existing output file without prompting
        output_file,
    ]
    # check=True surfaces ffmpeg failures here instead of letting the next
    # node fail on a missing file or pick up a stale one from an earlier run.
    subprocess.run(command, check=True)
    return {
        "audio_file": output_file
    }

def transcribe_audio(state: State):
    """Send the extracted audio file to OpenAI's ``whisper-1`` transcription endpoint.

    Args:
        state: Graph state; only ``state["audio_file"]`` is read.

    Returns:
        A state update ``{"transcription": <result of the API call>}``; the
        request asks for ``response_format="text"``.
    """
    openai_client = OpenAI()
    # Optional ``language`` / ``prompt`` hints for the model are intentionally not sent.
    request_options = {"model": "whisper-1", "response_format": "text"}
    with open(state["audio_file"], "rb") as audio_stream:
        transcript = openai_client.audio.transcriptions.create(
            file=audio_stream,
            **request_options,
        )
    return {"transcription": transcript}

def dispatch_summarizers(state: State):
    """Split the transcription into pieces and route each one to ``summarize_chunk``.

    Args:
        state: Graph state; only ``state["transcription"]`` is read.

    Returns:
        A list with one ``Send("summarize_chunk", {"id", "chunk"})`` per piece
        produced by ``textwrap.wrap(..., 500)``; ids start at 1.
    """
    pieces = textwrap.wrap(state["transcription"], 500)
    return [
        Send("summarize_chunk", {"id": number, "chunk": piece})
        for number, piece in enumerate(pieces, start=1)
    ]

def summarize_chunk(chunk_item):
    """Summarise one transcript chunk with the shared ``llm``.

    Args:
        chunk_item: Mapping with keys ``"id"`` and ``"chunk"``.

    Returns:
        A state update ``{"summaries": ["[Chunk <id>] <model reply>"]}``
        holding a single-element list.
    """
    number, text = chunk_item["id"], chunk_item["chunk"]
    request = f"""
        Please summarize the following text.

        Text: {text}
        """
    reply = llm.invoke(request)
    return {"summaries": [f"[Chunk {number}] {reply.content}"]}

def mega_summary(state: State):
    """Merge the per-chunk summaries into one final summary via the shared ``llm``.

    Args:
        state: Graph state; only ``state["summaries"]`` is read.

    Returns:
        A state update ``{"final_summary": <model reply content>}``.
    """
    # One summary per line inside the prompt.
    combined = "\n".join(state["summaries"])
    request = f"""
        You are given multiple summaries of different chunks from a video transcription.

        Please create a comprehensive final summary that combines all the key points.

        Individual summaries:

        {combined}
    """
    reply = llm.invoke(request)
    return {"final_summary": reply.content}

def dispatch_artists(state: State):
    """Route the final summary to three ``generate_thumbnails`` runs.

    Args:
        state: Graph state; only ``state["final_summary"]`` is read.

    Returns:
        Three ``Send("generate_thumbnails", {"id", "summary"})`` objects with
        ids 1, 2 and 3, all carrying the same summary.
    """
    final_summary = state["final_summary"]
    return [
        Send("generate_thumbnails", {"id": concept_id, "summary": final_summary})
        for concept_id in range(1, 4)
    ]

def generate_thumbnails(args):
    """Write an image prompt from the summary, render it, and save it to disk.

    The shared ``llm`` turns the video summary into a detailed thumbnail
    prompt; that prompt is sent to OpenAI's ``gpt-image-1`` model and the
    returned base64 image is written to ``thumbnail_<id>.jpg`` in the current
    working directory.

    Args:
        args: Mapping with keys ``"id"`` (concept number, used in the file
            name) and ``"summary"`` (text the prompt is based on).

    Returns:
        A state update with single-element lists:
        ``{"thumbnail_prompts": [<prompt>], "thumbnail_sketches": [<file name>]}``.
    """
    concept_id = args["id"]
    summary = args["summary"]
    prompt = f"""
        Based on this video summary, create a detailed visual prompt for a YouTube thumbnail.

        Create a detailed prompt for generating a thumbnail image that would attract viewers. Include:
            - Main visual elements
            - Color scheme
            - Text overlay suggestions
            - Overall composition

        Summary: {summary}
    """
    response = llm.invoke(prompt)

    thumbnail_prompt = response.content

    client = OpenAI()

    result = client.images.generate(
        model="gpt-image-1",
        prompt=thumbnail_prompt,
        quality="low",
        moderation="low",
        size="auto",
        # Request JPEG explicitly: the API's default output format is PNG,
        # which would not match the ".jpg" file name used below.
        output_format="jpeg",
    )

    image_bytes = base64.b64decode(result.data[0].b64_json)

    filename = f"thumbnail_{concept_id}.jpg"

    with open(filename, "wb") as file:
        file.write(image_bytes)

    return {
        "thumbnail_prompts": [thumbnail_prompt],
        "thumbnail_sketches": [filename]
    }



In [None]:
# Pipeline: extract audio -> transcribe -> summarise chunks (fan-out) ->
# merge summaries -> generate thumbnails (fan-out) -> END.
graph_builder = StateGraph(State)

# Only functions that return state updates are registered as nodes. The
# dispatch_* functions return Send objects, so they are used purely as routers
# on conditional edges and are not nodes themselves.
graph_builder.add_node("extract_audio", extract_audio)
graph_builder.add_node("transcribe_audio", transcribe_audio)
graph_builder.add_node("summarize_chunk", summarize_chunk)
graph_builder.add_node("mega_summary", mega_summary)
graph_builder.add_node("generate_thumbnails", generate_thumbnails)

graph_builder.add_edge(START, "extract_audio")
graph_builder.add_edge("extract_audio", "transcribe_audio")
# Fan out: one summarize_chunk run per transcript chunk.
graph_builder.add_conditional_edges("transcribe_audio", dispatch_summarizers, ["summarize_chunk"])
graph_builder.add_edge("summarize_chunk", "mega_summary")
# Fan out: dispatch_artists sends to generate_thumbnails, so that is the
# target declared here, and that node is the one wired to END.
graph_builder.add_conditional_edges("mega_summary", dispatch_artists, ["generate_thumbnails"])
graph_builder.add_edge("generate_thumbnails", END)

graph = graph_builder.compile()

In [None]:
# Run the compiled graph on the sample video; the call's result is the cell output.
graph.invoke(input={"video_file": "interview.mp4"})