In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the environment. The Groq and Tavily
# clients below are built without explicit keys, so presumably they read them from
# here — TODO confirm the expected variable names.
load_dotenv()

True

In [2]:
import os
import json
from langchain_groq import ChatGroq
from langchain.tools import tool
from pydantic import BaseModel, Field
from typing import List, Dict, TypedDict, Set, Union

## Planner agent

In [3]:
# System prompt for the planner agent. Read through a context manager so the file
# handle is closed, and decode as UTF-8 so the text does not depend on the
# platform's default encoding.
with open("../prompts/planner.md", encoding="utf-8") as prompt_file:
    prompt = prompt_file.read()

# planner_prompt = SystemMessage("""
# You are a specialized agent whose job is to generate a plan for a 45-60 second educational video on a topic that the user would like to understand.
# Your job is to plan out the entire structure of the video, along with detailing 
# """)

In [4]:
# Intended structured-output schema for the planner.
# NOTE(review): this model is not attached to the planner (the with_structured_output
# call further down is commented out), and its fields (`title`, `video_plan`) differ
# from the keys seen in the recorded planner reply (`project_title`, `sections`, ...).
class PlannerOutput(BaseModel):
    title: str = Field(..., description="The title of the video.")
    video_plan: str = Field(..., description="The plan of the entire video.")


# Chat model that drafts the video plan; its reply text is parsed as JSON in a later cell.
planner = ChatGroq(
    name="content_planner",
    model="openai/gpt-oss-120b",
    disable_streaming=True,
)
# NOTE(review): structured output is disabled. If it is re-enabled, the return value
# of with_structured_output must be assigned back, since it yields a new runnable.
# planner.with_structured_output(PlannerOutput)

In [5]:
# Ask for a topic, then let the planner turn it into a plan (kept as raw reply text).
user = input("What topic would you like to understand?: ")
planner_messages = [
    ("system", prompt),
    ("human", f"I would like to understand {user}"),
]
planner_reply = planner.invoke(planner_messages)
plan = planner_reply.content

plan

'{\n  "project_title": "Seeing Pythagoras",\n  "target_audience": "High School Math Students",\n  "estimated_duration_seconds": 70,\n  "visual_hook": "A right‑angled triangle appears, then two separate squares sprout from its legs and a larger square from the hypotenuse, inviting the viewer to wonder how they fit together.",\n  "core_insight": "The combined area of the two squares on the legs exactly equals the area of the square on the hypotenuse (a²\u202f+\u202fb²\u202f=\u202fc²).",\n  "sections": [\n    {\n      "id": 1,\n      "section_type": "Intro",\n      "key_point": "What does a²\u202f+\u202fb²\u202f=\u202fc² look like in space?",\n      "visual_idea": "Show the equation fading in, then a right triangle draws itself; squares pop out of each side, each labeled a², b², and c²."\n    },\n    {\n      "id": 2,\n      "section_type": "Explanation",\n      "key_point": "If we cut the two smaller squares into pieces, they can be reassembled perfectly inside the big square.",\n      "

In [6]:
# Sanity check that the planner reply is valid JSON; the parsed value is only
# displayed here, not stored.
json.loads(plan)

{'project_title': 'Seeing Pythagoras',
 'target_audience': 'High School Math Students',
 'estimated_duration_seconds': 70,
 'visual_hook': 'A right‑angled triangle appears, then two separate squares sprout from its legs and a larger square from the hypotenuse, inviting the viewer to wonder how they fit together.',
 'core_insight': 'The combined area of the two squares on the legs exactly equals the area of the square on the hypotenuse (a²\u202f+\u202fb²\u202f=\u202fc²).',
 'sections': [{'id': 1,
   'section_type': 'Intro',
   'key_point': 'What does a²\u202f+\u202fb²\u202f=\u202fc² look like in space?',
   'visual_idea': 'Show the equation fading in, then a right triangle draws itself; squares pop out of each side, each labeled a², b², and c².'},
  {'id': 2,
   'section_type': 'Explanation',
   'key_point': 'If we cut the two smaller squares into pieces, they can be reassembled perfectly inside the big square.',
   'visual_idea': 'Animate the squares on the legs breaking into four cong

## Script writer

In [7]:
from tavily import TavilyClient

# Client is created without arguments; presumably it picks up its API key from the
# environment — TODO confirm.
tavily = TavilyClient()

# NOTE(review): the docstring below is left untouched on purpose; with @tool it is
# presumably what the model sees as the tool description — confirm before editing it.
@tool
def web_search_tool(query: str, **kwargs):
    """
        The function definition for a web search tool
    
    :param query: The query that you want to search
    :type query: str
    
    :param kwargs: Possible key value pairs for the web search tool. These are the key value pairs you can utilize (extracted from the docstrings for the function we're using):
        search_depth: Literal['basic', 'advanced'] = None,
        topic: Literal['general', 'news', 'finance'] = None,
        time_range: Literal['day', 'week', 'month', 'year'] = None,
        start_date: str = None,
        end_date: str = None,
        days: int = None,
        max_results: int = None,
        include_domains: Sequence[str] = None,
        exclude_domains: Sequence[str] = None,
        include_answer: bool | Literal['basic', 'advanced'] = None,
        include_raw_content: bool | Literal['markdown', 'text'] = None
    """
    # Forward the query and any extra options straight to Tavily; the raw response
    # object is returned unmodified.
    response = tavily.search(query=query, **kwargs)
    
    return response

In [8]:
# System prompt for the script writer. Read via a context manager so the handle is
# closed, and as UTF-8 so decoding does not depend on the platform default.
with open('../prompts/writer.md', encoding="utf-8") as writer_prompt_file:
    writer_prompt = writer_prompt_file.read()

In [9]:
from langchain.agents import create_agent
from langgraph.store.memory import InMemoryStore
from langchain.agents.structured_output import ToolStrategy

# Structured result the writer agent must produce (used as the agent's response_format).
class WriterResponse(BaseModel):
    # Title of the script.
    script_title: str
    # Total estimated duration of the script (a float; the recorded run shows 58.6).
    total_estimated_duration: float
    # One dict per script block; values may be str, int or float. In the recorded run
    # each block carries keys such as 'id', 'text', 'start_time' and 'end_time'.
    blocks: List[Dict[str, str | int | float]]

# Script-writer agent: may call the web search tool and must finish by emitting a
# WriterResponse.
# The response schema is registered by ToolStrategy alone. Listing WriterResponse in
# `tools` as well would register the same schema a second time as an ordinary tool.
agent = create_agent(
    # model="groq:moonshotai/kimi-k2-instruct-0905",
    model="groq:openai/gpt-oss-120b",
    tools=[web_search_tool],
    system_prompt=writer_prompt,
    response_format=ToolStrategy(WriterResponse, tool_message_content="This tool is called by calling the 'WriterResponse' tool, in this exact format"),
    store=InMemoryStore(),
)

# Round-trip the plan through the JSON parser (this also fails fast on invalid JSON)
# and hand the re-serialised text to the writer agent as the human message.
writer_request = json.dumps(json.loads(plan))
stream = agent.invoke({"messages": [("human", writer_request)]})

In [10]:
# Show the writer run's full result (message history plus the structured response).
stream

{'messages': [HumanMessage(content='{"project_title": "Seeing Pythagoras", "target_audience": "High School Math Students", "estimated_duration_seconds": 70, "visual_hook": "A right\\u2011angled triangle appears, then two separate squares sprout from its legs and a larger square from the hypotenuse, inviting the viewer to wonder how they fit together.", "core_insight": "The combined area of the two squares on the legs exactly equals the area of the square on the hypotenuse (a\\u00b2\\u202f+\\u202fb\\u00b2\\u202f=\\u202fc\\u00b2).", "sections": [{"id": 1, "section_type": "Intro", "key_point": "What does a\\u00b2\\u202f+\\u202fb\\u00b2\\u202f=\\u202fc\\u00b2 look like in space?", "visual_idea": "Show the equation fading in, then a right triangle draws itself; squares pop out of each side, each labeled a\\u00b2, b\\u00b2, and c\\u00b2."}, {"id": 2, "section_type": "Explanation", "key_point": "If we cut the two smaller squares into pieces, they can be reassembled perfectly inside the big sq

In [11]:
# Pull the validated WriterResponse out of the agent result and turn it into a plain dict.
writer_result = stream['structured_response']
script_plan = writer_result.model_dump()
script_plan

{'script_title': 'Seeing Pythagoras',
 'total_estimated_duration': 58.6,
 'blocks': [{'end_time': 4.4,
   'estimated_duration': 4.4,
   'id': 1,
   'section_type': 'Hook',
   'start_time': 0,
   'text': 'Look at a right‑angled triangle, its two legs and the hypotenuse.',
   'visual_note_for_animator': 'Right‑angled triangle draws itself, outline appears.',
   'word_count': 11},
  {'end_time': 10,
   'estimated_duration': 5.6,
   'id': 2,
   'section_type': 'Hook',
   'start_time': 4.4,
   'text': 'Now squares sprout from each side, labeled a squared, b squared, and c squared.',
   'visual_note_for_animator': 'Squares pop out from each side of the triangle, each bearing the label a², b², c².',
   'word_count': 14},
  {'end_time': 23,
   'estimated_duration': 13,
   'id': 3,
   'section_type': 'Intro',
   'start_time': 10,
   'text': 'The equation a squared plus b squared equals c squared is just the statement that the total area of the two small squares matches the area of the big one.'

## Animation Director

In [12]:
# System prompt for the animation director. Read via a context manager so the handle
# is closed, and as UTF-8 so decoding does not depend on the platform default.
with open("../prompts/animation_director.md", encoding="utf-8") as director_prompt_file:
    animation_director_prompt = director_prompt_file.read()
animation_director_prompt

'# ROLE\nYou are the Animation Director. Your goal is to synthesize a "Video Blueprint" and a "Spoken Script" into a precise, object-oriented storyboard for the `Manim` Python library.\n\n# INPUT DATA\nYou will receive two JSON objects:\n1. `BLUEPRINT`: Contains the `visual_hook`, `core_insight`, and high-level `visual_idea` for each section.\n2. `SCRIPT`: Contains the verbatim spoken text, `word_count`, and precise `estimated_duration` for each block.\n\n# OBJECTIVE\nMerge the *Visual Intuition* from the Blueprint with the *Timing Constraints* of the Script to create a master **Scene Registry**.\n\n# STRATEGY: CROSS-REFERENCING\nThe Script tells you *when* things happen. The Blueprint tells you *what* the metaphor is.\n* **If the Script says:** "As we see here, the area grows..."\n* **And the Blueprint says:** "Visual Idea: Show a square turning into a larger grid."\n* **Your Job:** Combine them -> "Create a Square, then use Transform() to turn it into a NumberPlane."\n\n# CRITICAL CO

In [13]:
# Template for the animation director's human message. The two `{}` slots are filled
# positionally with str.format: first the blueprint, then the script.
anim_dir_input = """
---- BLUEPRINT ----
{}
---- SCRIPT ----
{}
"""

In [14]:
from langchain.messages import HumanMessage, SystemMessage

# Intended schema for the storyboard: a list of scene dicts whose values may be dicts,
# strings or ints.
# NOTE(review): currently unused, because the with_structured_output line that would
# attach it is commented out.
class AnimationDirectorResponse(BaseModel):
    scenes: List[Dict[str, dict | str | int]]

# Chat model that merges the blueprint and the script into a storyboard.
anim_dir_agent = ChatGroq(
    name="animation_director",
    model="moonshotai/kimi-k2-instruct-0905",
    disable_streaming=True,
)
# anim_dir_agent = anim_dir_agent.with_structured_output(AnimationDirectorResponse)

# The director prompt announces two JSON objects. `plan` is already JSON text, but
# `script_plan` is a Python dict, so serialise it explicitly instead of letting
# str.format insert its Python repr (single quotes, not valid JSON).
script_json = json.dumps(script_plan, ensure_ascii=False)

animation_plan = anim_dir_agent.invoke([
    SystemMessage(animation_director_prompt),
    HumanMessage(anim_dir_input.format(plan, script_json))
])

animation_plan

AIMessage(content='```json\n{\n  "scenes": [\n    {\n      "id": 1,\n      "related_script_id": 1,\n      "timing": {\n        "duration": 4.4,\n        "start_at": 0.0\n      },\n      "description": "Draw the right-angled triangle with three sides",\n      "objects_in_scene": [\n        {"name": "triangle", "type": "Polygon", "color": "WHITE", "pos": "ORIGIN", "points": "[[-1.5,-1],[0,1.5],[1.5,-1]]"}\n      ]\n    },\n    {\n      "id": 2,\n      "related_script_id": 2,\n      "timing": {\n        "duration": 5.6,\n        "start_at": 4.4\n      },\n      "description": "Squares pop out of each triangle side, labeled a², b², c²",\n      "objects_in_scene": [\n        {"name": "sq_a", "type": "Square", "color": "#F72585", "pos": "LEFT*2.5", "label": "a²"},\n        {"name": "sq_b", "type": "Square", "color": "#3A86FF", "pos": "DOWN*2.5", "label": "b²"},\n        {"name": "sq_c", "type": "Square", "color": "#FFD60A", "pos": "UP*2.5 + RIGHT*0.5", "label": "c²"}\n      ]\n    },\n    {\

In [15]:
# Parse the director's reply into a dict. The model may wrap its JSON in a Markdown
# code fence (```json ... ```), so strip the fence by structure rather than by fixed
# character offsets, which break on trailing whitespace or a different fence tag.
# storyboard = animation_plan['messages'][-1].content
storyboard_text = str(animation_plan.content).strip()
if storyboard_text.startswith("```"):
    # Drop the opening fence line (with or without a language tag)...
    storyboard_text = storyboard_text.split("\n", 1)[1] if "\n" in storyboard_text else ""
    # ...and everything from the closing fence onwards.
    storyboard_text = storyboard_text.rsplit("```", 1)[0]
storyboard = json.loads(storyboard_text)
storyboard

{'scenes': [{'id': 1,
   'related_script_id': 1,
   'timing': {'duration': 4.4, 'start_at': 0.0},
   'description': 'Draw the right-angled triangle with three sides',
   'objects_in_scene': [{'name': 'triangle',
     'type': 'Polygon',
     'color': 'WHITE',
     'pos': 'ORIGIN',
     'points': '[[-1.5,-1],[0,1.5],[1.5,-1]]'}]},
  {'id': 2,
   'related_script_id': 2,
   'timing': {'duration': 5.6, 'start_at': 4.4},
   'description': 'Squares pop out of each triangle side, labeled a², b², c²',
   'objects_in_scene': [{'name': 'sq_a',
     'type': 'Square',
     'color': '#F72585',
     'pos': 'LEFT*2.5',
     'label': 'a²'},
    {'name': 'sq_b',
     'type': 'Square',
     'color': '#3A86FF',
     'pos': 'DOWN*2.5',
     'label': 'b²'},
    {'name': 'sq_c',
     'type': 'Square',
     'color': '#FFD60A',
     'pos': 'UP*2.5 + RIGHT*0.5',
     'label': 'c²'}]},
  {'id': 3,
   'related_script_id': 3,
   'timing': {'duration': 13.0, 'start_at': 10.0},
   'description': 'Equation a² + b² = 

## Animator

In [16]:
# System prompt for the animator. Read via a context manager so the handle is closed,
# and as UTF-8 so decoding does not depend on the platform default.
with open("../prompts/animator.md", encoding="utf-8") as animator_prompt_file:
    animator_prompt = animator_prompt_file.read()
print(animator_prompt)

# ROLE
You are the Lead Manim Developer. Your task is to write a complete, error-free Python script using the `manim` library (Community Edition v0.18+) based on a provided Storyboard.

# INPUT DATA
You will receive a JSON object called "Storyboard" containing a list of `scenes`.
Each item in the list represents a segment of the video and contains:
1. `description` (Context for comments)
2. `objects_in_scene` (Variables to define)
3. `animations` (Actions to perform using `self.play`)
4. `timing` (Duration constraints)

# CODING STANDARDS (STRICT)
1.  **Structure:** Generate a SINGLE class named `VideoScene` inheriting from `Scene`.
2.  **Method:** Write all code inside the `def construct(self):` method. Do NOT split into multiple methods (this avoids variable scope errors).
3.  **Imports:** Start with `from manim import *`.
4.  **Audio Sync:** You must assume the audio will be overlaid later. Your job is to ensure the visual timing matches the `timing.duration` from the JSON.

# MANIM

In [37]:
# Structured result of the animator agent.
class AnimatorResponse(BaseModel):
    # The generated Manim script as a single source string.
    code: str

# Animator agent: no tools of its own, it must answer with an AnimatorResponse.
anim_agent = create_agent(
    name="animator",
    model="groq:moonshotai/kimi-k2-instruct-0905",
    system_prompt=animator_prompt,
    response_format=ToolStrategy(AnimatorResponse)
)

# Template for the animator's human message; `{}` receives the storyboard.
anim_input = """
---- STORYBOARD ----
{}
"""

# The animator prompt promises a JSON object, so serialise the storyboard dict
# instead of letting str.format insert its Python repr (single quotes, not JSON).
# NOTE(review): the recorded run of this cell failed with a provider-side
# `tool_use_failed` error (the model emitted a triple-quoted string inside JSON);
# that is a generation problem and is not addressed here.
storyboard_json = json.dumps(storyboard, ensure_ascii=False)

code = anim_agent.invoke({"messages": [
    HumanMessage(anim_input.format(storyboard_json))
]})
code

BadRequestError: Error code: 400 - {'error': {'message': "Failed to call a function. Please adjust your prompt. See 'failed_generation' for more details.", 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '{\n  "code": """\nfrom manim import *\n\nclass VideoScene(Scene):\n    def construct(self):\n        # --- SCENE 1: Draw the right-angled triangle with three sides ---\n        triangle = Polygon(\n            [-1.5, -1, 0],\n            [0, 1.5, 0],\n            [1.5, -1, 0],\n            color=WHITE\n        ).move_to(ORIGIN)\n        self.play(Create(triangle), run_time=4.4)\n        \n        # --- SCENE 2: Squares pop out of each triangle side, labeled a², b², c² ---\n        sq_a = Square(color="#F72585").scale(0.8).move_to(LEFT * 2.5)\n        label_a = MathTex("a^2", color="#F72585").next_to(sq_a, DOWN, buff=0.2)\n        \n        sq_b = Square(color="#3A86FF").scale(0.8).move_to(DOWN * 2.5)\n        label_b = MathTex("b^2", color="#3A86FF").next_to(sq_b, LEFT, buff=0.2)\n        \n        sq_c = Square(color="#FFD60A").scale(0.8).move_to(UP * 2.5 + RIGHT * 0.5)\n        label_c = MathTex("c^2", color="#FFD60A").next_to(sq_c, RIGHT, buff=0.2)\n        \n        self.play(\n            FadeIn(sq_a),\n            FadeIn(label_a),\n            FadeIn(sq_b),\n            FadeIn(label_b),\n            FadeIn(sq_c),\n            FadeIn(label_c),\n            run_time=5.6\n        )\n        \n        # --- SCENE 3: Equation a² + b² = c² fades in over the scene ---\n        eq = MathTex("a^2 + b^2 = c^2", color=WHITE).move_to(UP * 3.5)\n        self.play(FadeIn(eq), run_time=13.0)\n        \n        # --- SCENE 4: Camera zooms slightly toward the equation ---\n        self.play(\n            self.camera.frame.animate.scale(1.2),\n            run_time=2.8\n        )\n        \n        # --- SCENE 5: Split small squares into four right-triangle pieces each and glide them into the large square ---\n        # Create 4 right triangles 
for sq_a\n        pieces_a = VGroup()\n        for i in range(2):\n            for j in range(2):\n                tri = Polygon(\n                    [-0.5, -0.5, 0],\n                    [0.5, -0.5, 0],\n                    [-0.5, 0.5, 0],\n                    color="#F72585"\n                ).scale(0.3).move_to(LEFT * 2.5 + np.array([i * 0.6 - 0.3, j * 0.6 - 0.3, 0]))\n                pieces_a.add(tri)\n        \n        # Create 4 right triangles for sq_b\n        pieces_b = VGroup()\n        for i in range(2):\n            for j in range(2):\n                tri = Polygon(\n                    [-0.5, -0.5, 0],\n                    [0.5, -0.5, 0],\n                    [-0.5, 0.5, 0],\n                    color="#3A86FF"\n                ).scale(0.3).move_to(DOWN * 2.5 + np.array([i * 0.6 - 0.3, j * 0.6 - 0.3, 0]))\n                pieces_b.add(tri)\n        \n        self.play(\n            FadeOut(sq_a),\n            FadeOut(sq_b),\n            FadeIn(pieces_a),\n            FadeIn(pieces_b),\n            run_time=11.0\n        )\n        \n        # --- SCENE 6: Pieces snap into place inside the large square with matching colors ---\n        target_center = sq_c.get_center()\n        pieces_a.generate_target()\n        for piece in pieces_a:\n            piece.target.move_to(target_center + np.array([0.3, 0.3, 0]))\n        pieces_b.generate_target()\n        for piece in pieces_b:\n            piece.target.move_to(target_center + np.array([-0.3, -0.3, 0]))\n        \n        self.play(\n            MoveToTarget(pieces_a),\n            MoveToTarget(pieces_b),\n            run_time=5.6\n        )\n        \n        # --- SCENE 7: Large square briefly shines ---\n        self.play(\n            sq_c.animate.set_fill("#FFD60A", opacity=0.5),\n            run_time=2.8\n        )\n        self.play(\n            sq_c.animate.set_fill(opacity=0),\n            run_time=0.2\n        )\n        \n        # --- SCENE 8: Zoom out and fade the small squares, leaving 
only the equation ---\n        self.play(\n            self.camera.frame.animate.scale(0.8),\n            FadeOut(pieces_a),\n            FadeOut(pieces_b),\n            run_time=6.2\n        )\n        \n        # Move equation to center\n        eq.generate_target()\n        eq.target.move_to(ORIGIN)\n        self.play(MoveToTarget(eq), run_time=0.5)\n        \n        # --- SCENE 9: Hold on the equation with a subtle glow fade-out ---\n        self.play(\n            eq.animate.set_fill(WHITE, opacity=1).scale(1.1),\n            run_time=3.6\n        )\n        self.play(\n            eq.animate.scale(1 / 1.1),\n            run_time=3.6\n        )\n  """\n}'}}

In [20]:
# Print the generated Manim source. This expects `code` to still be the agent result
# dict, i.e. it must run before the cell that unwraps `structured_response`.
print(code['structured_response'].code)

from manim import *

class VideoScene(Scene):
    def construct(self):
        # --- SCENE 1: Draw the right-angled triangle with three sides ---
        try:
            triangle
        except NameError:
            triangle = Polygon([[-1.5, -1, 0], [0, 1.5, 0], [1.5, -1, 0]])
            triangle.set_color(WHITE)
            triangle.move_to(ORIGIN)
        self.play(Create(triangle), run_time=4.4)

        # --- SCENE 2: Squares pop out of each triangle side, labeled a², b², c² ---
        try:
            sq_a
        except NameError:
            sq_a = Square()
            sq_a.set_color('#F72585')
            sq_a.move_to(LEFT * 2.5)
        try:
            sq_b
        except NameError:
            sq_b = Square()
            sq_b.set_color('#3A86FF')
            sq_b.move_to(DOWN * 2.5)
        try:
            sq_c
        except NameError:
            sq_c = Square()
            sq_c.set_color('#FFD60A')
            sq_c.move_to(UP * 2.5 + RIGHT * 0.5)
        self.play(C

## Executing Manim code (using old script)

In [21]:
# Unwrap the AnimatorResponse from the agent result exactly once. After the first run
# `code` is no longer a dict, so the guard keeps a re-run of this cell from failing.
if isinstance(code, dict):
    code = code['structured_response']
code

AnimatorResponse(code="from manim import *\n\nclass VideoScene(Scene):\n    def construct(self):\n        # --- SCENE 1: Draw the right-angled triangle with three sides ---\n        try:\n            triangle\n        except NameError:\n            triangle = Polygon([[-1.5, -1, 0], [0, 1.5, 0], [1.5, -1, 0]])\n            triangle.set_color(WHITE)\n            triangle.move_to(ORIGIN)\n        self.play(Create(triangle), run_time=4.4)\n\n        # --- SCENE 2: Squares pop out of each triangle side, labeled a², b², c² ---\n        try:\n            sq_a\n        except NameError:\n            sq_a = Square()\n            sq_a.set_color('#F72585')\n            sq_a.move_to(LEFT * 2.5)\n        try:\n            sq_b\n        except NameError:\n            sq_b = Square()\n            sq_b.set_color('#3A86FF')\n            sq_b.move_to(DOWN * 2.5)\n        try:\n            sq_c\n        except NameError:\n            sq_c = Square()\n            sq_c.set_color('#FFD60A')\n            sq

In [22]:
import subprocess
import tempfile

# Scratch directory (under the notebook's working directory) where the generated
# script is written and where manim is run.
cwd = os.path.abspath(os.path.curdir) + "/tmp/"
os.makedirs(cwd, exist_ok=True)

# NamedTemporaryFile is used because only it guarantees a real path and accepts
# `delete=False` on every platform (TemporaryFile does so only on Windows).
# UTF-8 keeps characters such as "²" intact whatever the locale's default encoding.
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", dir=cwd, delete=False, encoding="utf-8") as file:
    file.write(code.code)

try:
    # `-p` asks manim to preview the rendered video once it is done.
    res = subprocess.run(["manim", "-p", file.name], cwd=cwd, capture_output=True)
finally:
    # Remove the script even if launching manim raised.
    os.remove(file.name)

In [23]:
# Show whatever manim wrote to stderr during the run above.
stderr_text = res.stderr.decode()
print(stderr_text)

┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ c:\Users\Utkarsh\OneDrive\Documents\GitHub\visual-explainer\tests\tmp\tmpm2 │
│ iwzdyh.py:7 in construct                                                    │
└─────────────────────────────────────────────────────────────────────────────┘
UnboundLocalError: local variable 'triangle' referenced before assignment

During handling of the above exception, another exception occurred:

┌───────────────────── Traceback (most recent call last) ─────────────────────┐
│ C:\Users\Utkarsh\OneDrive\Documents\GitHub\visual-explainer\.venv\lib\site- │
│ packages\manim\cli\render\commands.py:125 in render                         │
│                                                                             │
│   122 │   │   │   try:                                                      │
│   123 │   │   │   │   with tempconfig({}):                                  │
│   124 │   │   │   │   │   scene = SceneClass()        

In [32]:
from langchain_core.messages import BaseMessage, AIMessage

def generate_code(agent, history: List[BaseMessage], error, retries=0):
    """Ask the animator agent for Manim code, feeding back the previous error if any.

    :param agent: Agent whose ``invoke`` returns a state dict containing a
        ``structured_response`` object with a ``code`` attribute.
    :param history: Conversation so far. It is extended in place with the error
        report (when present) and with the newly generated code.
    :param error: Error text from the previous execution; falsy on the first attempt.
    :param retries: Currently unused; kept so existing callers keep working.
    :return: Tuple ``(history, code)`` where ``code`` is the generated source string.
    """
    if error:
        history.append(HumanMessage(f"Error received in previous execution: {error}"))

    # `invoke` returns the agent state (a dict), not the response model itself, so the
    # structured response has to be looked up before reading `.code`.
    result = agent.invoke({"messages": history})
    response = result["structured_response"].code

    print("Code generated successfully.")
    print(response, end=f"\n{'='*50}\n\n")

    # Record the answer so a follow-up error report has the code it refers to.
    history.append(AIMessage(content=response))
    return history, response


def execute_manim_code(code):
    """Write ``code`` to a temporary script, render it with manim and report the outcome.

    :param code: Manim source code as a string.
    :return: ``"Executed"`` when manim exits with status 0, otherwise
        ``"Error: <stderr text>"``.
    """
    cwd = os.path.abspath(os.path.curdir) + "/tmp/"
    os.makedirs(cwd, exist_ok=True)

    # NamedTemporaryFile guarantees a real path and accepts `delete=False` on every
    # platform (TemporaryFile does so only on Windows). UTF-8 keeps non-ASCII
    # characters in the generated source intact.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".py", dir=cwd, delete=False, encoding="utf-8") as file:
        file.write(code)
    script_path = file.name

    try:
        res = subprocess.run(["manim", "-p", script_path], cwd=cwd, capture_output=True)
    finally:
        # Clean up the script even if manim could not be launched.
        os.remove(script_path)

    stderr_text = res.stderr.decode(errors="replace")

    # Success is decided by the exit status: stderr can hold progress output on a
    # successful render, and the original emptiness check was inverted anyway.
    if res.returncode == 0:
        print("Code executed perfectly", end=f"\n{'='*50}\n\n")
        return "Executed"

    print("Code did not execute. Error - \n", stderr_text, end=f"\n{'='*50}\n\n")
    return f"Error: {stderr_text}"

In [None]:
# Upper bound on generate -> render round trips, so a model that keeps producing
# broken code cannot loop (and bill) forever.
MAX_ATTEMPTS = 5

# `anim_agent` was created with `system_prompt=animator_prompt`, so only the human
# message is sent here. The template must be filled with the storyboard: sending the
# bare template leaves the model with nothing to animate.
messages = [
    HumanMessage(anim_input.format(json.dumps(storyboard, ensure_ascii=False)))
]

res = ""
for attempt in range(1, MAX_ATTEMPTS + 1):
    messages, code = generate_code(anim_agent, messages, res)
    print("Code generated!!")
    res = execute_manim_code(code)
    if res == "Executed":
        break
else:
    raise RuntimeError(f"Manim code still failing after {MAX_ATTEMPTS} attempts. Last result:\n{res}")

BadRequestError: Error code: 400 - {'error': {'message': 'Tool choice is required, but model did not call a tool', 'type': 'invalid_request_error', 'code': 'tool_use_failed', 'failed_generation': '{\n    "code": "from manim import *\\n\\nclass VideoScene(Scene):\\n    def construct(self):\\n        pass\\n"\n}'}}

In [None]:
import glob
from IPython.display import Video, display
import shutil

# Directory (under the notebook's working directory) where finished videos are kept.
save_dir = os.path.abspath(os.path.curdir) + "/videos/"
os.makedirs(save_dir, exist_ok=True)

# Find rendered videos below the scratch directory (paths are relative to `cwd`).
# Clips under `partial_movie_files` are skipped, presumably per-animation fragments
# rather than the final video — TODO confirm against manim's output layout.
mp4_files = [
    rel_path
    for rel_path in glob.glob("**/*.mp4", root_dir=cwd, recursive=True)
    if "partial_movie_files" not in rel_path
]
if mp4_files:
    # Pick the most recently written video rather than whichever glob lists first.
    latest_video = max(mp4_files, key=lambda rel_path: os.path.getmtime(os.path.join(cwd, rel_path)))
    latest_path = os.path.join(cwd, latest_video)
    display(Video(filename=latest_path, embed=True))
    # os.path.basename works with either path separator; splitting on "\\" only
    # works on Windows.
    shutil.move(latest_path, os.path.join(save_dir, os.path.basename(latest_video)))
else:
    print("No .mp4 files found.")