## Setup paths + load prompts

In [None]:
import json
import os
from pathlib import Path
from datetime import datetime

import pandas as pd

# Where the prompt configuration is read from and where reports are written.
PROMPTS_PATH = Path("prompts/prompts_v1.json")
OUTPUTS_DIR = Path("outputs")
OUTPUTS_DIR.mkdir(exist_ok=True)  # no-op when the folder already exists

# Parse the prompt configuration and pull out its three top-level sections.
with PROMPTS_PATH.open(encoding="utf-8") as prompts_file:
    prompts_cfg = json.load(prompts_file)
domain, formulations, items = (
    prompts_cfg[section] for section in ("domain", "formulations", "items")
)

# Notebook display: the domain, the formulation names, and the item count.
domain, list(formulations.keys()), len(items)


## Gemini client “hello” check 

In [None]:
# Let python-dotenv populate the environment before the settings are read.
from dotenv import load_dotenv
load_dotenv()

# The API key is mandatory; the model name falls back to a default.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")

# Stop here with a clear message instead of failing later inside the SDK.
assert GEMINI_API_KEY, "Missing GEMINI_API_KEY in your .env"

# Shared client used by the helper functions further down.
from google import genai
gemini_client = genai.Client(api_key=GEMINI_API_KEY)

def call_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its reply.

    The reply is returned with surrounding whitespace removed. An empty
    string is returned when the response carries no text.
    """
    response = gemini_client.models.generate_content(
        model=GEMINI_MODEL, contents=prompt
    )
    reply = response.text
    if not reply:
        return ""
    return reply.strip()


## Read student code + run it in a subprocess

In [None]:
import subprocess

def read_code(path: Path) -> str:
    """Return the contents of ``path`` decoded as UTF-8.

    Bytes that are not valid UTF-8 are replaced rather than raising, so a
    badly encoded file can still be read.
    """
    with path.open(encoding="utf-8", errors="replace") as source_file:
        return source_file.read()

def run_python_file(path: Path) -> dict:
    """Run a Python file in a subprocess and capture what it prints.

    The script is launched with the interpreter that is running this code
    (``sys.executable``) rather than whatever ``python`` resolves to on PATH,
    so it uses the same Python version and installed packages as the caller
    and still works where no ``python`` command exists.

    Args:
        path: Location of the script to execute.

    Returns:
        A dict with three keys: ``exit_code`` (the process return code),
        ``stdout`` and ``stderr`` (captured output as text; an uncaught
        exception's traceback normally ends up in ``stderr``).
    """
    import sys  # local import: this block's cell only imports subprocess

    completed = subprocess.run(
        [sys.executable, str(path)],
        capture_output=True,
        text=True,
    )
    return {
        "exit_code": completed.returncode,
        "stdout": completed.stdout,
        "stderr": completed.stderr,
    }


## Prompt builder

In [None]:
def build_prompt(code_snippet: str, traceback_text: str, stdout_text: str, exit_code: int) -> str:
    """Assemble the teaching-assistant prompt for one failing script.

    The prompt has three parts: fixed instructions and hard rules, the
    captured run data (exit code, stdout, stderr, student code), and the
    list of headings the answer must use.
    """
    rules = (
        "You are a teaching assistant for beginner Python students.\n"
        "\n"
        "You must use ONLY the provided traceback/output.\n"
        "Do NOT guess the error type, do NOT invent line numbers, and do NOT invent code that is not shown.\n"
        "If information is missing, state what is missing and what to do next.\n"
        "\n"
        "Hard rules:\n"
        "- Do NOT provide full code solutions.\n"
        "- Do NOT output code blocks or fenced code (no ```).\n"
        "- Do NOT write a corrected version of the program.\n"
        "\n"
    )
    run_data = (
        "Inputs:\n"
        f"Exit code: {exit_code}\n"
        "\n"
        "Stdout:\n"
        f"{stdout_text}\n"
        "\n"
        "Stderr (traceback):\n"
        f"{traceback_text}\n"
        "\n"
        "Student code:\n"
        f"{code_snippet}\n"
        "\n"
    )
    answer_format = (
        "Output format (use headings exactly):\n"
        "## Error summary\n"
        "## What it means\n"
        "## Why it happened here\n"
        "## Conceptual fix\n"
        "## Debugging questions"
    )
    return "".join((rules, run_data, answer_format))


## Detect and sanitize code-like output

In [None]:
import re

# Patterns that mark model output as containing code.
CODE_FENCE_RE = re.compile(r"```")  # start/end of a fenced block
INLINE_CODE_RE = re.compile(r"`[^`]+`")  # a span wrapped in single backticks
CODEY_LINE_RE = re.compile(
    r"^\s*(def |class |import |from |for |while |try:|except |return\b|raise\b|with |print\()",
    re.MULTILINE
)  # a line that opens like a Python statement

def sanitize_llm_output(text: str) -> str:
    """Return an explanation-only version of the model output.

    Empty or missing text becomes an empty string. Output containing a code
    fence or a line that opens like a Python statement is discarded and
    replaced by the generic fallback explanation. Otherwise the text is
    returned stripped, with the backticks around inline spans removed.
    """
    stripped = (text or "").strip()

    # Either marker is a hard stop: the whole answer is swapped out.
    violates_rules = any(
        pattern.search(stripped) for pattern in (CODE_FENCE_RE, CODEY_LINE_RE)
    )
    if violates_rules:
        return fallback_explanation()

    # Keep the words of each inline span; drop only its two backticks.
    return INLINE_CODE_RE.sub(lambda span: span.group(0)[1:-1], stripped)


def fallback_explanation() -> str:
    """Return a generic, code-free explanation for rule-violating output.

    The text uses the same five headings the prompt requires of the model
    (ending with "## Debugging questions"), so reports that fall back to it
    have the same structure as every other report.
    """
    return (
        "## Error summary\n"
        "I can’t include code or code-like output. Below is a conceptual explanation.\n\n"
        "## What it means\n"
        "Python encountered something it could not parse or execute, and stopped.\n\n"
        "## Why it happened here\n"
        "Use the last lines of the traceback to find the exact file and line where execution failed.\n"
        "That line (or the line just before it) typically contains the root cause.\n\n"
        "## Conceptual fix\n"
        "- Identify the object mentioned in the error (variable, function, index, type).\n"
        "- Check it exists before use, and that it has the expected type/value.\n"
        "- If the error mentions an index/key, confirm the container actually contains it.\n"
        "- Make one small change at a time, then re-run to validate.\n\n"
        # Heading matches the required output format; it was "What to check next".
        "## Debugging questions\n"
        "- Which exact line number is referenced last in the traceback?\n"
        "- What are the types/values of the variables used on that line?\n"
        "- What assumption did the code make that might be false?\n"
    )


## Ask Gemini with prompt

In [None]:
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return the raw reply.

    Uses the notebook's shared ``gemini_client`` and ``GEMINI_MODEL``; the
    previous ``client`` / ``MODEL_NAME`` names are not defined anywhere in
    the notebook and raised NameError on a fresh kernel.

    Args:
        prompt: Full prompt text to send.

    Returns:
        The response text exactly as returned (not stripped), or an empty
        string when the response carries no text.
    """
    resp = gemini_client.models.generate_content(
        model=GEMINI_MODEL,
        contents=prompt,
    )
    return resp.text or ""


## Save one markdown report per error case

In [None]:
from pathlib import Path

def write_markdown(case_name: str, run_info: dict, explanation: str) -> Path:
    """Write the markdown report for one case and return its path.

    The report is saved as ``<case_name>.md`` in ``OUTPUTS_DIR`` and holds
    the exit code, the captured stdout and stderr (each inside a fenced
    block, with a placeholder when empty), and the sanitized explanation.
    """
    def captured(key: str, placeholder: str) -> str:
        # Missing, None, or whitespace-only output all become the placeholder.
        return (run_info.get(key) or "").strip() or placeholder

    traceback_text = captured("stderr", "(No stderr/traceback captured.)")
    stdout_text = captured("stdout", "(No stdout captured.)")
    code = run_info.get("exit_code")
    body = sanitize_llm_output(explanation).strip()

    report_lines = [
        f"# {case_name}",
        "",
        "## Run info",
        f"- Exit code: {code}",
        "",
        "## Stdout",
        "```",
        stdout_text,
        "```",
        "",
        "## Traceback",
        "```",
        traceback_text,
        "```",
        "",
        "## Explanation",
        body,
        "",  # makes the joined text end with a newline
    ]

    report_path = OUTPUTS_DIR / f"{case_name}.md"
    report_path.write_text("\n".join(report_lines), encoding="utf-8")
    return report_path


## Batch run all error scripts (generate 5 markdown files)

In [None]:
# Folder holding the failing student scripts. No other cell in this notebook
# defines ERRORS_DIR, so a fresh "Restart & Run All" stopped here with a
# NameError. The path matches the "./errors" location named in the assertion
# message below — adjust it if the scripts live elsewhere.
ERRORS_DIR = Path("errors")

error_files = sorted(ERRORS_DIR.glob("*.py"))
assert error_files, "No error files found in ./errors"

# Canned explanation used when a run produced no traceback to explain.
NO_TRACEBACK_EXPLANATION = (
    "## Error summary\n"
    "No traceback was captured, so I cannot identify the exact error.\n\n"
    "## What it means\n"
    "A traceback is required to know which exception occurred and where it happened.\n\n"
    "## Why it happened here\n"
    "Either the script did not raise an uncaught exception, or the failing line did not execute.\n\n"
    "## Conceptual fix\n"
    "1. Ensure the script actually triggers the intended failing line when run.\n"
    "2. Remove any try/except that catches the error and prevents a traceback.\n"
    "3. Re-run and confirm a non-zero exit code and non-empty stderr.\n\n"
    "## Debugging questions\n"
    "1. Does the line that should fail actually execute when the script runs?\n"
    "2. Is an exception being caught before it reaches the interpreter?\n"
)

# Paths of the reports written by this cell, in processing order.
generated = []

for path in error_files:
    case_name = path.stem
    code = read_code(path)
    run_info = run_python_file(path)

    # Gate: never call the LLM if we did not capture an actual error traceback
    if run_info["exit_code"] == 0 or not run_info["stderr"].strip():
        expl = NO_TRACEBACK_EXPLANATION
    else:
        prompt = build_prompt(
            code_snippet=code,
            traceback_text=run_info["stderr"],
            stdout_text=run_info["stdout"],
            exit_code=run_info["exit_code"],
        )
        raw_expl = ask_gemini(prompt)
        expl = sanitize_llm_output(raw_expl)

    out_path = write_markdown(case_name, run_info, expl)
    generated.append(out_path)

# Notebook display: the list of report paths.
generated


## Quick compliance check over outputs

In [None]:
def explanation_section_has_code_fence(md_text: str) -> bool:
    """Report whether a code fence appears after the Explanation heading.

    Only the text following the first "## Explanation" is inspected, so
    fences earlier in the report are ignored. A report without that heading
    counts as having no fence.
    """
    _, heading, after_heading = md_text.partition("## Explanation")
    return bool(heading) and "```" in after_heading

# File names of generated reports whose Explanation section contains a fence.
violations = [
    report.name
    for report in OUTPUTS_DIR.glob("*.md")
    if explanation_section_has_code_fence(
        report.read_text(encoding="utf-8", errors="replace")
    )
]

# Notebook display: an empty list means every report complies.
violations



In [None]:
from pathlib import Path

# Print one generated report in full for a manual read-through.
p = Path("outputs") / "case_03_name_error.md"
print(p.read_text(encoding="utf-8", errors="replace"))
