# CADEvolve-style Generator Bootstrapping (Seminar)

Pipeline:
1) Write a **new client part family** spec (name + abstract + detailed constraints).
2) Ask **gpt-5-mini** to pick **5 similar** part families from `code_db.json`.
3) Feed the **5 generators** as few-shot context and synthesize a **new generator**.
4) Run it, visualize, validate, iterate.

## 0) Setup assumptions
- `code_db.json` exists (list or `{ "parts": [...] }`)
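- Each record has a name field (`name`, falling back to `part_name`, `title`, or `id`) and a generator-code field (`generator_code`, falling back to `code`, `py`, or `source`).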
- Python packages: `openai`, `numpy`, `trimesh` (imported by the setup cell and needed for preview/validation), optional `cadquery`.

In [None]:
import os, json, textwrap
from pathlib import Path
from typing import List, Tuple
import numpy as np
import trimesh

# Location of the generator database. The path is relative, so it is
# resolved against the current working directory when the file is read.
CODE_DB_PATH = Path("code_db.json")

## 1) Load `code_db.json`

In [None]:
# Read the whole generator database from disk.
code_db = json.loads(CODE_DB_PATH.read_text(encoding="utf-8"))

# The database may be either a bare list of records or a {"parts": [...]}
# wrapper. Normalise both layouts to a plain list so the following cells can
# iterate `parts` without caring which one was used.
if isinstance(code_db, dict):
    parts = code_db.get("parts")
else:
    parts = code_db

if not isinstance(parts, list):
    raise ValueError(
        f'{CODE_DB_PATH} must contain a list of records or {{"parts": [...]}}'
    )

def get_part_name(p: dict) -> str:
    """Return the display name of a database record.

    The keys ``name``, ``part_name``, ``title`` and ``id`` are tried in that
    order; the first one holding a non-blank string wins and is returned with
    surrounding whitespace removed.

    Raises:
        KeyError: none of the keys holds a non-blank string.
    """
    for field in ("name", "part_name", "title", "id"):
        if field not in p:
            continue
        value = p[field]
        if not isinstance(value, str):
            continue
        cleaned = value.strip()
        if cleaned:
            return cleaned
    raise KeyError("No name field found.")

# Resolve a name for every record up front; a record without any usable
# name field makes get_part_name raise KeyError here.
part_names = list(map(get_part_name, parts))
print("Loaded parts:", len(parts))
print("Example names:", part_names[:10])

## 2) Write your new client family spec

In [None]:
# Template placeholders: fill in the real family name and a short abstract.
NEW_PART_NAME = "YOUR_PART_FAMILY_NAME"

# Plain ASCII hyphen in "2-4": the previous text carried a mis-encoded dash
# that would have been forwarded to the model verbatim.
NEW_PART_ABSTRACT = (
    "2-4 lines: what is the family, what is invariant, what varies."
)

# Long-form spec template: a heading line followed by one bullet per line,
# newline-separated, with no trailing newline.
NEW_PART_DETAILED = (
    "Detailed constraints (bullets encouraged):\n"
    "- Context\n"
    "- Narrow requirements / invariants\n"
    "- What to reconstruct from STL\n"
    "- Generator constraints: parameter ranges, allowed CAD ops\n"
    "- Validation criteria"
)

# Refuse to continue while the family name is still the template placeholder.
assert NEW_PART_NAME != "YOUR_PART_FAMILY_NAME"
print("OK:", NEW_PART_NAME)

## 3) Configure OpenAI client (set OPENAI_API_KEY)

In [None]:
# pip install openai
from openai import OpenAI

# Stop early with a readable message when the API key is not set in the
# environment.
assert os.getenv("OPENAI_API_KEY"), "Set OPENAI_API_KEY env var"
# No credentials are passed explicitly; this notebook-wide client is the one
# used by the LLM helper functions below.
client = OpenAI()

## 4) Similarity search over part names (pick 5)

In [None]:
def llm_pick_similar_parts(
    new_name: str,
    abstract: str,
    detailed: str,
    candidate_names: List[str],
    k: int = 5,
) -> List[str]:
    """Ask the model which ``k`` candidate families resemble the new one most.

    The spec and the candidate names are sent to ``gpt-5-mini`` through the
    module-level ``client``; the reply must be a JSON object of the form
    ``{"similar": [...]}``.

    Args:
        new_name: Name of the new part family.
        abstract: Short description of the family.
        detailed: Detailed constraints for the family.
        candidate_names: Names the model is allowed to choose from.
        k: Number of names to select.

    Returns:
        Exactly ``k`` distinct names, each present in ``candidate_names``,
        in the order the model returned them.

    Raises:
        ValueError: ``k`` exceeds the number of distinct candidates, or the
            reply is not parseable JSON of the expected shape, contains
            duplicates, or contains names that are not candidates.
    """
    # Set for O(1) membership tests during validation.
    known_names = set(candidate_names)
    if k > len(known_names):
        # Fail before spending an API call on a request that cannot succeed.
        raise ValueError(
            f"Cannot pick {k} names from {len(known_names)} distinct candidates"
        )

    name_blob = "\n".join(f"- {n}" for n in candidate_names)

    instructions = (
        f"Pick exactly {k} MOST SIMILAR part families from the candidate list.\n"
        f'Output MUST be valid JSON: {{"similar": ["name1", ...]}} with exactly {k} items.\n'
        "- Each name must be copied EXACTLY from the candidate list.\n"
        "- No extra keys, no commentary."
    )

    user_input = "\n".join([
        "NEW PART FAMILY",
        f"Name: {new_name}",
        "",
        "Abstract:",
        abstract,
        "",
        "Detailed description:",
        detailed,
        "",
        "CANDIDATE PART FAMILIES (names only):",
        name_blob,
    ])

    resp = client.responses.create(
        model="gpt-5-mini",
        input=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_input},
        ],
    )

    # Models sometimes wrap the JSON in a markdown fence or add stray text
    # despite the instructions, so parse only the outermost {...} span.
    raw = resp.output_text
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end < start:
        raise ValueError(f"No JSON object in model output: {raw!r}")
    # json.JSONDecodeError is a ValueError subclass, so malformed JSON keeps
    # the same exception family as the other format errors below.
    data = json.loads(raw[start:end + 1])

    sims = data.get("similar") if isinstance(data, dict) else None
    if not (isinstance(sims, list) and len(sims) == k and all(isinstance(x, str) for x in sims)):
        raise ValueError(f"Bad format: {data}")
    if len(set(sims)) != len(sims):
        # The same name repeated k times would otherwise pass validation.
        raise ValueError(f"Duplicate names: {sims}")
    missing = [x for x in sims if x not in known_names]
    if missing:
        raise ValueError(f"Names not in DB: {missing}")
    return sims

# Ask the model for the five existing families closest to the new spec.
similar_names = llm_pick_similar_parts(
    new_name=NEW_PART_NAME,
    abstract=NEW_PART_ABSTRACT,
    detailed=NEW_PART_DETAILED,
    candidate_names=part_names,
    k=5,
)
# Bare expression: displayed as the cell output in a notebook.
similar_names

## 5) Pull the 5 generators from DB

In [None]:
def find_part_record_by_name(parts: List[dict], name: str) -> dict:
    """Return the first record in ``parts`` whose resolved name equals ``name``.

    Names are resolved with ``get_part_name``, so any exception it raises for
    a record examined before the match propagates to the caller.

    Raises:
        KeyError: no record matches; the missing name is the error argument.
    """
    not_found = object()
    hit = next(
        (record for record in parts if get_part_name(record) == name),
        not_found,
    )
    if hit is not_found:
        raise KeyError(name)
    return hit

def get_generator_code(p: dict) -> str:
    """Return the generator source stored in a database record.

    The keys ``generator_code``, ``code``, ``py`` and ``source`` are tried in
    that order; the first one holding a non-blank string is returned
    unchanged (whitespace is not stripped).

    Raises:
        KeyError: none of the keys holds a non-blank string.
    """
    for field in ("generator_code", "code", "py", "source"):
        if field not in p:
            continue
        text = p[field]
        if isinstance(text, str) and text.strip():
            return text
    raise KeyError(f"No generator code in record: {get_part_name(p)}")

# Look up the full record for every selected name ...
similar_records = [
    find_part_record_by_name(parts, wanted) for wanted in similar_names
]
# ... and pair each record's name with its generator source.
similar_generators: List[Tuple[str, str]] = [
    (get_part_name(rec), get_generator_code(rec)) for rec in similar_records
]

print("Pulled:", [pair[0] for pair in similar_generators])

## 6) Synthesize a new generator (few-shot)

In [None]:
# File the generated generator source is written to and later imported from
# (relative path, resolved against the current working directory).
GENERATOR_OUTPUT_PATH = Path("student_generator.py")

def llm_synthesize_generator(
    new_name: str,
    abstract: str,
    detailed: str,
    fewshot_generators: List[Tuple[str, str]],
) -> str:
    """Ask the model to write a generator for the new part family.

    The spec and the few-shot generators are sent to ``gpt-5-mini`` through
    the module-level ``client``.

    Args:
        new_name: Name of the new part family.
        abstract: Short description of the family.
        detailed: Detailed constraints for the family.
        fewshot_generators: ``(name, source_code)`` pairs used as examples.

    Returns:
        The generated Python source. When the reply is wrapped in a markdown
        code fence, the fence lines are removed; otherwise the model output
        is returned unchanged.
    """
    fewshot_blob = "\n\n".join(
        f"### Example: {name}\n<CODE>\n{code}\n</CODE>"
        for name, code in fewshot_generators
    )

    instructions = "\n".join([
        "Output ONLY python code (no markdown).",
        "Implement a robust parametric CAD generator for the NEW part family.",
        "",
        "Hard requirements:",
        "- Define DEFAULT_PARAMS (dict) and make_part(params=None).",
        "- Default params must produce a valid, non-empty part.",
        "- Prefer CadQuery if available; optionally provide trimesh fallback.",
        "- Keep it simple and robust (avoid fragile ops).",
    ])

    user_input = "\n".join([
        "NEW PART FAMILY",
        f"Name: {new_name}",
        "",
        "Abstract:",
        abstract,
        "",
        "Detailed description:",
        detailed,
        "",
        "FEW-SHOT EXAMPLES:",
        fewshot_blob,
        "",
        "Now write the generator code.",
    ])

    resp = client.responses.create(
        model="gpt-5-mini",
        input=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_input},
        ],
    )

    generated = resp.output_text
    # The prompt forbids markdown, but a reply wrapped in a ``` fence would
    # be a syntax error once written to a .py file, so unwrap it here.
    trimmed = generated.strip()
    if trimmed.startswith("```"):
        # Drop the opening fence line (including any language tag).
        body = trimmed.splitlines()[1:]
        if body and body[-1].strip().startswith("```"):
            body = body[:-1]
        generated = "\n".join(body) + "\n"
    return generated

# Generate the new family's source from the spec plus the few-shot examples.
new_code = llm_synthesize_generator(
    new_name=NEW_PART_NAME,
    abstract=NEW_PART_ABSTRACT,
    detailed=NEW_PART_DETAILED,
    fewshot_generators=similar_generators,
)

# Persist the source so it can be imported as a module afterwards.
GENERATOR_OUTPUT_PATH.write_text(new_code, encoding="utf-8")
print("Wrote:", GENERATOR_OUTPUT_PATH.resolve())

## 7) Run generator (defaults) + export STL + preview

In [None]:
import importlib.util

def import_module_from_path(path: Path, module_name: str = "student_generator"):
    """Execute the Python file at ``path`` and return it as a module object.

    The module is built from a file-location spec and executed; this function
    does not register it in ``sys.modules`` itself.

    Args:
        path: Location of the source file to load.
        module_name: Name given to the resulting module.

    Returns:
        The executed module.

    Raises:
        ImportError: no import spec or loader could be built for ``path``
            (for example, the file suffix is not a recognised module type).
    """
    spec = importlib.util.spec_from_file_location(module_name, str(path))
    # Validate before use: spec_from_file_location returns None for paths it
    # cannot handle, and module_from_spec would fail obscurely on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod

# Execute the generator file written earlier and get it back as a module.
mod = import_module_from_path(GENERATOR_OUTPUT_PATH)

# Verify the API requested in the synthesis prompt: a DEFAULT_PARAMS dict and
# a callable make_part. getattr(..., None) turns a missing attribute into a
# readable assertion message instead of an AttributeError.
DEFAULT_PARAMS = getattr(mod, "DEFAULT_PARAMS", None)
make_part = getattr(mod, "make_part", None)
assert isinstance(DEFAULT_PARAMS, dict), "DEFAULT_PARAMS missing"
assert callable(make_part), "make_part missing"

# NOTE(review): None is presumably treated by the generated code as "use
# DEFAULT_PARAMS" -- that depends on the generated file; confirm.
part = make_part(None)

# Output file for the exported preview (relative path).
stl_path = Path("preview.stl")

def export_cq_to_stl(obj, out_path: Path):
    """Write ``obj`` to ``out_path`` using ``cadquery.exporters.export``.

    An object that has a ``val`` attribute is unwrapped with ``obj.val()``
    first; anything else is handed to the exporter as is. CadQuery is
    imported lazily, so it is only required when this function is called.
    """
    from cadquery import exporters

    if hasattr(obj, "val"):
        shape = obj.val()
    else:
        shape = obj
    exporters.export(shape, str(out_path))

# CadQuery is an optional dependency: bind `cq` here (None when it is not
# installed) so the check below cannot fail with NameError.
try:
    import cadquery as cq
except ImportError:
    cq = None

# Pick the exporter from the shape of the returned object: something with
# `.val` goes through the CadQuery exporter, something with `.export` is
# asked to export itself.
if cq is not None and hasattr(part, "val"):
    export_cq_to_stl(part, stl_path)
    print("Exported:", stl_path.resolve())
elif trimesh is not None and hasattr(part, "export"):
    part.export(stl_path)
    print("Exported:", stl_path.resolve())
else:
    print("Unknown part type:", type(part))

# Reload the exported file (if one exists), print basic statistics and open
# the viewer.
if trimesh is not None and stl_path.exists():
    # NOTE(review): force="mesh" presumably makes trimesh return one mesh
    # object rather than a scene -- confirm against the trimesh docs.
    m = trimesh.load(stl_path, force="mesh")
    print("Mesh:", m.vertices.shape, m.faces.shape, "watertight:", m.is_watertight)
    m.show()

## 8) Basic validation (optional)

In [None]:
def basic_mesh_checks(m) -> List[str]:
    """Return human-readable problems found on mesh ``m``.

    The checks are duck-typed: ``faces``, ``vertices``, ``bounds`` and
    ``is_watertight`` are inspected only when present on ``m``.

    Args:
        m: Mesh-like object, or ``None``.

    Returns:
        A list of problem descriptions; empty when nothing was flagged.
        ``None`` input yields ``["mesh is None"]``.
    """
    if m is None:
        return ["mesh is None"]

    problems = []

    faces = getattr(m, "faces", None)
    if faces is None or len(faces) == 0:
        problems.append("no faces")

    vertices = getattr(m, "vertices", None)
    if vertices is None or len(vertices) == 0:
        problems.append("no vertices")

    if hasattr(m, "bounds"):
        bounds = m.bounds
        if bounds is None:
            # An empty mesh may expose bounds=None; report it instead of
            # letting np.isfinite fail on a non-numeric array.
            problems.append("no bounds")
        else:
            b = np.array(bounds)
            if not np.isfinite(b).all():
                problems.append("non-finite bounds")
            # Row 0 is the minimum corner and row 1 the maximum, so the
            # difference is the extent along each axis.
            span = b[1] - b[0]
            if (span <= 1e-6).any():
                problems.append(f"degenerate span: {span}")

    if hasattr(m, "is_watertight") and not m.is_watertight:
        problems.append("not watertight (may be ok)")
    return problems

# Validation reloads the exported preview with trimesh, so it must be present.
assert trimesh is not None, "Install trimesh for validation"
m = trimesh.load("preview.stl", force="mesh")
# Bare expression: the list of problems is displayed as the cell output.
basic_mesh_checks(m)

## 9) Repair loop (iterate)

In [None]:
# Feedback template for the repair prompt: three headed sections, each with
# a placeholder bullet, newline-separated and without a trailing newline.
REPAIR_NOTES = (
    "Observed issues:\n"
    "- ...\n"
    "Spec mismatches:\n"
    "- ...\n"
    "Requested changes:\n"
    "- ..."
)

def llm_repair_generator(old_code: str, repair_notes: str) -> str:
    """Ask the model to fix an existing generator according to review notes.

    The prompt combines the module-level spec (``NEW_PART_NAME``,
    ``NEW_PART_ABSTRACT``, ``NEW_PART_DETAILED``), the notes and the current
    source, and is sent to ``gpt-5-mini`` through the module-level ``client``.

    Args:
        old_code: Current generator source.
        repair_notes: Free-text description of what to fix.

    Returns:
        The revised Python source. When the reply is wrapped in a markdown
        code fence, the fence lines are removed; otherwise the model output
        is returned unchanged.
    """
    instructions = "\n".join([
        "Fix the CAD generator. Output ONLY python code (no markdown).",
        "You may rewrite the whole file.",
        "Preserve the API: DEFAULT_PARAMS and make_part(params=None).",
        "Keep it robust and simple.",
    ])

    user_input = "\n".join([
        "SPEC",
        f"Name: {NEW_PART_NAME}",
        "",
        "Abstract:",
        NEW_PART_ABSTRACT,
        "",
        "Detailed description:",
        NEW_PART_DETAILED,
        "",
        "REPAIR NOTES:",
        repair_notes,
        "",
        "CURRENT GENERATOR:",
        "<CODE>",
        old_code,
        "</CODE>",
    ])

    resp = client.responses.create(
        model="gpt-5-mini",
        input=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": user_input},
        ],
    )

    repaired = resp.output_text
    # The prompt forbids markdown, but a reply wrapped in a ``` fence would
    # be a syntax error once written to a .py file, so unwrap it here.
    trimmed = repaired.strip()
    if trimmed.startswith("```"):
        # Drop the opening fence line (including any language tag).
        body = trimmed.splitlines()[1:]
        if body and body[-1].strip().startswith("```"):
            body = body[:-1]
        repaired = "\n".join(body) + "\n"
    return repaired

# Read the current generator, have the model revise it, and overwrite the
# file in place with the revised source.
old = GENERATOR_OUTPUT_PATH.read_text(encoding="utf-8")
fixed = llm_repair_generator(old_code=old, repair_notes=REPAIR_NOTES)
GENERATOR_OUTPUT_PATH.write_text(fixed, encoding="utf-8")
print("Patched:", GENERATOR_OUTPUT_PATH.resolve())