In [None]:
# Setup and imports
import sys
sys.path.insert(0, '/workspaces/wiki3-kg-project')

import dspy
import json
from pathlib import Path

from ontological_engineer import (
    configure_lm,
    StatementExtractor,
    StatementQualityJudge,
)
from ontological_engineer.judges import statement_quality_metric

## 1. Configure Language Model

Connect to LM Studio running Qwen-30B (or your preferred model).

In [None]:
# Language-model connection settings (defaults to Qwen-30B via LM Studio).
# Edit these values to point the notebook at a different model or endpoint.
lm_settings = dict(
    model="qwen/qwen3-coder-30b",
    api_base="http://host.docker.internal:1234/v1",
    temperature=0.7,
)

# Hand the settings to the project helper and keep the returned handle
lm = configure_lm(**lm_settings)

print(f"Configured LM: {lm}")

## 2. Test Statement Extraction

Try extracting statements from a sample Wikipedia chunk.

In [None]:
# Sample chunk from Albert Einstein article.
# The place name is spelled with a real "ü"; a mis-decoded (mojibake) variant
# would hand corrupted text to the extractor and the judge.
sample_chunk = """
Albert Einstein was born in Ulm, in the Kingdom of Württemberg in the German Empire, 
on 14 March 1879. His parents, secular Ashkenazi Jews, were Hermann Einstein, 
a salesman and engineer, and Pauline Koch. In 1880, the family moved to Munich's 
borough of Ludwigsvorstadt-Isarvorstadt, where Einstein's father and his uncle Jakob 
founded Elektrotechnische Fabrik J. Einstein & Cie, a company that manufactured 
electrical equipment based on direct current.
""".strip()

# Breadcrumb locating the chunk inside the article (article > section > subsection)
sample_context = "Albert Einstein > Life and career > Childhood, youth and education"

print("Chunk text:")
print(sample_chunk)
print(f"\nContext: {sample_context}")

In [None]:
# Build an extractor and apply it to the sample chunk defined above
extractor = StatementExtractor()
result = extractor(chunk_text=sample_chunk, section_context=sample_context)

# List the extracted statements, numbered from 1
print("Extracted statements:")
for position, statement in enumerate(result.statements, start=1):
    print(f"  {position}. {statement}")

# Show the accompanying reasoning only when it is non-empty
reasoning_text = result.reasoning
if reasoning_text:
    print(f"\nReasoning: {reasoning_text}")

## 3. Evaluate Extraction Quality

Use the `StatementQualityJudge` to score the extracted statements.

In [None]:
# Score the statements extracted above against their source chunk
judge = StatementQualityJudge()
evaluation = judge(
    chunk_text=sample_chunk,
    section_context=sample_context,
    statements=result.statements,
)

# (padded label, attribute name) for each individual quality dimension,
# listed in display order; the padding keeps the numbers column-aligned.
dimension_rows = (
    ("Completeness:      ", "completeness"),
    ("Atomicity:         ", "atomicity"),
    ("Accuracy:          ", "accuracy"),
    ("Link preservation: ", "link_preservation"),
)

print("Quality scores:")
for label, attribute in dimension_rows:
    print(f"  {label}{getattr(evaluation, attribute):.2f}")
print("  ---")
print(f"  Weighted score:    {evaluation.weighted_score:.2f}")
print(f"\nReasoning: {evaluation.reasoning}")

## 4. Load Existing Data for Bootstrapping

Load chunks and facts from previous pipeline runs to create training examples.

In [None]:
from ontological_engineer.training.bootstrap import (
    load_chunks_from_notebook,
    load_facts_from_notebook,
    create_training_examples,
)

# Directory holding the output of an earlier pipeline run
data_dir = Path("/workspaces/wiki3-kg-project/data/albert_einstein/20251218_231446")

# The two notebooks read from that run
chunks_notebook = data_dir / "chunks.ipynb"
facts_notebook = data_dir / "facts.ipynb"

# Load chunks first, then facts, reporting how many of each were found
chunks = load_chunks_from_notebook(chunks_notebook)
print(f"Loaded {len(chunks)} chunks")

facts = load_facts_from_notebook(facts_notebook)
print(f"Loaded {len(facts)} fact sets")

In [None]:
# Preview one chunk together with the facts stored at the same index
if chunks and facts:
    idx = 0  # Change to explore different chunks
    chunk = chunks[idx]
    # There may be fewer fact sets than chunks, so fall back to None
    fact_set = None if idx >= len(facts) else facts[idx]

    print(f"Chunk {idx + 1}:")
    print(f"  Context: {chunk.get('section_context', 'N/A')}")
    print(f"  Text: {chunk['text'][:200]}...")

    if fact_set:
        statements = fact_set['statements']
        print(f"\nFacts ({len(statements)} statements):")
        # Only the first five statements are listed ...
        for stmt in statements[:5]:
            print(f"  - {stmt}")
        # ... followed by a count of whatever was left out
        hidden_count = len(statements) - 5
        if hidden_count > 0:
            print(f"  ... and {hidden_count} more")

In [None]:
# Build training examples from the chunks and facts loaded above
examples = create_training_examples(chunks, facts)
print(f"Created {len(examples)} training examples")

# Summarise the first example, if any were produced
if examples:
    ex = examples[0]
    statement_count = len(ex.statements)
    print("\nSample example:")
    print("  Inputs: chunk_text, section_context")
    print(f"  Outputs: statements ({statement_count} items)")

## 5. Run DSPy Evaluation

Evaluate the extractor on the bootstrapped dataset.

In [None]:
# Split into train/dev sets
from random import Random

# Fixed seed so the split is the same on every Restart & Run All
SPLIT_SEED = 42
# Share of the examples that goes to the training set
TRAIN_FRACTION = 0.8

# Shuffle a copy rather than `examples` itself: an in-place shuffle would
# reorder the source list again each time this cell is re-run, silently
# changing which examples land in train vs. dev.
shuffled_examples = list(examples)
Random(SPLIT_SEED).shuffle(shuffled_examples)

split_idx = int(len(shuffled_examples) * TRAIN_FRACTION)
trainset = shuffled_examples[:split_idx]
devset = shuffled_examples[split_idx:]

print(f"Train set: {len(trainset)} examples")
print(f"Dev set: {len(devset)} examples")

In [None]:
# Evaluate on dev set
evaluator = dspy.Evaluate(
    devset=devset[:5],  # Start with small subset
    metric=statement_quality_metric,
    num_threads=1,
    display_progress=True,
)

extractor = StatementExtractor()
eval_result = evaluator(extractor)

# Depending on the DSPy version, Evaluate returns either a bare number or a
# result object exposing the aggregate as `.score`. Normalise to a number so
# the `:.2f` format below (and in later cells) works with both.
score = getattr(eval_result, "score", eval_result)

print(f"\nAverage quality score: {score:.2f}")

## 6. MIPROv2 Prompt Optimization (Optional)

Run DSPy's prompt optimizer to improve instructions without model training.

In [None]:
# Skip this cell if you want to go straight to GRPO training
from dspy.teleprompt import MIPROv2

# NOTE(review): written against the current MIPROv2 API — confirm against the
# installed DSPy version if this cell raises a TypeError on a keyword.
optimizer = MIPROv2(
    metric=statement_quality_metric,
    # Manual mode: the "auto" presets cannot be combined with an explicit
    # num_candidates / num_trials, so the preset is switched off.
    auto=None,
    num_candidates=3,
    init_temperature=0.7,
)

# This may take a while
optimized_extractor = optimizer.compile(
    StatementExtractor(),
    trainset=trainset[:10],
    # `num_trials` is the current name of the former `num_batches` argument
    num_trials=2,
    max_bootstrapped_demos=2,
    # With only 10 examples the validation split is smaller than the default
    # minibatch size, so score every trial on the full validation split.
    minibatch=False,
)

print("Optimization complete!")

In [None]:
# Evaluate optimized extractor (only when the optional optimisation cell was run)
if 'optimized_extractor' in globals():
    optimized_result = evaluator(optimized_extractor)

    # Depending on the DSPy version, Evaluate returns either a bare number or
    # a result object exposing `.score`; normalise both values so the `:.2f`
    # formatting below cannot fail on a non-numeric object.
    optimized_score = getattr(optimized_result, "score", optimized_result)
    baseline_score = getattr(score, "score", score)

    print(f"Original score: {baseline_score:.2f}")
    print(f"Optimized score: {optimized_score:.2f}")

## 7. Save Training Data

Save curated examples for later GRPO training.

In [None]:
# Convert examples to JSON-serializable format
def example_to_dict(ex):
    """Copy the three dataset fields of ``ex`` into a plain dict.

    Args:
        ex: Object exposing ``chunk_text``, ``section_context`` and
            ``statements`` as attributes.

    Returns:
        A dict with exactly those three keys, in that order, holding the
        attribute values unchanged.
    """
    exported_fields = ("chunk_text", "section_context", "statements")
    return {field: getattr(ex, field) for field in exported_fields}

# Save datasets
output_dir = Path("/workspaces/wiki3-kg-project/data/training")
# parents=True also creates missing intermediate directories, so the cell
# works on a fresh checkout where the parent data directory does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)

# One JSON file per split; written in a loop so both use identical settings
for filename, dataset in (
    ("statement_trainset.json", trainset),
    ("statement_devset.json", devset),
):
    # Explicit encoding keeps the output independent of the platform default
    with open(output_dir / filename, "w", encoding="utf-8") as f:
        json.dump([example_to_dict(ex) for ex in dataset], f, indent=2)

print(f"Saved {len(trainset)} training examples")
print(f"Saved {len(devset)} dev examples")