In [None]:
import sys
sys.path.insert(0, '/workspaces/wiki3-kg-project')

import dspy
import json
from pathlib import Path

from ontological_engineer import (
    configure_lm,
    StatementExtractor,
    StatementQualityJudge,
)
from ontological_engineer.judges import statement_quality_metric
from ontological_engineer.training import (
    load_stage1_config,
    load_trainset,
    load_devset,
    load_fewshot_examples,
    save_optimized_extractor,
)

## 1. Load Configuration and Artifacts

Load the config and datasets saved by `stage1_statements.ipynb`.

In [None]:
# Read the stage-1 configuration (persisted with CID provenance) from the training directory
training_dir = Path("/workspaces/wiki3-kg-project/data/training")
config = load_stage1_config(training_dir)

# Echo the settings the rest of the notebook relies on
print("Loaded config:")
for label, field in (
    ("Model", "model"),
    ("API base", "api_base"),
    ("Temperature", "temperature"),
    ("Num fewshot", "num_fewshot"),
):
    print(f"  {label}: {config[field]}")
# The CID is optional, so fall back to a placeholder when it is absent
print(f"  Config CID: {config.get('cid', 'N/A')}")

In [None]:
# Build the language model from the settings recorded in the stage-1 config
lm = configure_lm(
    **{name: config[name] for name in ("model", "api_base", "temperature")}
)
print(f"Configured LM: {lm}")

In [None]:
# Read both dataset splits (stored with provenance) from the training directory
trainset = load_trainset(training_dir)
devset = load_devset(training_dir)

# Report the size of each split
print(
    f"Loaded trainset: {len(trainset)} examples",
    f"Loaded devset: {len(devset)} examples",
    sep="\n",
)

In [None]:
# Few-shot demonstrations stored in the training directory
fewshot_examples = load_fewshot_examples(training_dir)
print("Loaded {} few-shot examples".format(len(fewshot_examples)))

In [None]:
# Load baseline results for comparison.
# `baseline_score` is reset up front so that a missing file or a failed load
# never leaves a stale value from an earlier run of this cell in the kernel.
baseline_path = training_dir / "baseline_results.json"
baseline_score = None

if baseline_path.exists():
    # JSON is UTF-8 by specification; do not depend on the platform default encoding
    with open(baseline_path, encoding="utf-8") as f:
        baseline_results = json.load(f)
    baseline_score = baseline_results['score']
    print(f"Baseline score: {baseline_score:.2f}")
    print(f"Baseline CID: {baseline_results.get('cid', 'N/A')}")
else:
    # Later cells check `baseline_score is not None` before comparing scores
    print("⚠️ No baseline results found. Run stage1_statements.ipynb first.")

## 2. MLflow Setup

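Configure MLflow for optimization tracking. The cell expects a tracking server at `http://127.0.0.1:5000`. Tracking is optional: if the setup below raises an error, `MLFLOW_ENABLED` is set to `False` and the rest of the notebook runs without it.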

In [None]:
# MLflow tracking is best-effort: any failure here (package not installed,
# server unreachable, autolog unsupported) disables tracking instead of
# stopping the notebook.
MLFLOW_TRACKING_URI = "http://127.0.0.1:5000"
MLFLOW_EXPERIMENT = "wiki3-kg-stage1-optimization"

# Default to disabled; flipped to True only after the whole setup succeeds
MLFLOW_ENABLED = False

try:
    # Imported inside the try so that a missing mlflow install is reported by
    # the handler below rather than aborting the cell with an ImportError
    import mlflow

    mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
    mlflow.set_experiment(MLFLOW_EXPERIMENT)

    # Enable autologging with full optimizer tracking
    # Reference: https://dspy.ai/tutorials/optimizer_tracking/
    mlflow.dspy.autolog(
        log_compiles=True,
        log_evals=True,
        log_traces_from_compile=True,
    )
except Exception as e:
    print(f"⚠️ MLflow not available: {e}")
else:
    MLFLOW_ENABLED = True
    print("✅ MLflow configured")
    print(f"   Tracking URI: {MLFLOW_TRACKING_URI}")
    print(f"   Experiment: {MLFLOW_EXPERIMENT}")

## 3. MIPROv2 Optimization

Run prompt optimization using MIPROv2.

In [None]:
from dspy.teleprompt import MIPROv2

# MIPROv2 `auto` preset: "light", "medium", or "heavy".
# Held in a single constant so the optimizer and the log line below cannot drift apart.
AUTO_MODE = "light"

# Configure optimizer, scored with the statement-quality metric
optimizer = MIPROv2(
    metric=statement_quality_metric,
    auto=AUTO_MODE,
)

# Demo count comes from the stage-1 config; the compile cell and the results
# summary later in this notebook both read NUM_FEWSHOT
NUM_FEWSHOT = config['num_fewshot']

print(f"Optimizing with {len(trainset)} training examples...")
print(f"Using {NUM_FEWSHOT} few-shot demos for bootstrapping...")
print(f"MIPROv2 mode: auto='{AUTO_MODE}'")

In [None]:
# Run MIPROv2 over the training set, starting from a fresh StatementExtractor.
# NUM_FEWSHOT is passed as the limit on bootstrapped demos.
optimized_extractor = optimizer.compile(
    StatementExtractor(),
    trainset=trainset,
    max_bootstrapped_demos=NUM_FEWSHOT,
)

print("\n✅ Optimization complete!")
# Only point at the MLflow UI when the tracking setup succeeded
if MLFLOW_ENABLED:
    print(f"📊 View traces in MLflow UI: {MLFLOW_TRACKING_URI}")

## 4. Evaluate Optimized Extractor

In [None]:
# Score the optimized extractor on at most the first 10 dev examples
EVAL_SIZE = min(10, len(devset))

evaluator = dspy.Evaluate(
    metric=statement_quality_metric,
    devset=devset[:EVAL_SIZE],
    display_progress=True,
    num_threads=1,
)
optimized_result = evaluator(optimized_extractor)

# Accept either a result object exposing `.score` or a value convertible to float
if hasattr(optimized_result, 'score'):
    optimized_score = optimized_result.score
else:
    optimized_score = float(optimized_result)

print(f"\nOptimized score: {optimized_score:.2f}")

# The comparison is only meaningful when a baseline was loaded earlier
if baseline_score is not None:
    improvement = optimized_score - baseline_score
    print(f"Baseline score:  {baseline_score:.2f}")
    print(f"Improvement:     {improvement:+.2f}")

## 5. Inspect Optimized Prompts

In [None]:
print("Optimized extractor configuration:")
print("=" * 60)

# NOTE(review): StatementExtractor is not visible here. If it is a composite
# DSPy module, demos and signatures live on its inner predictors rather than on
# the module itself, so walk named_predictors() when it is available and fall
# back to inspecting the object directly otherwise.
predictors = []
if hasattr(optimized_extractor, 'named_predictors'):
    predictors = list(optimized_extractor.named_predictors())
if not predictors:
    predictors = [("extractor", optimized_extractor)]

for predictor_name, predictor in predictors:
    print(f"\nPredictor: {predictor_name}")

    demos = getattr(predictor, 'demos', None) or []
    print(f"Demonstrations: {len(demos)}")
    for i, demo in enumerate(demos[:2], 1):
        # Demos may be Example-like objects or plain dicts, and the field may be
        # absent or None; read it defensively so the preview never raises
        if hasattr(demo, 'get'):
            context = demo.get('section_context', '')
        else:
            context = getattr(demo, 'section_context', '')
        print(f"  Demo {i}: {str(context or '')[:50]}...")

    signature = getattr(predictor, 'signature', None)
    if signature is not None:
        print(f"Signature: {signature}")
## 6. Save Optimized Model

Save with CID provenance for reproducibility.

In [None]:
# Save optimized extractor with provenance.
# The config and both scores are handed to the saver together with the model;
# `baseline_score` may be None when no baseline file was found earlier.
output_path = save_optimized_extractor(
    extractor=optimized_extractor,
    output_dir=training_dir,
    config=config,
    baseline_score=baseline_score,
    optimized_score=optimized_score,
)

print(f"✅ Saved optimized extractor to: {output_path}")

In [None]:
# Save optimization results summary.
# Test against None explicitly: a legitimate baseline of 0 is falsy, and a
# plain truthiness check would record the improvement as missing.
if baseline_score is not None:
    score_improvement = optimized_score - baseline_score
else:
    score_improvement = None

results = {
    "baseline_score": baseline_score,
    "optimized_score": optimized_score,
    "improvement": score_improvement,
    "train_size": len(trainset),
    "eval_size": EVAL_SIZE,
    "num_fewshot": NUM_FEWSHOT,
    "optimizer": "MIPROv2",
    "auto_mode": "light",
    "config_cid": config.get('cid'),
}

results_path = training_dir / "optimization_results.json"
# Write JSON as UTF-8 regardless of the platform default encoding
with open(results_path, "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

print(f"Saved optimization results to: {results_path}")
print(json.dumps(results, indent=2))

## Summary

This notebook:
1. Loaded config and datasets from `stage1_statements.ipynb`
2. Ran MIPROv2 prompt optimization
3. Evaluated the optimized extractor
4. Saved the optimized model with provenance

**Next steps**:
- Review optimization traces in MLflow UI
- Run Stage 2: Schema matching
- Run Stage 3: RDF generation