# Batch 2: Agentic Text2Cypher Inference (Day 2/5)

**Configurations**: Zero-Shot_Only-Paths, Few-Shot_Full

**Model**: Qwen3-32B via Groq API

**Questions**: 52 × 2 = 104 inferences

---

## Prerequisites
- Batch 1 completed (Zero-Shot_Full, Zero-Shot_Nodes+Paths)
- Groq API rate limit has reset (run this batch on a new day)

In [None]:
# Setup
import sys
import os
from pathlib import Path

# Resolve the project root in a way that survives re-running this cell.
# The notebook starts in a sub-directory of the project, but os.chdir() below
# moves the kernel into the root; a plain `Path.cwd().parent` would then climb
# one directory too far on the second run. The "config" package imported by a
# later cell lives directly under the root, so it is used as the marker.
current_dir = Path.cwd()
project_root = current_dir if (current_dir / "config").is_dir() else current_dir.parent
os.chdir(project_root)

# Make the project packages importable without adding duplicate entries
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

In [None]:
# Imports
import json
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm

from config.settings import Settings
from config.llm_config import LLMConfig
from data.ground_truth_loader import GroundTruthLoader
from prompts.prompt_manager import PromptManager, PromptType, SchemaFormat
from experiment.batch_processor import BatchProcessor

In [None]:
# Initialize
# Build the experiment settings and the LLM configuration, then validate the
# LLM configuration before anything else relies on it.
settings = Settings()
llm_config = LLMConfig()
llm_config.validate()

# Echo the values this run will use
print(
    f"Model: {llm_config.model}",
    f"Max Iterations: {settings.max_iterations}",
    sep="\n",
)

In [None]:
# Load ground truth
# `items` is the question set every configuration in this batch is run against.
loader = GroundTruthLoader()
items = loader.load()

question_count = len(items)
print(f"Loaded {question_count} questions")

## Batch 2 Configurations

| # | Prompt Type | Schema Format | Config Name |
|---|-------------|---------------|-------------|
| 3 | Zero-Shot | Only-Paths | Zero-Shot_Only-Paths |
| 4 | Few-Shot | Full Schema | Few-Shot_Full |

In [None]:
# Define Batch 2 configurations
# Each entry pairs a prompt type with a schema format; the loop that runs the
# experiments reads them through the "prompt" and "schema" keys.
BATCH_2_CONFIGS = [
    {"prompt": prompt_type, "schema": schema_format}
    for prompt_type, schema_format in (
        (PromptType.ZERO_SHOT, SchemaFormat.ONLY_PATHS),
        (PromptType.FEW_SHOT, SchemaFormat.FULL_SCHEMA),
    )
]

config_count = len(BATCH_2_CONFIGS)
print(f"Batch 2: {config_count} configurations")
# One inference per (configuration, question) pair
print(f"Total inferences: {config_count * len(items)}")

In [None]:
# Setup results directory
# Everything produced by this batch is written below
# <project_root>/results_v2/batch_2.
results_dir = project_root.joinpath("results_v2")
batch_results_dir = results_dir.joinpath("batch_2")

# Create the directory together with any missing parents; an existing
# directory is left untouched.
batch_results_dir.mkdir(exist_ok=True, parents=True)

print(f"Results will be saved to: {batch_results_dir}")

In [None]:
# Initialize processor
# Checkpoints for this batch are kept next to its results.
checkpoint_path = batch_results_dir / "checkpoints"

processor = BatchProcessor(
    llm_config=llm_config,
    settings=settings,
    checkpoint_dir=str(checkpoint_path),  # passed as a plain string, not a Path
)

# Used later to derive the configuration name for each prompt/schema pair
prompt_manager = PromptManager()

## Run Batch 2 Experiments

In [None]:
# Run experiments
# For every configuration: run the batch, persist the raw results as JSON and a
# flat CSV, then print headline statistics. A failure in one configuration is
# reported and the loop moves on to the next one.
batch_2_results = {}

for i, config in enumerate(BATCH_2_CONFIGS):
    config_name = prompt_manager.get_configuration_name(config["prompt"], config["schema"])
    print(f"\n{'='*60}")
    print(f"Configuration {i+1}/{len(BATCH_2_CONFIGS)}: {config_name}")
    print(f"{'='*60}")

    # Each configuration gets its own output directory
    config_dir = batch_results_dir / config_name
    config_dir.mkdir(parents=True, exist_ok=True)

    try:
        # NOTE(review): resume=True presumably continues from the checkpoint
        # directory given to the processor -- confirm in BatchProcessor.
        results = processor.process_batch(
            items=items,
            prompt_type=config["prompt"],
            schema_format=config["schema"],
            batch_id=f"batch2_{config_name}",
            resume=True
        )

        # Keep the in-memory results so the summary cell can aggregate them
        batch_2_results[config_name] = results

        # Save results: full state dumps as JSON. default=str stringifies any
        # value json cannot serialise natively.
        results_data = [state.to_dict() for state in results]
        with open(config_dir / "agentic_results.json", "w", encoding="utf-8") as f:
            json.dump(results_data, f, indent=2, default=str)

        # Save results: one flat row per question as CSV
        df = pd.DataFrame([{
            "question_id": s.question_id,
            "question": s.question,
            "ground_truth": s.ground_truth_query,
            "final_query": s.final_query,
            "success": s.success,
            "pass_at_1": s.first_attempt_success,
            "total_iterations": s.total_iterations,
            "kg_valid": s.kg_valid,
        } for s in results])
        df.to_csv(config_dir / "agentic_results.csv", index=False)

        # Stats
        total = len(results)
        if total == 0:
            # Nothing to average. Without this guard the divisions below raise
            # ZeroDivisionError, which the handler would misreport as a
            # processing error even though the files were written.
            print(f"\n{config_name}: no results returned; skipping statistics.")
            continue

        success_rate = sum(1 for s in results if s.success) / total * 100
        pass_at_1 = sum(1 for s in results if s.first_attempt_success) / total * 100
        avg_iterations = sum(s.total_iterations for s in results) / total

        print(f"\n{config_name} Results:")
        print(f"  Success Rate: {success_rate:.1f}%")
        print(f"  Pass@1 Rate: {pass_at_1:.1f}%")
        print(f"  Avg Iterations: {avg_iterations:.2f}")

    except Exception as e:
        # Deliberately broad: one failing configuration must not abort the
        # remaining ones. The full traceback is printed for diagnosis.
        print(f"Error processing {config_name}: {e}")
        import traceback
        traceback.print_exc()

## Batch 2 Summary

In [None]:
# Generate summary
# Aggregate the per-configuration results collected in batch_2_results into
# one table, show it, and save it as CSV. Nothing happens when no
# configuration produced results.
def _percentage(count, total):
    """Return count / total as a percentage rounded to 2 places.

    Returns 0.0 when total is 0 so a configuration without results does not
    raise ZeroDivisionError; its "Total" column still shows 0.
    """
    return round(count / total * 100, 2) if total else 0.0


if batch_2_results:
    summary_rows = []

    for config_name, results in batch_2_results.items():
        total = len(results)
        success_count = sum(1 for s in results if s.success)
        pass_at_1_count = sum(1 for s in results if s.first_attempt_success)
        kg_valid_count = sum(1 for s in results if s.kg_valid)
        iteration_sum = sum(s.total_iterations for s in results)

        summary_rows.append({
            "Configuration": config_name,
            "Total": total,
            "Success": success_count,
            "Success Rate (%)": _percentage(success_count, total),
            "Pass@1": pass_at_1_count,
            "Pass@1 Rate (%)": _percentage(pass_at_1_count, total),
            "KG Valid": kg_valid_count,
            "KG Valid Rate (%)": _percentage(kg_valid_count, total),
            # Same empty-results guard as the rates above
            "Avg Iterations": round(iteration_sum / total, 2) if total else 0.0,
        })

    df_summary = pd.DataFrame(summary_rows)
    # display() is supplied by the notebook environment (IPython)
    display(df_summary)

    df_summary.to_csv(batch_results_dir / "batch_2_summary.csv", index=False)
    print(f"\nSummary saved to: {batch_results_dir / 'batch_2_summary.csv'}")

In [None]:
# Save metadata
# Record what this batch ran (configurations, question count, model, iteration
# limit) together with a timestamp, next to the batch results.
configuration_names = [
    prompt_manager.get_configuration_name(cfg["prompt"], cfg["schema"])
    for cfg in BATCH_2_CONFIGS
]
question_count = len(items)

batch_metadata = {
    "batch_number": 2,
    "date": datetime.now().isoformat(),
    "configurations": configuration_names,
    "total_questions": question_count,
    "total_inferences": len(BATCH_2_CONFIGS) * question_count,
    "llm_model": llm_config.model,
    "max_iterations": settings.max_iterations,
}

metadata_path = batch_results_dir / "batch_2_metadata.json"
with metadata_path.open("w") as metadata_file:
    json.dump(batch_metadata, metadata_file, indent=2)

print("Batch 2 completed!")
print("Continue with batch-3_01_agentic_inference.ipynb tomorrow.")

In [None]:
# Close the batch processor now that every configuration has been run.
# NOTE(review): presumably releases connections held by the processor --
# confirm in BatchProcessor.close.
processor.close()