# Batch 5: Agentic Text2Cypher Inference (Day 5/5) - FINAL

**Configurations**: CoT_Only-Paths

**Model**: Qwen3-32B via Groq API

**Questions**: 52 × 1 = 52 inferences

---

## Prerequisites
- Batches 1, 2, 3 & 4 completed
- Rate limit reset (new day)

## Note
This is the final batch. After completion, run the consolidation cells to merge all results.

In [None]:
# Setup
import sys
import os
from pathlib import Path

# Resolve the project root only once per kernel session. The root is taken to
# be the parent of the directory the kernel starts in. Because this cell also
# changes the working directory, recomputing Path.cwd().parent on a re-run
# would climb one more level every time; reusing the stored value keeps the
# cell idempotent.
if "project_root" not in globals():
    project_root = Path.cwd().parent
os.chdir(project_root)

# Put the project root on the import path so the project packages imported by
# the next cell (config, data, prompts, experiment) can be found.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print(f"Project root: {project_root}")

In [None]:
# Imports
import json
import pandas as pd
from datetime import datetime
from tqdm.notebook import tqdm

from config.settings import Settings
from config.llm_config import LLMConfig
from data.ground_truth_loader import GroundTruthLoader
from prompts.prompt_manager import PromptManager, PromptType, SchemaFormat
from experiment.batch_processor import BatchProcessor

In [None]:
# Initialize the experiment settings and the LLM configuration.
# Both objects are handed to the batch processor and recorded in the batch
# metadata by later cells.
settings = Settings()
llm_config = LLMConfig()
# Validate the LLM configuration before any inference is attempted
# (what validate() checks is defined in config.llm_config).
llm_config.validate()

# Echo the two values that are later written to the metadata files.
print(f"Model: {llm_config.model}")
print(f"Max Iterations: {settings.max_iterations}")

In [None]:
# Load the ground-truth question set.
# `items` is passed to processor.process_batch for every configuration and its
# length is used for the inference totals printed and stored below.
loader = GroundTruthLoader()
items = loader.load()
print(f"Loaded {len(items)} questions")

## Batch 5 Configuration (Final)

| # | Prompt Type | Schema Format | Config Name |
|---|-------------|---------------|-------------|
| 9 | CoT | Only-Paths | CoT_Only-Paths |

In [None]:
# Define the Batch 5 configuration list.
# Each entry pairs a prompt type with a schema format; the run loop processes
# one full pass over `items` per entry.
BATCH_5_CONFIGS = [
    {"prompt": PromptType.CHAIN_OF_THOUGHT, "schema": SchemaFormat.ONLY_PATHS},
]

# Total inferences = number of configurations x number of loaded questions.
print(f"Batch 5 (Final): {len(BATCH_5_CONFIGS)} configuration")
print(f"Total inferences: {len(BATCH_5_CONFIGS) * len(items)}")

In [None]:
# Set up the results directory for this batch.
# results_v2/ is the shared root that the consolidation cells scan for
# batch_1 ... batch_5; this notebook writes only into batch_5/.
results_dir = project_root / "results_v2"
batch_results_dir = results_dir / "batch_5"
# parents/exist_ok make the cell safe to re-run and to run on a fresh checkout.
batch_results_dir.mkdir(parents=True, exist_ok=True)

print(f"Results will be saved to: {batch_results_dir}")

In [None]:
# Initialize the batch processor with the shared settings and LLM config.
processor = BatchProcessor(
    settings=settings,
    llm_config=llm_config,
    # Passed as a plain string path under this batch's results directory.
    checkpoint_dir=str(batch_results_dir / "checkpoints")
)

# Used to turn a (prompt type, schema format) pair into a configuration name.
prompt_manager = PromptManager()

## Run Batch 5 Experiment

In [None]:
# Run experiment
import traceback

# Maps configuration name -> results returned by the processor for that
# configuration. The summary cell reads this dictionary.
batch_5_results = {}

for config in BATCH_5_CONFIGS:
    config_name = prompt_manager.get_configuration_name(config["prompt"], config["schema"])
    print(f"\n{'='*60}")
    print(f"Configuration: {config_name} (FINAL)")
    print(f"{'='*60}")

    # Every configuration writes into its own sub-directory of batch_5/.
    config_dir = batch_results_dir / config_name
    config_dir.mkdir(parents=True, exist_ok=True)

    try:
        # NOTE(review): resume=True presumably lets the processor continue
        # from its checkpoint directory after an interruption; confirm in
        # BatchProcessor.
        results = processor.process_batch(
            items=items,
            prompt_type=config["prompt"],
            schema_format=config["schema"],
            batch_id=f"batch5_{config_name}",
            resume=True
        )

        total = len(results)
        if total == 0:
            # All rates below divide by the number of results. Bail out before
            # registering the configuration so neither this cell nor the
            # summary cell hits a ZeroDivisionError.
            print(f"No results returned for {config_name}; nothing saved.")
            continue

        batch_5_results[config_name] = results

        # Save the full per-question states as JSON. default=str stringifies
        # any value json cannot serialize natively.
        results_data = [state.to_dict() for state in results]
        with open(config_dir / "agentic_results.json", "w", encoding="utf-8") as f:
            json.dump(results_data, f, indent=2, default=str)

        # Save a flat per-question table; the consolidation cell reads this
        # CSV back by configuration name.
        df = pd.DataFrame([{
            "question_id": s.question_id,
            "question": s.question,
            "ground_truth": s.ground_truth_query,
            "final_query": s.final_query,
            "success": s.success,
            "pass_at_1": s.first_attempt_success,
            "total_iterations": s.total_iterations,
            "kg_valid": s.kg_valid,
        } for s in results])
        df.to_csv(config_dir / "agentic_results.csv", index=False)

        # Stats (percentages over all results of this configuration)
        success_rate = sum(1 for s in results if s.success) / total * 100
        pass_at_1 = sum(1 for s in results if s.first_attempt_success) / total * 100
        avg_iterations = sum(s.total_iterations for s in results) / total

        print(f"\n{config_name} Results:")
        print(f"  Success Rate: {success_rate:.1f}%")
        print(f"  Pass@1 Rate: {pass_at_1:.1f}%")
        print(f"  Avg Iterations: {avg_iterations:.2f}")

    except Exception as e:
        # Deliberately broad: a failure in one configuration is reported with
        # its traceback and the loop moves on to the next configuration.
        print(f"Error processing {config_name}: {e}")
        traceback.print_exc()

## Batch 5 Summary

In [None]:
# Generate summary
# Builds one row per configuration in batch_5_results and writes it to
# batch_5_summary.csv, the file the consolidation cell looks for.
if batch_5_results:
    summary_rows = []

    for config_name, results in batch_5_results.items():
        total = len(results)
        if total == 0:
            # Every rate divides by the result count; an empty configuration
            # has nothing to summarize.
            print(f"Skipping {config_name}: no results to summarize")
            continue

        success_count = sum(1 for s in results if s.success)
        pass_at_1_count = sum(1 for s in results if s.first_attempt_success)
        kg_valid_count = sum(1 for s in results if s.kg_valid)
        iteration_sum = sum(s.total_iterations for s in results)

        summary_rows.append({
            "Configuration": config_name,
            "Total": total,
            "Success": success_count,
            "Success Rate (%)": round(success_count / total * 100, 2),
            "Pass@1": pass_at_1_count,
            "Pass@1 Rate (%)": round(pass_at_1_count / total * 100, 2),
            "KG Valid": kg_valid_count,
            "KG Valid Rate (%)": round(kg_valid_count / total * 100, 2),
            "Avg Iterations": round(iteration_sum / total, 2),
        })

    if summary_rows:
        df_summary = pd.DataFrame(summary_rows)
        display(df_summary)

        df_summary.to_csv(batch_results_dir / "batch_5_summary.csv", index=False)
        print(f"\nSummary saved to: {batch_results_dir / 'batch_5_summary.csv'}")
    else:
        # Writing a frame with no rows and no columns would leave a CSV that
        # pd.read_csv cannot parse when the consolidation cell loads it.
        print("No summary rows produced; batch_5_summary.csv not written.")

In [None]:
# Save metadata
# Records when and with which model/settings this batch was executed, next to
# the batch results.
run_timestamp = datetime.now().isoformat()
configuration_names = [
    prompt_manager.get_configuration_name(cfg["prompt"], cfg["schema"])
    for cfg in BATCH_5_CONFIGS
]
question_count = len(items)

batch_metadata = dict(
    batch_number=5,
    date=run_timestamp,
    configurations=configuration_names,
    total_questions=question_count,
    total_inferences=len(BATCH_5_CONFIGS) * question_count,
    llm_model=llm_config.model,
    max_iterations=settings.max_iterations,
)

metadata_path = batch_results_dir / "batch_5_metadata.json"
with open(metadata_path, "w") as f:
    json.dump(batch_metadata, f, indent=2)

print("Batch 5 completed!")

In [None]:
# Batch 5 is finished: shut the processor down before consolidating.
# (What close() releases is defined in experiment.batch_processor.)
processor.close()

---

# Consolidate All Results

After all 5 batches are completed, run the cells below to merge results.

In [None]:
# Consolidate all batch results
# Reads batch_N_summary.csv for batches 1-5 plus each configuration's detailed
# agentic_results.csv, and merges the summaries into df_all_summary.
print("="*60)
print("CONSOLIDATING ALL BATCH RESULTS")
print("="*60)

results_dir = project_root / "results_v2"
all_summaries = []
all_results = {}
# Start from an empty frame so that a re-run which finds no summaries cannot
# leave a stale df_all_summary from an earlier execution for the cells below.
df_all_summary = pd.DataFrame()

for batch_num in range(1, 6):
    batch_dir = results_dir / f"batch_{batch_num}"
    summary_path = batch_dir / f"batch_{batch_num}_summary.csv"

    if not summary_path.exists():
        print(f"Batch {batch_num} not found (run batch-{batch_num}_01 notebook first)")
        continue

    df = pd.read_csv(summary_path)
    # Tag every row with the batch it came from.
    df['Batch'] = batch_num
    all_summaries.append(df)
    print(f"Loaded batch {batch_num}: {len(df)} configurations")

    # Load detailed results, keyed by configuration name.
    for config_name in df['Configuration'].values:
        config_dir = batch_dir / config_name
        csv_path = config_dir / "agentic_results.csv"
        if csv_path.exists():
            all_results[config_name] = pd.read_csv(csv_path)
        else:
            # Report the gap instead of skipping it silently.
            print(f"  Detailed results missing for {config_name}: {csv_path}")

if all_summaries:
    df_all_summary = pd.concat(all_summaries, ignore_index=True)
    print(f"\nTotal configurations: {len(df_all_summary)}")

In [None]:
# Display consolidated summary
# Shows the key rate columns sorted by Pass@1 and saves the merged table.
if 'df_all_summary' in dir() and len(df_all_summary) > 0:
    expected_configs = 9
    loaded_configs = len(df_all_summary)
    # The guard above also admits partial consolidations, so the header only
    # claims "ALL 9" when all nine configurations were actually loaded.
    if loaded_configs == expected_configs:
        scope = f"ALL {expected_configs}"
    else:
        scope = f"{loaded_configs} OF {expected_configs}"

    print("\n" + "="*60)
    print(f"CONSOLIDATED RESULTS - {scope} CONFIGURATIONS")
    print("="*60)

    display(df_all_summary[[
        'Configuration', 'Success Rate (%)', 'Pass@1 Rate (%)',
        'KG Valid Rate (%)', 'Avg Iterations'
    ]].sort_values('Pass@1 Rate (%)', ascending=False))

    # Save consolidated summary
    df_all_summary.to_csv(results_dir / "consolidated_summary.csv", index=False)
    print(f"\nConsolidated summary saved to: {results_dir / 'consolidated_summary.csv'}")

In [None]:
# Create experiment summary JSON
# Written only once all configurations of the experiment are consolidated, so
# experiment_summary.json never describes a partial run.
expected_configs = 9
loaded_configs = len(df_all_summary) if 'df_all_summary' in dir() else 0

if loaded_configs == expected_configs:
    experiment_summary = {
        "timestamp": datetime.now().isoformat(),
        "llm_provider": "groq",
        "llm_model": llm_config.model,
        "total_questions": len(items),
        "total_configurations": expected_configs,
        "total_inferences": expected_configs * len(items),
        "max_iterations": settings.max_iterations,
        "configurations": {}
    }

    # One entry per configuration, holding its four headline metrics.
    for _, row in df_all_summary.iterrows():
        config_name = row['Configuration']
        experiment_summary["configurations"][config_name] = {
            "success_rate": row['Success Rate (%)'],
            "pass_at_1_rate": row['Pass@1 Rate (%)'],
            "kg_valid_rate": row['KG Valid Rate (%)'],
            "avg_iterations": row['Avg Iterations'],
        }

    with open(results_dir / "experiment_summary.json", "w", encoding="utf-8") as f:
        json.dump(experiment_summary, f, indent=2)

    print(f"Experiment summary saved to: {results_dir / 'experiment_summary.json'}")
    print("\nAll experiments completed! Proceed to 02_evaluation_metrics.ipynb")
else:
    # Say why nothing was written instead of finishing silently.
    print(
        f"Experiment summary not written: {loaded_configs} of "
        f"{expected_configs} configurations consolidated. "
        "Complete the missing batches and re-run the consolidation cells."
    )