In [None]:
# Import required libraries
import sys
import json
import asyncio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Tuple
from collections import Counter, defaultdict

# ML evaluation metrics
from sklearn.metrics import (
    confusion_matrix, 
    classification_report, 
    accuracy_score,
    precision_recall_fscore_support,
    roc_auc_score
)

# Make the project importable from this notebook.
# If the working directory is `tests`, the root is its parent directory;
# otherwise the working directory itself is used as the root.
project_root = Path.cwd()
if project_root.name == 'tests':
    project_root = project_root.parent
sys.path.insert(0, str(project_root))

# Environment banner: one line each for the resolved root and library versions
print(
    f"📁 Project root: {project_root}",
    f"🐍 Python version: {sys.version}",
    f"📊 NumPy version: {np.__version__}",
    f"🐼 Pandas version: {pd.__version__}",
    sep="\n",
)


In [None]:
# Import our agents and components
from app.modules.agents.core_agent import CoreAgent, AgentDecision
from app.modules.agents.info_advisor import InfoAdvisor
from app.modules.agents.scheduling_advisor import SchedulingAdvisor
from app.modules.agents.exit_advisor import ExitAdvisor
from app.modules.database.vector_store import VectorStore
from config.phase1_settings import get_settings

# Load the project settings object and echo the model parameters it carries
settings = get_settings()
print(
    f"🔧 Model: {settings.OPENAI_MODEL}",
    f"🌡️ Temperature: {settings.OPENAI_TEMPERATURE}",
    sep="\n",
)


In [None]:
# Labeled evaluation set for the Core Agent decision.
# Messages are grouped by the decision label they are expected to produce and
# then flattened into one {"message", "expected"} record per message, keeping
# the group order (CONTINUE, INFO, SCHEDULE, END) and the order within a group.
core_agent_test_cases = [
    {"message": message, "expected": expected}
    for expected, messages in {
        "CONTINUE": [
            "Hi, I'm interested in learning more about this position",
            "Tell me about your company culture",
            "I have some questions about the role",
        ],
        "INFO": [
            "What programming languages are required for this position?",
            "What are the main responsibilities of this role?",
            "What experience level is needed?",
            "What technologies should I know?",
        ],
        "SCHEDULE": [
            "I'd like to schedule an interview",
            "When can we meet for an interview?",
            "Let's set up a time to talk",
        ],
        "END": [
            "I'm not interested in this position",
            "I found another job, thanks",
            "This role isn't a good fit for me",
        ],
    }.items()
    for message in messages
]

print(f"📊 Core Agent test cases: {len(core_agent_test_cases)}")

# Tally the cases per expected decision; Counter keeps first-seen label order
decision_counts = Counter([entry['expected'] for entry in core_agent_test_cases])
for decision, count in decision_counts.items():
    print(f"  {decision}: {count} cases")
