# DeMo Scenario Classifier

This notebook lets you visualize preprocessed DeMo scenarios and manually assign each one to a driving-behavior category. Classifications are saved to a CSV file; the original files are not moved.

## Driving Behavior Categories:
- **Expert 1**: Lane Keeping
- **Expert 2**: Lane Change Left  
- **Expert 3**: Lane Change Right
- **Expert 4**: Turn Left
- **Expert 5**: Turn Right
- **Expert 6**: Acceleration
- **Expert 7**: Deceleration
- **Expert 8**: Yield / Stop
- **Expert 9**: Complex (difficult to classify scenarios)

## Classification Storage:
- All classifications are saved in **CSV format** (`classifications.csv`)
- Format: `[file_index, filename, expert_id]`
- Original `.pt` files remain in their original location
- No file copying or moving operations

## 1. Import Required Libraries and Setup

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import os
import sys
import warnings
from typing import List, Optional, Dict
import importlib
warnings.filterwarnings('ignore')

# Locate the project root (the nearest directory containing "src") and make
# it importable.
def _locate_project_root(start: Path) -> Path:
    """Return *start* or its closest ancestor that contains a ``src`` entry.

    Falls back to *start* itself when no such directory is found.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src").exists():
            return candidate
    return start


project_root = _locate_project_root(Path.cwd().resolve())

# Only touch sys.path when a "src" entry was actually found, and never add
# the same entry twice.
if str(project_root) not in sys.path and (project_root / "src").exists():
    sys.path.insert(0, str(project_root))

# Import the two project modules under aliases, then reload each of them.
# The reload happens before the `from src.utils... import ...` statements
# that follow, so those names are bound to the freshly executed module code.
# NOTE(review): presumably this lets edits to the modules take effect
# without restarting the kernel — confirm.
import src.utils.data_visualization as viz_module
import src.utils.scenario_classifier as clf_module
importlib.reload(viz_module)
importlib.reload(clf_module)

# Import visualization components
from src.utils.data_visualization import (
    DataSelector, DataLoader, TrajectoryVisualizer, EgoOtherVisualizer,
    DEFAULT_VIZ_CONFIG, display_agent_information, display_scenario_statistics,
    get_available_files, display_file_info, explore_scenario,
    plot_ego_velocity_analysis, create_velocity_plots, create_integrated_ego_analysis,
    )

# Import ALL classification components from the dedicated module
from src.utils.scenario_classifier import (
    ScenarioClassifier, 
    list_files_with_status,
    print_expert_categories,
    manual_classify_current
    )

# Try to import ipywidgets for interactive controls
try:
    import ipywidgets as widgets
    from IPython.display import display, clear_output
    WIDGETS_AVAILABLE = True
except ImportError:
    print("Warning: ipywidgets not available. Interactive controls will be limited.")
    WIDGETS_AVAILABLE = False

# Start from matplotlib's stock style, then apply the notebook-wide defaults
plt.style.use('default')
plt.rcParams.update({
    'figure.figsize': (15, 10),
    'font.size': 10,
})

# Confirm the setup and name the helper modules backing this notebook
for status_line in (
    "✅ All required libraries imported successfully!",
    "📦 Visualization: data_visualization.py",
    "📦 Classification: scenario_classifier.py",
):
    print(status_line)

## 2. Configuration and Setup

In [None]:
# Base path to preprocessed data.
# NOTE: these are relative paths, so they resolve against the current
# working directory of the notebook kernel.
BASE_DATA_PATH = Path("data/DeMo_processed")
TRAIN_DATA_PATH = BASE_DATA_PATH / "train"

# Output path for classified scenarios
OUTPUT_BASE_PATH = Path("data/DeMo_classified")

# Expert categories mapping (expert id -> category name)
EXPERT_CATEGORIES = {
    1: "Lane_Keeping",
    2: "Lane_Change_Left",
    3: "Lane_Change_Right",
    4: "Turn_Left",
    5: "Turn_Right",
    6: "Acceleration",
    7: "Deceleration",
    8: "Yield_Stop",
    9: "Complex",
}

# Classification log file (now using CSV format)
CLASSIFICATION_CSV = OUTPUT_BASE_PATH / "classifications.csv"

# Always define train_files so later references do not hit a NameError when
# the data directory is missing.
train_files = []

# Check if data directory exists
if not TRAIN_DATA_PATH.exists():
    print(f"ERROR: Train data directory {TRAIN_DATA_PATH} not found!")
    print("Please ensure the preprocessed data is available in the correct location.")
else:
    print(f"Train data directory found: {TRAIN_DATA_PATH}")
    # Sorted so the order matches the list built by the visualization cell
    train_files = sorted(TRAIN_DATA_PATH.glob("*.pt"))
    print(f"Found {len(train_files)} .pt files in train directory")

# Create output directory (no longer creating subdirectories for each expert).
# parents=True is required: without it mkdir raises FileNotFoundError when
# the parent "data" directory does not exist yet.
OUTPUT_BASE_PATH.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {OUTPUT_BASE_PATH}")
print(f"Classification CSV: {CLASSIFICATION_CSV}")

## 3. Classification Tracking System

In [None]:
# Initialize classifier using the imported ScenarioClassifier class.
# It receives the output directory, the classification CSV path and the
# expert-id -> category-name mapping defined in the configuration cell.
classifier = ScenarioClassifier(OUTPUT_BASE_PATH, CLASSIFICATION_CSV, EXPERT_CATEGORIES)
print("Scenario classifier initialized!")
# Kept as the final expression of the cell.
# NOTE(review): presumably prints/returns per-category counts — confirm.
classifier.get_classification_stats()

## 4. Initialize Visualization Components

In [None]:
# Collect the training scenarios in a stable, sorted order and register
# them with the selector under the 'train' split.
train_files = sorted(TRAIN_DATA_PATH.glob("*.pt"))
all_files = dict(train=train_files)

# Build the selector, the loader and the ego/other visualizer that the
# later cells share.
data_selector = DataSelector(all_files)
data_loader = DataLoader()
ego_other_visualizer = EgoOtherVisualizer(data_loader, DEFAULT_VIZ_CONFIG)

print("=== Visualization Components Initialized ===")
print(f"Total train files: {len(train_files)}")

# Report how much of the dataset still has no classification
unclassified_files = classifier.get_unclassified_files(train_files)
print(f"Unclassified files: {len(unclassified_files)}")
print(f"Classified files: {len(train_files) - len(unclassified_files)}")

## 4.5. Test File Selection Methods

Quick test to verify all file selection methods work correctly.

In [None]:
# === TEST FILE SELECTION METHODS ===
# Smoke-test each DataSelector lookup. Every check records whether it
# produced a result, so the final summary reflects what actually happened
# instead of always reporting success.
print("🧪 Testing File Selection Methods\n")
print("="*60)

# Names of the checks that did not return a usable result
selection_failures = []


def _report_selection(check_name, selected):
    """Print the outcome of one selection check and record a failure."""
    if selected:
        print(f"   ✅ Success! Selected: {selected.name}")
    else:
        print(f"   ❌ {check_name} did not return a file")
        selection_failures.append(check_name)


# Test 1: List files using the imported function
print("\n1️⃣ Testing list_files_with_status():")
list_files_with_status(train_files, classifier, max_files=5)

# The remaining checks all need at least one file to select
if train_files:
    # Test 2: Select by index
    print("\n" + "="*60)
    print("\n2️⃣ Testing selection by index:")
    test_file = data_selector.select_file_by_index('train', 0)
    _report_selection("selection by index", test_file)

    # Test 3: Select by name (using first file)
    print("\n" + "="*60)
    print("\n3️⃣ Testing selection by filename:")
    # .stem drops only the final suffix; str.replace('.pt', '') would also
    # remove any other '.pt' occurring inside the name.
    first_file_name = train_files[0].stem
    test_file = data_selector.select_file_by_name(first_file_name, 'train')
    _report_selection("selection by filename", test_file)

    # Test 4: Get file index
    print("\n" + "="*60)
    print("\n4️⃣ Testing get_file_index():")
    first_file_name = train_files[0].name
    index = data_selector.get_file_index(first_file_name, 'train')
    if index is not None:
        print(f"   ✅ Success! File '{first_file_name}' is at index {index}")
    else:
        print(f"   ❌ get_file_index() did not find '{first_file_name}'")
        selection_failures.append("get_file_index()")

    # Test 5: Sequential navigation
    print("\n" + "="*60)
    print("\n5️⃣ Testing sequential navigation:")
    print("   Testing next_file():")
    test_file = data_selector.select_next_file('train')
    _report_selection("next_file()", test_file)

    print("\n   Testing previous_file():")
    test_file = data_selector.select_previous_file('train')
    _report_selection("previous_file()", test_file)

# Summary: only claim success when every check actually passed
print("\n" + "="*60)
if not train_files:
    print("\n⚠️ No .pt files available - file selection methods were not exercised.")
elif selection_failures:
    print(f"\n❌ {len(selection_failures)} file selection check(s) failed: {', '.join(selection_failures)}")
else:
    print("\n✅ All file selection methods are working correctly!")
print("\n💡 Note: list_available_files() is defined in the next cell for use in the workflow")

## 6.5. Sequential Classification by Index

Use this section to classify files in order by their index. This is useful for systematic classification of the entire dataset.

In [None]:
# === SEQUENTIAL CLASSIFICATION BY INDEX ===
# Displays the next unclassified scenario. `sequential_index` is a notebook
# global that survives between runs of this cell; after a scenario has been
# displayed it points one past that scenario, which is why the
# classification cell passes `sequential_index-1`.

# Initialize index if not exists
if 'sequential_index' not in globals():
    sequential_index = 0
    print("🆕 Initialized sequential classification")
    print(f"Starting from index: {sequential_index}\n")

# Get unclassified files
unclassified = classifier.get_unclassified_files(train_files)
# Set copy for O(1) membership tests during the scan below
unclassified_lookup = set(unclassified)

# Find next unclassified file index
file_path = None
original_index = sequential_index
total_files = len(train_files)
scan_start = min(max(original_index, 0), total_files)

# Scan forward from the saved position first, then wrap around to the
# indices before it. Without the wrap-around, files that were displayed but
# never classified (e.g. after re-running this cell) would never be offered
# again and the cell would wrongly report that everything is classified.
for candidate_index in [*range(scan_start, total_files), *range(scan_start)]:
    # Use the data_selector method to get file by index
    temp_file = data_selector.select_file_by_index('train', candidate_index)
    if temp_file and temp_file in unclassified_lookup:
        # Found an unclassified file
        file_path = temp_file
        sequential_index = candidate_index
        break
else:
    # Nothing could be selected: leave the index at the end of the list,
    # exactly where a plain forward scan would have stopped.
    sequential_index = max(original_index, total_files)

# Visualize and classify if found
if file_path:
    if sequential_index < original_index:
        print("↩️  Reached the end of the list; returning to files that were skipped earlier.\n")

    print(f"{'='*60}")
    print(f"📍 File index: {sequential_index} / {len(train_files)}")
    print(f"📁 File: {file_path.name}")
    print(f"📋 Remaining unclassified: {len(unclassified)}")
    print(f"{'='*60}\n")

    # Load and visualize
    data_loader.load_scenario(file_path)
    data_loader.print_summary()
    print()

    # Create visualization
    create_integrated_ego_analysis(
        data_loader, ego_other_visualizer,
        show_lanes=True,
        show_agent_ids=True,
        show_velocity=True
    )

    # Display classification options
    print(f"\n📝 Current file (index {sequential_index}): {file_path.name}")
    print_expert_categories(EXPERT_CATEGORIES)

    # Move to next index for next run
    sequential_index += 1
    print(f"\n➡️  Next index will be: {sequential_index}")
elif unclassified:
    # Files are still pending, but none of them could be reached by index
    print(f"⚠️  {len(unclassified)} file(s) are still unclassified but could not be selected by index.")
    print(f"Total files: {len(train_files)}")
else:
    print("🎉 All files have been classified!")
    print(f"Total files: {len(train_files)}")
    print("\n📊 Final Statistics:")
    classifier.get_classification_stats()


In [None]:
# === CLASSIFY CURRENT SEQUENTIAL FILE ===
# After analyzing the visualization above, classify the current file
#
# The sequential cell advances `sequential_index` past the scenario it
# displays, so the scenario currently on screen is at `sequential_index-1`.
# Example usage:
# manual_classify_current(data_loader, classifier, expert_id=1, file_index=sequential_index-1)

# Refuse to record a label when the sequential cell has not been run or did
# not find a scenario to display; otherwise a classification would be stored
# for an index whose scenario was never shown.
if globals().get('file_path') is None:
    raise RuntimeError(
        "No scenario is currently selected: run the sequential classification "
        "cell first and make sure it displays a file."
    )

print("✅ Classification cell ready!")
# Expert id to assign to the displayed scenario; edit before running the cell
Current_ID = 1
manual_classify_current(data_loader, classifier, expert_id=Current_ID, file_index=sequential_index-1)