# Quick Debug: TSC Algorithms on Pedestrian Dataset

Simple sanity check for all tsckit algorithms:
- **Dataset**: Pedestrian (1% train, 1% test for speed)
- **Algorithms**: All 10 algorithms including both ensembles
- **Purpose**: Verify everything works after code changes

In [None]:
import sys

# Make the local research checkouts importable.
# Each path is appended only when it is not already on sys.path, so re-running
# this cell does not accumulate duplicate entries. On a first run the result
# is the same as extending sys.path with all four paths, in this order.
for _path in (
    '/Users/urav/code/research',                    # For tsckit package
    '/Users/urav/code/research/quant/code',         # For original quant.py
    '/Users/urav/code/research/hydra/code',         # For original hydra.py
    '/Users/urav/code/research/aaltd2024/code',     # For quant_aaltd.py, hydra_gpu.py, utils.py, ridge.py
):
    if _path not in sys.path:
        sys.path.append(_path)

In [None]:
from tsckit import Experiment, MonsterDataset

from tsckit.algorithms import (
    AeonAlgorithm, HydraAALTD2024, HydraOriginal, QuantAALTD2024, QuantOriginal,
    HydraQuantStackedAALTD2024, # Old ensemble (data leakage)
    HydraQuantStacked           # New clean ensemble (proper CV)
)

In [None]:
# Create the dataset used by the experiment.
# The notebook title, its header ("Dataset: Pedestrian") and the experiment
# name "debug_pedestrian" all target the Pedestrian dataset, so load that one
# instead of "FordChallenge".
# NOTE(review): if FordChallenge was the intended dataset, revert this line and
# update the notebook header / experiment name instead.
# train_pct=1 / test_pct=1 correspond to the "1% train, 1% test" subsample
# described in the header (kept small for speed).
dataset = MonsterDataset("Pedestrian", fold=0, train_pct=1, test_pct=1)
print("Dataset info:")
print(dataset.info())

In [None]:
# Assemble every algorithm to evaluate, grouped by implementation family.
# The final list keeps the order: originals, AALTD2024, AEON, ensembles.

# Original implementations
original_impls = [
    QuantOriginal(depth=6),
    HydraOriginal(k=4, g=16, seed=42),
]

# AALTD2024 implementations
aaltd2024_impls = [
    QuantAALTD2024(num_estimators=50),  # Reduced for speed
    HydraAALTD2024(k=4, g=16, seed=42),
]

# AEON implementations: algorithm name -> extra keyword arguments
aeon_configs = {
    "quant": {},
    "hydra": {"n_kernels": 4, "n_groups": 16},
    "rocket": {"n_kernels": 500},
    "multirocket": {"n_kernels": 100, "max_dilations_per_kernel": 16},
}
aeon_impls = [
    AeonAlgorithm(algorithm=aeon_name, **aeon_kwargs)
    for aeon_name, aeon_kwargs in aeon_configs.items()
]

# Ensembles
ensemble_impls = [
    HydraQuantStackedAALTD2024(hydra_k=4, hydra_g=16, hydra_seed=42, quant_estimators=50),  # Old (data leakage)
    HydraQuantStacked(n_folds=3, hydra_k=4, hydra_g=16, hydra_seed=42, n_estimators=50),    # New (clean CV)
]

algorithms = [*original_impls, *aaltd2024_impls, *aeon_impls, *ensemble_impls]

In [None]:
# Bundle the single dataset and the full algorithm list into one experiment.
exp = Experiment(name="debug_pedestrian", datasets=[dataset], algorithms=algorithms)

# Report how many algorithms were registered.
algorithm_count = len(algorithms)
print(f"🔬 Experiment setup complete: {algorithm_count} algorithms")

In [None]:
# Run all experiments
# Calls run() on the experiment built in the previous cell, passing
# verbose=True to request verbose output.
exp.run(verbose=True)

In [None]:
# Fetch the experiment's summary and print it.
summary_text = exp.summary()
print(summary_text)

In [None]:
# Detailed results analysis: plot accuracy and runtime for the successful
# runs, print a short textual recap, then list any failed runs.
import matplotlib.pyplot as plt

df = exp.results_df()
successful = df[df['status'] == 'success'].copy()

if len(successful) == 0:
    print("❌ No successful runs to analyze")
else:
    # Ascending sort: the best accuracy ends up in the last row.
    successful = successful.sort_values('accuracy', ascending=True)

    # One entry per panel: (column, x-axis label, title, label offset, label format)
    panel_specs = [
        ('accuracy', 'Accuracy', 'Algorithm Accuracy Comparison', 0.005, '{:.3f}'),
        ('total_time', 'Total Time (seconds)', 'Algorithm Runtime Comparison', 0.01, '{:.2f}s'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    bar_positions = range(len(successful))
    algorithm_names = successful['algorithm_name']

    for axis, (column, x_label, title, label_offset, label_format) in zip(axes, panel_specs):
        column_values = successful[column]

        # Horizontal bars, one per algorithm, labelled with the algorithm name
        axis.barh(bar_positions, column_values)
        axis.set_yticks(bar_positions)
        axis.set_yticklabels(algorithm_names, fontsize=10)
        axis.set_xlabel(x_label)
        axis.set_title(title)
        axis.grid(True, alpha=0.3)

        # Write each value just to the right of its bar
        for bar_index, value in enumerate(column_values):
            axis.text(value + label_offset, bar_index, label_format.format(value),
                      va='center', fontsize=9)

    plt.tight_layout()
    plt.show()

    # Ensemble comparison table
    print("\n📊 Key Results:")
    print("=" * 60)

    # Locate the two ensemble rows by pattern-matching on the algorithm name.
    # NOTE(review): these patterns assume the names reported by tsckit contain
    # 'HydraQuantStacked(hydra_k' / 'HydraQuantStacked(folds' -- confirm.
    old_ensemble = successful[algorithm_names.str.contains('HydraQuantStacked\\(hydra_k')]
    new_ensemble = successful[algorithm_names.str.contains('HydraQuantStacked\\(folds')]

    if len(old_ensemble) > 0 and len(new_ensemble) > 0:
        old_accuracy = old_ensemble.iloc[0]['accuracy']
        new_accuracy = new_ensemble.iloc[0]['accuracy']
        print(f"🔄 Old Ensemble (data leakage):   {old_accuracy:.4f}")
        print(f"✨ New Ensemble (clean CV):      {new_accuracy:.4f}")
        improvement = new_accuracy - old_accuracy
        print(f"📈 Improvement: {improvement:+.4f}")

    best_row = successful.iloc[-1]
    print(f"\n🏆 Best Algorithm: {best_row['algorithm_name']} ({best_row['accuracy']:.4f})")

    run_times = successful['total_time']
    fastest_name = successful.loc[run_times.idxmin(), 'algorithm_name']
    print(f"⚡ Fastest Algorithm: {fastest_name} ({run_times.min():.3f}s)")

# Show any failures
failed = df[df['status'] == 'failed']
if len(failed) > 0:
    print(f"\n⚠️  {len(failed)} algorithms failed:")
    for failed_name, failed_error in zip(failed['algorithm_name'], failed['error']):
        print(f"   - {failed_name}: {failed_error}")

In [None]:
# Final status lines for the notebook run.
# Fixed the misspelled word in the completion message ("successfully").
print("✅ Debug completed successfully!")
print("💡 Note: Results not saved (debug mode). Use output_dir for persistent storage.")