# Quick Test: Fixed Ensemble Implementation

Testing the fixed ensemble with:
- Correct train_pct/test_pct values (0-100 scale)
- Stratified K-fold (no class mismatch)
- Increased Ridge regularization (less ill-conditioned)
- Optimized data flow (fewer conversions)

In [None]:
# Setup
import sys
import time
import numpy as np

# Local checkout directories added to the module search path ahead of the
# project imports that follow.
_EXTRA_IMPORT_DIRS = (
    '/Users/urav/code/research',
    '/Users/urav/code/research/quant/code',
    '/Users/urav/code/research/hydra/code',
    '/Users/urav/code/research/aaltd2024/code',
)
sys.path.extend(_EXTRA_IMPORT_DIRS)

from tsckit import (
    MonsterDataset,
    QuantAALTD2024,
    HydraAALTD2024,
    HydraQuantStackedAALTD2024,  # Old ensemble
    HydraQuantStacked            # New clean ensemble
)

# Seed NumPy's legacy global RNG so anything drawing from np.random is
# repeatable between runs of this notebook.
np.random.seed(42)
print("✓ Setup complete")

In [None]:
# Percentages are passed on a 0-100 scale (10.0 means 10%), not as 0-1 fractions.
print("Loading Pedestrian dataset...")
dataset = MonsterDataset(
    "Pedestrian",
    fold=0,
    train_pct=10.0,  # 10% train
    test_pct=50.0,   # 50% test
)
print(dataset.info())

# Only the second value returned for the test split (the labels) is used here.
_, y_test = dataset.get_arrays("test")
n_test_samples = len(y_test)
n_test_classes = len(np.unique(y_test))
print(f"Test samples: {n_test_samples}, Classes: {n_test_classes}")

In [None]:
# Test individual algorithms first
def test_algorithm(algorithm, name):
    """Fit and score one classifier against the notebook-level test labels.

    Uses the module-level ``dataset`` (passed to both ``fit`` and
    ``predict``) and ``y_test`` (the reference labels), and prints a
    one-line report with accuracy and timings.

    Args:
        algorithm: Object exposing ``fit(dataset)`` and ``predict(dataset)``.
        name: Label used in the progress and result lines.

    Returns:
        The fraction of predictions equal to ``y_test``.

    Raises:
        ValueError: If the predictions and ``y_test`` differ in shape.
    """
    print(f"\n🔄 Testing {name}...")

    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be distorted by system clock adjustments.
    start = time.perf_counter()
    algorithm.fit(dataset)
    train_time = time.perf_counter() - start

    start = time.perf_counter()
    predictions = algorithm.predict(dataset)
    test_time = time.perf_counter() - start

    # Refuse to compare mismatched shapes: the element-wise comparison would
    # otherwise broadcast or collapse into a misleading accuracy figure.
    if np.shape(predictions) != np.shape(y_test):
        raise ValueError(
            f"{name}: predictions have shape {np.shape(predictions)}, "
            f"expected {np.shape(y_test)}"
        )

    accuracy = np.mean(predictions == y_test)
    print(f"   ✅ {name}: {accuracy:.4f} accuracy, {train_time:.1f}s train, {test_time:.1f}s test")
    return accuracy

# Baselines: evaluate each standalone classifier on its own.
quant_baseline = QuantAALTD2024(num_estimators=50)
quant_acc = test_algorithm(quant_baseline, "QuantAALTD2024")

hydra_baseline = HydraAALTD2024(k=4, g=16, seed=42)
hydra_acc = test_algorithm(hydra_baseline, "HydraAALTD2024")

In [None]:
# Previous stacked ensemble, reported under the "data leak" label.
old_ensemble = HydraQuantStackedAALTD2024(
    hydra_k=4,
    hydra_g=16,
    hydra_seed=42,
    quant_estimators=50,
)
old_acc = test_algorithm(old_ensemble, "OldEnsemble (data leak)")

In [None]:
# Reworked stacked ensemble, reported under the "clean CV" label (3 folds).
new_ensemble = HydraQuantStacked(
    n_folds=3,
    hydra_k=4,
    hydra_g=16,
    hydra_seed=42,
    n_estimators=50,
)
new_acc = test_algorithm(new_ensemble, "NewEnsemble (clean CV)")

In [None]:
# Final report: per-model accuracies, then each ensemble's margin over the
# stronger of the two standalone baselines.
banner_rule = "=" * 60
print("\n" + banner_rule)
print("📊 FINAL COMPARISON")
print(banner_rule)
print(f"QuantAALTD2024:              {quant_acc:.4f}")
print(f"HydraAALTD2024:              {hydra_acc:.4f}")
print(f"Old Ensemble (data leak):    {old_acc:.4f}")
print(f"New Ensemble (clean CV):     {new_acc:.4f}")

best_individual = max(quant_acc, hydra_acc)
old_gain = old_acc - best_individual
new_gain = new_acc - best_individual
print("\n📈 Ensemble Analysis:")
print(f"Best individual:             {best_individual:.4f}")
print(f"Old ensemble improvement:    {old_gain:+.4f}")
print(f"New ensemble improvement:    {new_gain:+.4f}")

# Head-to-head verdict; the final branch is reached only on an exact tie
# (or when the two values are not comparable).
if new_acc > old_acc:
    verdict = f"✅ Clean ensemble beats old ensemble by {new_acc - old_acc:.4f}"
elif old_acc > new_acc:
    verdict = f"⚠️ Old ensemble still ahead by {old_acc - new_acc:.4f} (but has data leakage)"
else:
    verdict = "🔄 Both ensembles perform similarly"
print(verdict)

print("\n✅ Test complete!")