In [1]:
# Cell 1: Setup and Imports
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import warnings
warnings.filterwarnings('ignore')

# PyTorch and PyTorch Geometric
import torch
import torch.nn as nn
import torch_geometric as pyg
from torch_geometric.data import Data

# STL decomposition
from statsmodels.tsa.seasonal import STL

# Import existing infrastructure
from data import EvaluationCaseManager, load_evaluation_cases, get_case_train_test_data

# Import STGAT classes
from models.stgat import PatternAwareSTGAT, STGATGraphBuilder, STGATEvaluator

# Plot defaults applied to every figure drawn later in the notebook.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Startup banner, kept in one tuple so the text lives in a single place.
# Library versions are read from the imported packages at run time.
# NOTE(review): the recorded output of the results-loading cell reports a
# Phase 3.6 baseline of 0.4190, not the 0.4130 quoted here - confirm which
# figure is current.
startup_banner = (
    "📚 Libraries imported successfully",
    f"🔥 PyTorch: {torch.__version__}",
    f"🔥 PyTorch Geometric: {pyg.__version__}",
    "🎯 Phase 4: STGAT Implementation",
    "🏆 Building on Phase 3.6 Excellence (0.4130 RMSLE baseline)",
    "✅ STGAT classes imported from src/models/stgat.py",
    "=" * 60,
)
print(*startup_banner, sep="\n")

📚 Libraries imported successfully
🔥 PyTorch: 2.2.2
🔥 PyTorch Geometric: 2.6.1
🎯 Phase 4: STGAT Implementation
🏆 Building on Phase 3.6 Excellence (0.4130 RMSLE baseline)
✅ STGAT classes imported from src/models/stgat.py


In [2]:
# Cell 2: Load Phase 3.6 Results and Pattern Analysis
#
# Reads the two Phase 3.6 JSON artefacts and, when either file is missing,
# substitutes a reproducible mock so the rest of the notebook can still run.
# Names defined for later cells: evaluation_cases, case_manager,
# phase36_results, pattern_analysis, pattern_classifications,
# performance_summary, baseline_rmsle, success_rate, phase36_baseline_rmsle.
print("📊 LOADING PHASE 3.6 BASELINE RESULTS")
print("=" * 40)

# Load evaluation cases
evaluation_cases = load_evaluation_cases()
case_manager = EvaluationCaseManager()

print(f"✅ Loaded {len(evaluation_cases)} evaluation cases")

# Coefficient-of-variation cut-off separating "regular" from "volatile"
# series; the summary text below quotes the same value.
regular_cv_cutoff = 1.5

try:
    with open('../results/pattern_selection/adaptive_results.json', 'r') as f:
        phase36_results = json.load(f)

    with open('../results/pattern_selection/pattern_analysis.json', 'r') as f:
        pattern_analysis = json.load(f)

    loaded_from_disk = True
    print("✅ Phase 3.6 results loaded successfully")

except FileNotFoundError:
    loaded_from_disk = False
    print("⚠️  Phase 3.6 results not found, creating mock pattern analysis")

    # Seeded generator: the mock must be identical on every run so that
    # downstream cells are reproducible.
    mock_rng = np.random.default_rng(42)
    pattern_analysis = {'pattern_classifications': {}}

    for case in evaluation_cases:
        case_key = f"{case['store_nbr']}_{case['family']}"
        mock_cv = float(mock_rng.uniform(0.8, 3.0))
        pattern_analysis['pattern_classifications'][case_key] = {
            'coefficient_variation': mock_cv,
            # Derive the label from the CV so the mock never pairs a
            # "Regular" label with a CV above the cut-off (or vice versa).
            'pattern_type': 'Regular' if mock_cv < regular_cv_cutoff else 'Volatile',
            'confidence': float(mock_rng.uniform(0.3, 0.9)),
        }

    phase36_results = {
        'performance_summary': {
            'average_rmsle': 0.4130,
            'cases_evaluated': 10,
            'cases_beat_traditional': 6,
        }
    }
    print("✅ Mock pattern analysis created for testing")

# Single code path for the headline numbers, whether loaded or mocked, so
# the values can never drift away from the summary dict they describe.
performance_summary = phase36_results['performance_summary']
baseline_rmsle = performance_summary['average_rmsle']
cases_evaluated = performance_summary['cases_evaluated']
cases_beat_traditional = performance_summary['cases_beat_traditional']
# Guard against an empty results file (cases_evaluated == 0).
success_rate = cases_beat_traditional / cases_evaluated if cases_evaluated else 0.0

if loaded_from_disk:
    print(f"   Baseline RMSLE: {baseline_rmsle:.4f}")
    print(f"   Success rate: {success_rate:.1%}")
    print(f"   Cases evaluated: {cases_evaluated}")
    print(f"   Cases beat traditional: {cases_beat_traditional}")

# Per-case classifications; an absent key yields an empty mapping.
pattern_classifications = pattern_analysis.get('pattern_classifications', {})

print(f"\n🧠 Pattern Analysis Summary (Phase 3.6 Insights)")
# An entry without a CV is given 2.0 and therefore counted as volatile.
regular_count = sum(
    1 for p in pattern_classifications.values()
    if p.get('coefficient_variation', 2.0) < regular_cv_cutoff
)
volatile_count = len(pattern_classifications) - regular_count

print(f"   Regular patterns (CV < 1.5): {regular_count} cases")
print(f"   Volatile patterns (CV ≥ 1.5): {volatile_count} cases")
if len(pattern_classifications) > 0:
    print(f"   Key insight: {volatile_count/len(pattern_classifications)*100:.0f}% volatile → Need robust STGAT")

# Kept under a dedicated name because later cells compare against it.
phase36_baseline_rmsle = baseline_rmsle

📊 LOADING PHASE 3.6 BASELINE RESULTS
✅ Loaded 10 evaluation cases
✅ Phase 3.6 results loaded successfully
   Baseline RMSLE: 0.4190
   Success rate: 60.0%
   Cases evaluated: 10
   Cases beat traditional: 6

🧠 Pattern Analysis Summary (Phase 3.6 Insights)
   Regular patterns (CV < 1.5): 3 cases
   Volatile patterns (CV ≥ 1.5): 7 cases
   Key insight: 70% volatile → Need robust STGAT


In [3]:
# Cell 2 (earlier revision): Load Phase 3.6 Results and Pattern Analysis
#
# NOTE(review): this cell repeats the preceding results-loading cell and
# rebinds evaluation_cases, case_manager, phase36_results and
# pattern_classifications. Consider keeping only one of the two cells.
# NOTE(review): the mock keys here use "store_<n>_family_<name>", whereas
# other cells in this notebook build "<n>_<name>" keys - confirm which
# format the real pattern_analysis.json uses.
print("📊 LOADING PHASE 3.6 BASELINE RESULTS")
print("=" * 40)

# Load evaluation cases
evaluation_cases = load_evaluation_cases()
case_manager = EvaluationCaseManager()

print(f"✅ Loaded {len(evaluation_cases)} evaluation cases")

# Coefficient-of-variation cut-off separating "regular" from "volatile"
# series; the summary text below quotes the same value.
regular_cv_cutoff = 1.5

try:
    with open('../results/pattern_selection/adaptive_results.json', 'r') as f:
        phase36_results = json.load(f)

    with open('../results/pattern_selection/pattern_analysis.json', 'r') as f:
        pattern_analysis = json.load(f)

    print("✅ Phase 3.6 results loaded successfully")
    loaded_summary = phase36_results['performance_summary']
    print(f"   Baseline RMSLE: {loaded_summary['average_rmsle']:.4f}")
    # Guard against an empty results file (cases_evaluated == 0).
    success_rate = (
        loaded_summary['cases_beat_traditional'] / loaded_summary['cases_evaluated']
        if loaded_summary['cases_evaluated'] else 0.0
    )
    print(f"   Success rate: {success_rate:.1%}")

    # Extract pattern classifications for STGAT graph construction
    pattern_classifications = pattern_analysis['pattern_classifications']

except FileNotFoundError:
    print("⚠️  Phase 3.6 results not found, creating mock pattern analysis")

    # Seeded generator: the mock must be identical on every run so that
    # downstream cells are reproducible.
    mock_rng = np.random.default_rng(42)
    pattern_classifications = {}
    for case in evaluation_cases:
        case_key = f"store_{case['store_nbr']}_family_{case['family']}"
        mock_cv = float(mock_rng.uniform(0.8, 3.0))
        pattern_classifications[case_key] = {
            'coefficient_variation': mock_cv,
            # Derive the label from the CV so the two fields always agree.
            'pattern_type': 'REGULAR' if mock_cv < regular_cv_cutoff else 'VOLATILE',
            'confidence_score': float(mock_rng.uniform(0.3, 0.9)),
        }

    phase36_results = {'performance_summary': {'average_rmsle': 0.4130, 'cases_evaluated': 10, 'cases_beat_traditional': 6}}
    print("✅ Mock pattern analysis created for testing")

# Display pattern insights
print("\n🧠 Pattern Analysis Summary (Phase 3.6 Insights)")
# An entry without a CV is given 2.0 and therefore counted as volatile.
regular_count = sum(
    1 for p in pattern_classifications.values()
    if p.get('coefficient_variation', 2.0) < regular_cv_cutoff
)
volatile_count = len(pattern_classifications) - regular_count

print(f"   Regular patterns (CV < 1.5): {regular_count} cases")
print(f"   Volatile patterns (CV ≥ 1.5): {volatile_count} cases")
if len(pattern_classifications) > 0:
    print(f"   Key insight: {volatile_count/len(pattern_classifications)*100:.0f}% volatile → Need robust STGAT")

📊 LOADING PHASE 3.6 BASELINE RESULTS
✅ Loaded 10 evaluation cases
✅ Phase 3.6 results loaded successfully
   Baseline RMSLE: 0.4190
   Success rate: 60.0%

🧠 Pattern Analysis Summary (Phase 3.6 Insights)
   Regular patterns (CV < 1.5): 3 cases
   Volatile patterns (CV ≥ 1.5): 7 cases
   Key insight: 70% volatile → Need robust STGAT


In [4]:
# Cell 5: Initialize STGAT Evaluator
#
# Builds the evaluator used by the following cells and echoes its
# configuration. Requires case_manager and evaluation_cases from the
# results-loading cell.
print("🧪 INITIALIZING STGAT EVALUATOR")
print("=" * 35)

# The CV threshold passed here matches the 1.5 cut-off quoted in the
# pattern summary printed by the results-loading cell.
stgat_evaluator = STGATEvaluator(case_manager, cv_threshold=1.5)

print("✅ STGATEvaluator initialized successfully")
feature_lines = (
    "   🔹 Pattern-based routing with CV thresholds",
    "   🔹 Confidence-based fallback mechanisms",
    "   🔹 STL temporal decomposition",
    "   🔹 Graph construction with correlation analysis",
    "   🔹 Phase 3.6 pattern analysis integration",
)
print(*feature_lines, sep="\n")

# Echo the attributes exposed by the evaluator instance.
print(f"\n🔧 Configuration:")
evaluator_config = {
    "CV threshold": stgat_evaluator.cv_threshold,
    "Graph builder": stgat_evaluator.graph_builder.__class__.__name__,
    "Case manager": stgat_evaluator.case_manager.__class__.__name__,
}
for config_label, config_value in evaluator_config.items():
    print(f"   {config_label}: {config_value}")

print(f"\n🎯 Ready for STGAT evaluation on {len(evaluation_cases)} cases")
print("   Each case will use pattern-aware routing based on Phase 3.6 insights")

🧪 INITIALIZING STGAT EVALUATOR
📂 Loading sales data for STGAT evaluation...
✅ Sales data loaded from: ../data/raw/train.csv
   Records: 3,000,888
✅ STGATEvaluator initialized successfully
   🔹 Pattern-based routing with CV thresholds
   🔹 Confidence-based fallback mechanisms
   🔹 STL temporal decomposition
   🔹 Graph construction with correlation analysis
   🔹 Phase 3.6 pattern analysis integration

🔧 Configuration:
   CV threshold: 1.5
   Graph builder: STGATGraphBuilder
   Case manager: EvaluationCaseManager

🎯 Ready for STGAT evaluation on 10 cases
   Each case will use pattern-aware routing based on Phase 3.6 insights


In [12]:
# Cell 6: Test Single Case
#
# Smoke test: reloads models.stgat, rebuilds the evaluator, and runs it on
# the first evaluation case. Requires case_manager, evaluation_cases,
# pattern_classifications and phase36_baseline_rmsle from earlier cells.
print("🧪 TESTING STGAT ON SINGLE CASE")
print("=" * 32)

# Reload the module so edits made to src/models/stgat.py since the first
# import take effect without restarting the kernel.
import importlib
from models import stgat
importlib.reload(stgat)
from models.stgat import STGATEvaluator

# Reinitialize with reloaded module
print("🔄 Reloading STGAT module and reinitializing evaluator...")
stgat_evaluator = STGATEvaluator(case_manager, cv_threshold=1.5)

# Phase 2 baseline; defined once so the value handed to the evaluator and
# the value used in the comparison below cannot diverge.
traditional_baseline = 0.4755

# Test with first evaluation case
test_case = evaluation_cases[0]
store_nbr = test_case['store_nbr']
family = test_case['family']

print(f"Testing Store {store_nbr} - {family}")
print("Expected data structure check...")

try:
    # Use the sales data already loaded by the evaluator instance.
    train_data, test_data = get_case_train_test_data(stgat_evaluator.sales_data, store_nbr, family)
    print(f"✅ Data loading successful:")
    print(f"   Train data: {len(train_data)} records")
    print(f"   Test data: {len(test_data)} records")

    # Check data structure
    if isinstance(train_data, pd.DataFrame):
        print(f"   Train columns: {list(train_data.columns)}")
        print(f"   Date range: {train_data['date'].min()} to {train_data['date'].max()}")

        # Descriptive statistics of the training target, printed for debugging.
        target_series = train_data.set_index('date')['sales']
        print(f"\n🔍 DEBUG - Sales data characteristics:")
        print(f"   Series length: {len(target_series)}")
        print(f"   Mean: {target_series.mean():.2f}")
        print(f"   Std: {target_series.std():.2f}")
        print(f"   Variance: {target_series.var():.2f}")
        print(f"   Min/Max: {target_series.min():.2f} / {target_series.max():.2f}")
        print(f"   Zero values: {(target_series == 0).sum()}")
        print(f"   Unique values: {target_series.nunique()}")

    print(f"\n🔬 Running STGAT evaluation...")

    # Run STGAT evaluation on test case
    result = stgat_evaluator.evaluate_case(
        store_nbr=store_nbr,
        family=family,
        pattern_analysis=pattern_classifications,
        traditional_baseline=traditional_baseline
    )

    if 'error' in result:
        # An error result is not guaranteed to carry 'test_rmsle'; fall back
        # to NaN so the real error message is shown instead of a KeyError.
        error_rmsle = result.get('test_rmsle', float('nan'))
        print(f"❌ STGAT evaluation failed with error:")
        print(f"   Error: {result['error']}")
        print(f"   RMSLE: {error_rmsle:.4f} (error fallback)")
        print(f"   Method: {result.get('method_used', 'Unknown')}")
    else:
        print(f"✅ STGAT evaluation successful!")
        print(f"   RMSLE: {result['test_rmsle']:.4f}")
        print(f"   Pattern: {result.get('pattern_type', 'Unknown')} (CV: {result.get('cv_value', 0.0):.3f})")
        print(f"   Method: {result.get('method_used', 'Unknown')}")
        print(f"   Confidence: {result.get('stgat_confidence', 0.0):.3f}")
        print(f"   Prediction length: {result.get('prediction_length', 0)}")

        # Positive percentages mean the STGAT RMSLE is lower (better) than
        # the baseline it is compared with.
        phase36_baseline = phase36_baseline_rmsle

        print(f"\n📊 Performance Assessment:")
        print(f"   vs Phase 3.6 baseline ({phase36_baseline:.4f}): {((phase36_baseline - result['test_rmsle'])/phase36_baseline*100):+.1f}%")
        print(f"   vs Traditional baseline ({traditional_baseline:.4f}): {((traditional_baseline - result['test_rmsle'])/traditional_baseline*100):+.1f}%")

    # Debug: Print all available keys in the result
    print(f"\n🔍 Debug - Result keys: {list(result.keys())}")

except Exception as e:
    # Deliberate catch-all for this smoke test: report the failure with a
    # full traceback rather than aborting the notebook run.
    print(f"❌ Single case test failed: {str(e)}")
    import traceback
    traceback.print_exc()

🧪 TESTING STGAT ON SINGLE CASE
🔄 Reloading STGAT module and reinitializing evaluator...
📂 Loading sales data for STGAT evaluation...
✅ Sales data loaded from: ../data/raw/train.csv
   Records: 3,000,888
Testing Store 49 - PET SUPPLIES
Expected data structure check...
✅ Data loading successful:
   Train data: 1638 records
   Test data: 46 records
   Train columns: ['id', 'date', 'store_nbr', 'family', 'sales', 'onpromotion']
   Date range: 2013-01-01 00:00:00 to 2017-06-30 00:00:00

🔍 DEBUG - Sales data characteristics:
   Series length: 1638
   Mean: 10.00
   Std: 10.39
   Variance: 107.92
   Min/Max: 0.00 / 59.00
   Zero values: 662
   Unique values: 47

🔬 Running STGAT evaluation...
✅ STGAT evaluation successful!
   RMSLE: 0.4254
   Pattern: REGULAR (CV: 1.038)
   Method: STGAT
   Confidence: 0.846
   Prediction length: 46

📊 Performance Assessment:
   vs Phase 3.6 baseline (0.4190): -1.5%
   vs Traditional baseline (0.4755): +10.5%

🔍 Debug - Result keys: ['store_nbr', 'family', 'te

In [13]:
# Cell 8: Full STGAT Evaluation
#
# Runs the evaluator over every evaluation case, collects the per-case
# results in stgat_results, and prints a summary against both baselines.
# Requires evaluation_cases, pattern_classifications and
# phase36_baseline_rmsle from earlier cells.
print("🚀 STGAT EVALUATION ON ALL CASES")
print("=" * 40)

print("🔄 Using fixed STGAT evaluator from previous tests...")

# stgat_evaluator_complete is not created by any cell visible in this
# notebook, so a fresh "Restart & Run All" would stop here with a NameError.
# Prefer it when it exists, otherwise fall back to stgat_evaluator.
try:
    active_evaluator = stgat_evaluator_complete
except NameError:
    print("⚠️  stgat_evaluator_complete is not defined; falling back to stgat_evaluator")
    active_evaluator = stgat_evaluator

# Global traditional (Phase 2) RMSLE used for every comparison below.
TRADITIONAL_BASELINE_RMSLE = 0.4755
n_cases = len(evaluation_cases)

# Results storage
stgat_results = {
    'detailed_results': {},
    'summary_metrics': {},
    'evaluation_metadata': {
        'phase': 'Phase 4: STGAT Implementation',
        'model_type': 'Pattern-Aware STGAT',
        'evaluation_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
        'baseline_comparison': phase36_baseline_rmsle
    }
}

# Evaluate all cases
print(f"Evaluating STGAT on all {n_cases} cases...")
print("Note: This may take several minutes due to graph construction and neural processing")

successful_evaluations = 0
failed_evaluations = 0
total_rmsle = 0
beats_traditional = 0
beats_phase36 = 0
# Pre-seeded so both headline methods are always reported, even at zero;
# any other method name returned by the evaluator is added on first sight.
method_counts = {'STGAT': 0, 'Traditional_Fallback': 0}

for i, case in enumerate(evaluation_cases, 1):
    store_nbr = case['store_nbr']
    family = case['family']
    case_key = f"{store_nbr}_{family}"

    print(f"\n{i:2d}. Evaluating Store {store_nbr} - {family}")

    # Per-case traditional RMSLE when the case carries one, else the global.
    # NOTE(review): this value is only handed to the evaluator; the
    # comparisons below use the global baseline - confirm that is intended.
    traditional_baseline = case.get('traditional_rmsle', TRADITIONAL_BASELINE_RMSLE)

    result = active_evaluator.evaluate_case(
        store_nbr=store_nbr,
        family=family,
        pattern_analysis=pattern_classifications,
        traditional_baseline=traditional_baseline
    )

    stgat_results['detailed_results'][case_key] = result

    if 'error' in result:
        print(f"   ❌ Error: {result['error']}")
        failed_evaluations += 1
        continue

    case_rmsle = result['test_rmsle']
    method_used = result.get('method_used', 'Unknown')

    print(f"   ✅ RMSLE: {case_rmsle:.4f}")
    print(f"      Pattern: {result.get('pattern_type', 'Unknown')} (CV: {result.get('cv_value', 0.0):.3f})")
    print(f"      Method: {method_used}")
    print(f"      Confidence: {result.get('stgat_confidence', 0.0):.3f}")
    successful_evaluations += 1
    total_rmsle += case_rmsle

    # Count every method, not only the pre-seeded ones, so the distribution
    # always sums to the number of successful evaluations.
    method_counts[method_used] = method_counts.get(method_used, 0) + 1

    # Performance tracking (lower RMSLE is better)
    case_beats_traditional = case_rmsle < TRADITIONAL_BASELINE_RMSLE
    case_beats_phase36 = case_rmsle < phase36_baseline_rmsle
    if case_beats_traditional:
        beats_traditional += 1
    if case_beats_phase36:
        beats_phase36 += 1

    # Relative improvement in percent; positive means better than baseline.
    improvement_vs_traditional = ((TRADITIONAL_BASELINE_RMSLE - case_rmsle) / TRADITIONAL_BASELINE_RMSLE) * 100
    improvement_vs_phase36 = ((phase36_baseline_rmsle - case_rmsle) / phase36_baseline_rmsle) * 100

    status_trad = "🎯 BEATS TRADITIONAL" if case_beats_traditional else "⚠️  ABOVE TRADITIONAL"
    status_phase36 = "🎯 BEATS PHASE 3.6" if case_beats_phase36 else "⚠️  ABOVE PHASE 3.6"

    print(f"      vs Traditional: {improvement_vs_traditional:+.1f}% ({status_trad})")
    print(f"      vs Phase 3.6: {improvement_vs_phase36:+.1f}% ({status_phase36})")

# Calculate summary statistics
if successful_evaluations > 0:
    avg_rmsle = total_rmsle / successful_evaluations
    traditional_success_rate = beats_traditional / successful_evaluations * 100
    phase36_success_rate = beats_phase36 / successful_evaluations * 100
else:
    # NOTE(review): 0 is a placeholder for "no data", not a perfect score.
    avg_rmsle = 0
    traditional_success_rate = 0
    phase36_success_rate = 0

# Guard against an empty case list.
evaluation_success_pct = successful_evaluations / n_cases * 100 if n_cases else 0.0

# Store summary metrics
stgat_results['summary_metrics'] = {
    'successful_evaluations': successful_evaluations,
    'failed_evaluations': failed_evaluations,
    'success_rate': evaluation_success_pct,
    'average_rmsle': avg_rmsle,
    'beats_traditional_count': beats_traditional,
    'beats_phase36_count': beats_phase36,
    'traditional_success_rate': traditional_success_rate,
    'phase36_success_rate': phase36_success_rate,
    'method_distribution': method_counts
}

print("\n" + "=" * 60)
print("🎯 STGAT EVALUATION SUMMARY")
print("=" * 60)
print(f"Successful evaluations: {successful_evaluations}/{n_cases} ({evaluation_success_pct:.1f}%)")
print(f"Failed evaluations: {failed_evaluations}/{n_cases}")

if successful_evaluations > 0:
    print(f"\n📊 PERFORMANCE METRICS:")
    print(f"   Average RMSLE: {avg_rmsle:.4f}")
    print(f"   Beat Traditional Baseline: {beats_traditional}/{successful_evaluations} ({traditional_success_rate:.1f}%)")
    print(f"   Beat Phase 3.6 Baseline: {beats_phase36}/{successful_evaluations} ({phase36_success_rate:.1f}%)")

    print(f"\n🎯 METHOD DISTRIBUTION:")
    for method, count in method_counts.items():
        percentage = count / successful_evaluations * 100
        print(f"   {method}: {count}/{successful_evaluations} ({percentage:.1f}%)")

    print(f"\n📈 BASELINE COMPARISONS:")
    print(f"   Traditional Baseline: {TRADITIONAL_BASELINE_RMSLE:.4f}")
    print(f"   Phase 3.6 Baseline: {phase36_baseline_rmsle:.4f}")
    print(f"   STGAT Average: {avg_rmsle:.4f}")

    # Performance assessment
    vs_traditional = ((TRADITIONAL_BASELINE_RMSLE - avg_rmsle) / TRADITIONAL_BASELINE_RMSLE) * 100
    vs_phase36 = ((phase36_baseline_rmsle - avg_rmsle) / phase36_baseline_rmsle) * 100

    print(f"   STGAT vs Traditional: {vs_traditional:+.1f}%")
    print(f"   STGAT vs Phase 3.6: {vs_phase36:+.1f}%")

    if avg_rmsle < phase36_baseline_rmsle:
        print(f"   🎉 STGAT outperforms Phase 3.6 baseline!")
    if avg_rmsle < TRADITIONAL_BASELINE_RMSLE:
        print(f"   🎉 STGAT outperforms Traditional baseline!")

    print(f"\n✅ STGAT evaluation completed successfully!")
    print(f"📊 Pattern-aware routing with {method_counts['STGAT']}/{successful_evaluations} cases using STGAT")

else:
    print("\n❌ No successful evaluations - check implementation")

🚀 STGAT EVALUATION ON ALL CASES
🔄 Using fixed STGAT evaluator from previous tests...
Evaluating STGAT on all 10 cases...
Note: This may take several minutes due to graph construction and neural processing

 1. Evaluating Store 49 - PET SUPPLIES
   ✅ RMSLE: 0.3839
      Pattern: REGULAR (CV: 1.038)
      Method: STGAT
      Confidence: 0.846
      vs Traditional: +19.3% (🎯 BEATS TRADITIONAL)
      vs Phase 3.6: +8.4% (🎯 BEATS PHASE 3.6)

 2. Evaluating Store 8 - PET SUPPLIES
   ✅ RMSLE: 0.4524
      Pattern: REGULAR (CV: 1.014)
      Method: STGAT
      Confidence: 0.849
      vs Traditional: +4.9% (🎯 BEATS TRADITIONAL)
      vs Phase 3.6: -8.0% (⚠️  ABOVE PHASE 3.6)

 3. Evaluating Store 44 - SCHOOL AND OFFICE SUPPLIES
   ✅ RMSLE: 3.4728
      Pattern: VOLATILE (CV: 2.766)
      Method: Traditional_Fallback
      Confidence: 0.427
      vs Traditional: -630.4% (⚠️  ABOVE TRADITIONAL)
      vs Phase 3.6: -728.9% (⚠️  ABOVE PHASE 3.6)

 4. Evaluating Store 45 - SCHOOL AND OFFICE SUPPLIES

In [14]:
# Cell 9: Test Improved Traditional Fallback
#
# Picks the first evaluation case whose Phase 3.6 coefficient of variation
# is at or above 1.5 and runs it through a freshly built evaluator.
# Requires case_manager, evaluation_cases and pattern_classifications.
print("🧪 TESTING IMPROVED TRADITIONAL FALLBACK")
print("=" * 45)

# Reload the module so edits made to src/models/stgat.py since the first
# import take effect without restarting the kernel.
import importlib
from models import stgat
importlib.reload(stgat)
from models.stgat import STGATEvaluator

print("🔧 Initializing new STGAT evaluator with traditional models integration...")
improved_stgat_evaluator = STGATEvaluator(case_manager, cv_threshold=1.5)


def _find_pattern_entry(case, classifications):
    """Return the classification dict for ``case``, or None when absent.

    Two key spellings appear in this notebook's cells: "<store>_<family>"
    and "store_<store>_family_<family>". Both are tried because the
    recorded run found no volatile case with the first spelling alone,
    although the pattern summary reported seven.
    NOTE(review): confirm the key format of the real pattern_analysis.json.
    """
    store, fam = case['store_nbr'], case['family']
    for candidate_key in (f"{store}_{fam}", f"store_{store}_family_{fam}"):
        if candidate_key in classifications:
            return classifications[candidate_key]
    return None


# First case classified as volatile (CV >= 1.5). Cases without a
# classification entry are skipped; an entry without a CV counts as 2.0.
volatile_case = None
cv_value = None
for case in evaluation_cases:
    pattern_entry = _find_pattern_entry(case, pattern_classifications)
    if pattern_entry is None:
        continue
    candidate_cv = pattern_entry.get('coefficient_variation', 2.0)
    if candidate_cv >= 1.5:
        volatile_case = case
        cv_value = candidate_cv
        break

if volatile_case is not None:
    print(f"\n🎯 Testing volatile case: Store {volatile_case['store_nbr']} - {volatile_case['family']}")
    print(f"   CV: {cv_value:.3f} (should route to traditional models)")

    # Run evaluation with improved system
    result = improved_stgat_evaluator.evaluate_case(
        store_nbr=volatile_case['store_nbr'],
        family=volatile_case['family'],
        pattern_analysis=pattern_classifications,
        traditional_baseline=0.4755
    )

    print(f"\n📊 Result:")
    print(f"   RMSLE: {result['test_rmsle']:.4f}")
    print(f"   Method: {result.get('method_used', 'Unknown')}")
    print(f"   Pattern: {result.get('pattern_type', 'Unknown')}")
    print(f"   Confidence: {result.get('stgat_confidence', 0.0):.3f}")

    # Distinguish a real traditional model from the constant fallback by
    # inspecting the method name reported by the evaluator.
    method_used = result.get('method_used', '')
    if method_used.startswith('Traditional_'):
        if 'Constant' in method_used:
            print("   ⚠️  Still using constant fallback")
        else:
            print(f"   ✅ Using actual traditional model: {method_used}")

    # Positive percentage means the RMSLE is lower (better) than 0.4755.
    vs_constant = ((0.4755 - result['test_rmsle']) / 0.4755) * 100
    print(f"   vs Constant baseline (0.4755): {vs_constant:+.1f}%")

    print(f"\n🔍 Debug: All result keys: {list(result.keys())}")
else:
    print("❌ No volatile case found for testing")

🧪 TESTING IMPROVED TRADITIONAL FALLBACK
🔧 Initializing new STGAT evaluator with traditional models integration...
📂 Loading sales data for STGAT evaluation...
✅ Sales data loaded from: ../data/raw/train.csv
   Records: 3,000,888
🔧 Initializing traditional models for STGAT fallback...
📂 Loading sales data for traditional baselines...
✅ Sales data loaded from: ../data/raw/train.csv
   Records: 3,000,888
✅ Traditional models initialized
❌ No volatile case found for testing
