In [1]:
# Cell 1: Setup and imports with warning suppression
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import sys
from pathlib import Path
import os

# Suppress pandas warnings
pd.options.mode.chained_assignment = None

# Suppress sklearn warnings
os.environ['PYTHONWARNINGS'] = 'ignore'

# Dynamic path setup
current_dir = Path.cwd()
while current_dir.name != "ENEXIS" and current_dir.parent != current_dir:
    current_dir = current_dir.parent
project_root = current_dir

# Add utils to path
utils_path = project_root / "src" / "utils"
sys.path.append(str(utils_path))
from build_training_set import build_training_set

# Define features (top 10 from your analysis)
FEATURES = ['Load', 'wind_speed_10m', 'Flow_NO', 'temperature_2m', 'yearday_sin', 
           'yearday_cos', 'shortwave_radiation', 'Flow_GB', 'diffuse_radiation', 'hour_sin']
TARGET = 'Price'

def find_cheapest_consecutive_4h_window(prices, timestamps):
    """
    Find the cheapest consecutive 4-hour window.
    Returns: (start_hour, window_hours_list, total_cost)
    """
    if len(prices) < 4:
        return None, [], float('inf')
    
    min_cost = float('inf')
    best_start = None
    best_window = []
    
    for i in range(len(prices) - 3):  # -3 because we need 4 consecutive hours
        consecutive_4h = prices[i:i+4]
        cost = sum(consecutive_4h)
        
        if cost < min_cost:
            min_cost = cost
            best_start = i
            best_window = list(range(i, i+4))  # Hour indices
    
    return best_start, best_window, min_cost

def calculate_matching_hours(pred_window, actual_window):
    """
    Calculate how many hours match between predicted and actual windows.
    Returns: matching_count, accuracy_percentage
    """
    if not pred_window or not actual_window:
        return 0, 0.0
    
    pred_set = set(pred_window)
    actual_set = set(actual_window)
    matching = len(pred_set.intersection(actual_set))
    accuracy = (matching / 4) * 100  # Each hour = 25%
    
    return matching, accuracy

print("✅ Setup complete - all warnings suppressed")





In [2]:
# Cell 2: Main prediction loop (Feb 1 - June 16, 2025)
print("🔍 Starting charging window analysis from Feb 1, 2025 to today...")
print("=" * 70)

# Calculate date range
start_date = pd.Timestamp('2025-02-01')
end_date = pd.Timestamp('2025-06-16')  # Today
date_range = pd.date_range(start_date, end_date, freq='D')
total_days = len(date_range)

print(f"Analyzing {total_days} days from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")

# Training window setup (74 days before each forecast)
training_window_days = 74

results = []
successful_runs = 0

for day_idx, forecast_date in enumerate(date_range):
    train_start = forecast_date - pd.Timedelta(days=training_window_days)
    train_end = forecast_date - pd.Timedelta(days=1, hours=1)  # End at 23:00 previous day
    
    try:
        # Build training set
        df = build_training_set(
            train_start=train_start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=train_end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=forecast_date.strftime("%Y-%m-%d %H:%M:%S")
        )
        
        if df is None or df.empty:
            continue
            
        # Process data
        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')
        forecast_date_utc = forecast_date.tz_localize("UTC")
        
        # Split data
        train_data = df[df.index <= forecast_date_utc]
        test_data = df[df.index > forecast_date_utc]
        
        # Clean data
        train_data = train_data.dropna(subset=FEATURES + [TARGET])
        test_data = test_data.dropna(subset=FEATURES + [TARGET])
        
        if test_data.empty or train_data.empty or len(test_data) < 25:  # Need at least 25 hours (skip 24 + 4 for window)
            continue
            
        # Prepare features
        X_train = train_data[FEATURES]
        y_train = train_data[TARGET]
        X_test = test_data[FEATURES]
        y_test = test_data[TARGET]
        
        # Skip first 24 hours
        X_test_filtered = X_test.iloc[24:]
        y_test_filtered = y_test.iloc[24:]
        
        if len(X_test_filtered) < 4:  # Need at least 4 hours for window
            continue
            
        # Train model and predict
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test_filtered)
        
        # Find optimal charging windows
        pred_start, pred_window, pred_cost = find_cheapest_consecutive_4h_window(y_pred, X_test_filtered.index)
        actual_start, actual_window, actual_cost = find_cheapest_consecutive_4h_window(y_test_filtered.values, X_test_filtered.index)
        
        # Calculate accuracy
        matching_hours, accuracy_pct = calculate_matching_hours(pred_window, actual_window)
        
        # Store results
        results.append({
            'run_date': forecast_date.strftime('%Y-%m-%d'),
            'total_test_hours': len(X_test_filtered),
            'pred_window_start': pred_start,
            'pred_window_hours': pred_window,
            'actual_window_start': actual_start,
            'actual_window_hours': actual_window,
            'matching_hours': matching_hours,
            'accuracy_percentage': accuracy_pct,
            'pred_total_cost': pred_cost,
            'actual_total_cost': actual_cost
        })
        
        successful_runs += 1
        
        # Progress update every 20 days
        if (day_idx + 1) % 20 == 0:
            print(f"Progress: {day_idx + 1}/{total_days} days processed ({successful_runs} successful)")
            
    except Exception as e:
        # Silent failure - just continue
        continue

print(f"\n✅ Analysis complete!")
print(f"📊 Successfully processed: {successful_runs}/{total_days} days")
print(f"📈 Success rate: {(successful_runs/total_days)*100:.1f}%")

if results:
    results_df = pd.DataFrame(results)
    print(f"\n🎯 Quick Preview:")
    print(f"Average accuracy: {results_df['accuracy_percentage'].mean():.1f}%")
    print(f"Perfect matches (100%): {(results_df['accuracy_percentage'] == 100).sum()}")
    print(f"Zero matches (0%): {(results_df['accuracy_percentage'] == 0).sum()}")
else:
    print("❌ No successful runs completed")

🔍 Starting charging window analysis from Feb 1, 2025 to today...
Analyzing 136 days from 2025-02-01 to 2025-06-16
Progress: 20/136 days processed (20 successful)
Progress: 40/136 days processed (40 successful)
Progress: 60/136 days processed (60 successful)
Progress: 80/136 days processed (80 successful)
Progress: 100/136 days processed (100 successful)

✅ Analysis complete!
📊 Successfully processed: 111/136 days
📈 Success rate: 81.6%

🎯 Quick Preview:
Average accuracy: 33.6%
Perfect matches (100%): 26
Zero matches (0%): 69


In [3]:
# Cell 3: Results analysis
print("\n" + "="*80)
print("📊 DETAILED CHARGING WINDOW FORECAST ANALYSIS")
print("="*80)

if results:
    results_df = pd.DataFrame(results)
    
    # Overall Statistics
    print(f"\n🎯 OVERALL PERFORMANCE SUMMARY")
    print("-" * 50)
    print(f"Total successful forecasts: {len(results_df)}")
    print(f"Average accuracy: {results_df['accuracy_percentage'].mean():.2f}%")
    print(f"Median accuracy: {results_df['accuracy_percentage'].median():.2f}%")
    print(f"Standard deviation: {results_df['accuracy_percentage'].std():.2f}%")
    
    # Accuracy Distribution
    print(f"\n📈 ACCURACY DISTRIBUTION")
    print("-" * 50)
    accuracy_counts = results_df['accuracy_percentage'].value_counts().sort_index()
    for accuracy, count in accuracy_counts.items():
        percentage_of_total = (count / len(results_df)) * 100
        print(f"{accuracy:5.0f}% accuracy: {count:3d} days ({percentage_of_total:5.1f}% of total)")
    
    # Matching Hours Distribution
    print(f"\n🔢 MATCHING HOURS BREAKDOWN")
    print("-" * 50)
    matching_counts = results_df['matching_hours'].value_counts().sort_index()
    for matches, count in matching_counts.items():
        percentage_of_total = (count / len(results_df)) * 100
        accuracy_pct = matches * 25
        print(f"{matches}/4 hours match ({accuracy_pct:3.0f}%): {count:3d} days ({percentage_of_total:5.1f}% of total)")
    
    # Performance Categories
    print(f"\n🏆 PERFORMANCE CATEGORIES")
    print("-" * 50)
    excellent = (results_df['accuracy_percentage'] == 100).sum()
    good = ((results_df['accuracy_percentage'] >= 75) & (results_df['accuracy_percentage'] < 100)).sum()
    moderate = ((results_df['accuracy_percentage'] >= 50) & (results_df['accuracy_percentage'] < 75)).sum()
    poor = ((results_df['accuracy_percentage'] >= 25) & (results_df['accuracy_percentage'] < 50)).sum()
    very_poor = (results_df['accuracy_percentage'] < 25).sum()
    
    total = len(results_df)
    print(f"🥇 Excellent (100%):     {excellent:3d} days ({(excellent/total)*100:5.1f}%)")
    print(f"🥈 Good (75-99%):        {good:3d} days ({(good/total)*100:5.1f}%)")
    print(f"🥉 Moderate (50-74%):    {moderate:3d} days ({(moderate/total)*100:5.1f}%)")
    print(f"⚠️  Poor (25-49%):       {poor:3d} days ({(poor/total)*100:5.1f}%)")
    print(f"❌ Very Poor (0-24%):    {very_poor:3d} days ({(very_poor/total)*100:5.1f}%)")
    
    # Time-based Analysis
    results_df['run_date'] = pd.to_datetime(results_df['run_date'])
    results_df['month'] = results_df['run_date'].dt.month
    results_df['weekday'] = results_df['run_date'].dt.day_name()
    
    print(f"\n📅 MONTHLY PERFORMANCE")
    print("-" * 50)
    monthly_stats = results_df.groupby('month')['accuracy_percentage'].agg(['count', 'mean', 'std']).round(2)
    month_names = {2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June'}
    for month, stats in monthly_stats.iterrows():
        month_name = month_names.get(month, f'Month {month}')
        print(f"{month_name:10s}: {stats['count']:3.0f} days, {stats['mean']:6.2f}% avg, {stats['std']:6.2f}% std")
    
    print(f"\n📆 WEEKDAY PERFORMANCE")
    print("-" * 50)
    weekday_stats = results_df.groupby('weekday')['accuracy_percentage'].agg(['count', 'mean']).round(2)
    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    for day in weekday_order:
        if day in weekday_stats.index:
            stats = weekday_stats.loc[day]
            print(f"{day:10s}: {stats['count']:3.0f} days, {stats['mean']:6.2f}% avg accuracy")
    
    # Best and Worst Performing Days
    print(f"\n🏅 BEST PERFORMING DAYS (100% accuracy)")
    print("-" * 50)
    perfect_days = results_df[results_df['accuracy_percentage'] == 100]['run_date'].dt.strftime('%Y-%m-%d').tolist()
    if perfect_days:
        print("Perfect forecast days:")
        for i, day in enumerate(perfect_days[:10]):  # Show first 10
            print(f"  {day}")
        if len(perfect_days) > 10:
            print(f"  ... and {len(perfect_days) - 10} more days")
    else:
        print("No days with perfect accuracy")
    
    print(f"\n⚠️  WORST PERFORMING DAYS (0% accuracy)")
    print("-" * 50)
    worst_days = results_df[results_df['accuracy_percentage'] == 0]['run_date'].dt.strftime('%Y-%m-%d').tolist()
    if worst_days:
        print("Zero accuracy days:")
        for day in worst_days[:10]:  # Show first 10
            print(f"  {day}")
        if len(worst_days) > 10:
            print(f"  ... and {len(worst_days) - 10} more days")
    else:
        print("No days with zero accuracy")
    
    # Summary Statistics Table
    print(f"\n📋 SUMMARY STATISTICS TABLE")
    print("-" * 50)
    print(f"{'Metric':<25} {'Value':<15}")
    print("-" * 40)
    print(f"{'Total Days Analyzed':<25} {len(results_df):<15}")
    print(f"{'Mean Accuracy':<25} {results_df['accuracy_percentage'].mean():.2f}%")
    print(f"{'Median Accuracy':<25} {results_df['accuracy_percentage'].median():.2f}%")
    print(f"{'Min Accuracy':<25} {results_df['accuracy_percentage'].min():.2f}%")
    print(f"{'Max Accuracy':<25} {results_df['accuracy_percentage'].max():.2f}%")
    print(f"{'Std Deviation':<25} {results_df['accuracy_percentage'].std():.2f}%")
    print(f"{'Perfect Matches':<25} {excellent}")
    print(f"{'At Least 50% Accuracy':<25} {(results_df['accuracy_percentage'] >= 50).sum()}")
    
else:
    print("\n❌ No results to analyze - all forecasting attempts failed")


📊 DETAILED CHARGING WINDOW FORECAST ANALYSIS

🎯 OVERALL PERFORMANCE SUMMARY
--------------------------------------------------
Total successful forecasts: 111
Average accuracy: 33.56%
Median accuracy: 0.00%
Standard deviation: 44.27%

📈 ACCURACY DISTRIBUTION
--------------------------------------------------
    0% accuracy:  69 days ( 62.2% of total)
   50% accuracy:   3 days (  2.7% of total)
   75% accuracy:  13 days ( 11.7% of total)
  100% accuracy:  26 days ( 23.4% of total)

🔢 MATCHING HOURS BREAKDOWN
--------------------------------------------------
0/4 hours match (  0%):  69 days ( 62.2% of total)
2/4 hours match ( 50%):   3 days (  2.7% of total)
3/4 hours match ( 75%):  13 days ( 11.7% of total)
4/4 hours match (100%):  26 days ( 23.4% of total)

🏆 PERFORMANCE CATEGORIES
--------------------------------------------------
🥇 Excellent (100%):      26 days ( 23.4%)
🥈 Good (75-99%):         13 days ( 11.7%)
🥉 Moderate (50-74%):      3 days (  2.7%)
⚠️  Poor (25-49%):         

In [4]:
# Cell 4: Detailed results export
print("\n" + "="*80)
print("💾 EXPORTING DETAILED RESULTS")  
print("="*80)

if results:
    results_df = pd.DataFrame(results)
    
    # Prepare detailed export dataframe
    export_df = results_df.copy()
    
    # Convert window hours lists to readable format
    export_df['predicted_window_hours'] = export_df['pred_window_hours'].apply(
        lambda x: ','.join(map(str, x)) if x else ''
    )
    export_df['actual_window_hours'] = export_df['actual_window_hours'].apply(
        lambda x: ','.join(map(str, x)) if x else ''
    )
    
    # Add helper columns for analysis
    export_df['run_date_dt'] = pd.to_datetime(export_df['run_date'])
    export_df['month'] = export_df['run_date_dt'].dt.month
    export_df['weekday'] = export_df['run_date_dt'].dt.day_name()
    export_df['week_number'] = export_df['run_date_dt'].dt.isocalendar().week
    
    # Performance category
    def categorize_performance(accuracy):
        if accuracy == 100:
            return 'Excellent'
        elif accuracy >= 75:
            return 'Good'
        elif accuracy >= 50:
            return 'Moderate'
        elif accuracy >= 25:
            return 'Poor'
        else:
            return 'Very Poor'
    
    export_df['performance_category'] = export_df['accuracy_percentage'].apply(categorize_performance)
    
    # Round numerical columns
    numeric_cols = ['accuracy_percentage', 'pred_total_cost', 'actual_total_cost']
    export_df[numeric_cols] = export_df[numeric_cols].round(2)
    
    # Reorder columns for export
    export_columns = [
        'run_date', 'weekday', 'month', 'week_number',
        'total_test_hours', 'pred_window_start', 'predicted_window_hours',
        'actual_window_start', 'actual_window_hours', 'matching_hours',
        'accuracy_percentage', 'performance_category',
        'pred_total_cost', 'actual_total_cost'
    ]
    
    final_export_df = export_df[export_columns]
    
    # Save detailed results
    detailed_filename = "charging_window_forecast_detailed_results.csv"
    final_export_df.to_csv(detailed_filename, index=False)
    print(f"✅ Detailed results saved to: {detailed_filename}")
    print(f"   📁 Contains {len(final_export_df)} rows with full forecast details")
    
    # Create summary statistics export
    summary_stats = {
        'Metric': [
            'Total Days Analyzed',
            'Successful Forecasts', 
            'Success Rate (%)',
            'Mean Accuracy (%)',
            'Median Accuracy (%)',
            'Std Deviation (%)',
            'Min Accuracy (%)',
            'Max Accuracy (%)',
            'Perfect Matches (100%)',
            'Good Performance (≥75%)',
            'Moderate Performance (≥50%)',
            'Poor Performance (≥25%)',
            'Very Poor Performance (<25%)',
            'Zero Matches (0%)'
        ],
        'Value': [
            total_days,
            len(results_df),
            round((len(results_df)/total_days)*100, 2),
            round(results_df['accuracy_percentage'].mean(), 2),
            round(results_df['accuracy_percentage'].median(), 2),
            round(results_df['accuracy_percentage'].std(), 2),
            round(results_df['accuracy_percentage'].min(), 2),
            round(results_df['accuracy_percentage'].max(), 2),
            (results_df['accuracy_percentage'] == 100).sum(),
            (results_df['accuracy_percentage'] >= 75).sum(),
            (results_df['accuracy_percentage'] >= 50).sum(),
            (results_df['accuracy_percentage'] >= 25).sum(),
            (results_df['accuracy_percentage'] < 25).sum(),
            (results_df['accuracy_percentage'] == 0).sum()
        ]
    }
    
    summary_df = pd.DataFrame(summary_stats)
    summary_filename = "charging_window_forecast_summary.csv"
    summary_df.to_csv(summary_filename, index=False)
    print(f"✅ Summary statistics saved to: {summary_filename}")
    
    # Show sample of detailed results
    print(f"\n📋 SAMPLE OF DETAILED RESULTS (First 10 rows)")
    print("-" * 80)
    sample_cols = ['run_date', 'predicted_window_hours', 'actual_window_hours', 
                   'matching_hours', 'accuracy_percentage', 'performance_category']
    print(final_export_df[sample_cols].head(10).to_string(index=False))
    
    if len(final_export_df) > 10:
        print(f"\n... and {len(final_export_df) - 10} more rows in the CSV file")
    
    # Monthly summary for export - using the export_df which has the month column
    monthly_summary = export_df.groupby('month').agg({
        'accuracy_percentage': ['count', 'mean', 'std', 'min', 'max'],
        'matching_hours': 'mean'
    }).round(2)
    
    # Flatten column names
    monthly_summary.columns = ['_'.join(col).strip() for col in monthly_summary.columns]
    monthly_summary = monthly_summary.reset_index()
    
    # Add month names
    month_names = {2: 'February', 3: 'March', 4: 'April', 5: 'May', 6: 'June'}
    monthly_summary['month_name'] = monthly_summary['month'].map(month_names)
    
    monthly_filename = "charging_window_monthly_summary.csv"
    monthly_summary.to_csv(monthly_filename, index=False)
    print(f"✅ Monthly summary saved to: {monthly_filename}")
    
else:
    print("❌ No results to export")


💾 EXPORTING DETAILED RESULTS
✅ Detailed results saved to: charging_window_forecast_detailed_results.csv
   📁 Contains 111 rows with full forecast details
✅ Summary statistics saved to: charging_window_forecast_summary.csv

📋 SAMPLE OF DETAILED RESULTS (First 10 rows)
--------------------------------------------------------------------------------
  run_date predicted_window_hours actual_window_hours  matching_hours  accuracy_percentage performance_category
2025-02-01            72,73,74,75     140,141,142,143               0                  0.0            Very Poor
2025-02-02            47,48,49,50     117,118,119,120               0                  0.0            Very Poor
2025-02-03        131,132,133,134         93,94,95,96               0                  0.0            Very Poor
2025-02-04                0,1,2,3         69,70,71,72               0                  0.0            Very Poor
2025-02-05        107,108,109,110     119,120,121,122               0                  0.0

In [5]:
# Cell 5: 4-Day Ahead Charging Window Simulation
print("\n" + "="*80)
print("🔋 4-DAY AHEAD CHARGING WINDOW SIMULATION")
print("="*80)
print("📅 Daily forecast to find optimal 4-hour charging window in next 4 days")
print("🎯 Simulates realistic charging planning behavior")

# Simulation parameters
simulation_results = []
simulation_successful_runs = 0

print(f"\n🔍 Running simulation from Feb 1, 2025 to June 16, 2025...")
print("⚡ Each day: forecast next 4 days, find cheapest 4-hour consecutive window")
print("-" * 80)

for day_idx, forecast_date in enumerate(date_range):
    train_start = forecast_date - pd.Timedelta(days=training_window_days)
    train_end = forecast_date - pd.Timedelta(days=1, hours=1)  # End at 23:00 previous day
    
    try:
        # Build training set
        df = build_training_set(
            train_start=train_start.strftime("%Y-%m-%d %H:%M:%S"),
            train_end=train_end.strftime("%Y-%m-%d %H:%M:%S"),
            run_date=forecast_date.strftime("%Y-%m-%d %H:%M:%S")
        )
        
        if df is None or df.empty:
            continue
            
        # Process data
        df['target_datetime'] = pd.to_datetime(df['target_datetime'], utc=True)
        df = df.sort_values('target_datetime').set_index('target_datetime')
        forecast_date_utc = forecast_date.tz_localize("UTC")
        
        # Split data
        train_data = df[df.index <= forecast_date_utc]
        test_data = df[df.index > forecast_date_utc]
        
        # Clean data
        train_data = train_data.dropna(subset=FEATURES + [TARGET])
        test_data = test_data.dropna(subset=FEATURES + [TARGET])
        
        if test_data.empty or train_data.empty:
            continue
            
        # For 4-day simulation: look at next 96 hours (4 days)
        # Take first 96 hours after forecast date
        four_days_ahead = test_data.head(96)  # 96 hours = 4 days
        
        if len(four_days_ahead) < 4:  # Need at least 4 hours for window
            continue
            
        # Prepare features
        X_train = train_data[FEATURES]
        y_train = train_data[TARGET]
        X_test_4days = four_days_ahead[FEATURES]
        y_test_4days = four_days_ahead[TARGET]
        
        # Train model and predict for 4-day window
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        y_pred_4days = model.predict(X_test_4days)
        
        # Find optimal charging windows in 4-day period
        pred_start, pred_window, pred_cost = find_cheapest_consecutive_4h_window(
            y_pred_4days, X_test_4days.index
        )
        actual_start, actual_window, actual_cost = find_cheapest_consecutive_4h_window(
            y_test_4days.values, X_test_4days.index
        )
        
        # Calculate accuracy
        matching_hours, accuracy_pct = calculate_matching_hours(pred_window, actual_window)
        
        # Convert window indices to actual timestamps for better understanding
        pred_timestamps = []
        actual_timestamps = []
        
        if pred_window and len(X_test_4days) > max(pred_window):
            pred_timestamps = [X_test_4days.index[i].strftime('%m-%d %H:%M') for i in pred_window]
        
        if actual_window and len(X_test_4days) > max(actual_window):
            actual_timestamps = [X_test_4days.index[i].strftime('%m-%d %H:%M') for i in actual_window]
        
        # Store results
        simulation_results.append({
            'forecast_date': forecast_date.strftime('%Y-%m-%d'),
            'forecast_horizon_hours': len(four_days_ahead),
            'pred_window_start_idx': pred_start,
            'pred_window_hours': pred_window,
            'pred_window_timestamps': pred_timestamps,
            'actual_window_start_idx': actual_start, 
            'actual_window_hours': actual_window,
            'actual_window_timestamps': actual_timestamps,
            'matching_hours': matching_hours,
            'accuracy_percentage': accuracy_pct,
            'pred_total_cost': pred_cost,
            'actual_total_cost': actual_cost,
            'cost_difference': pred_cost - actual_cost if pred_cost != float('inf') and actual_cost != float('inf') else None
        })
        
        simulation_successful_runs += 1
        
        # Progress update every 30 days
        if (day_idx + 1) % 30 == 0:
            print(f"Progress: {day_idx + 1}/{total_days} days simulated ({simulation_successful_runs} successful)")
            
    except Exception as e:
        # Silent failure - just continue
        continue

print(f"\n✅ 4-Day Simulation complete!")
print(f"📊 Successfully processed: {simulation_successful_runs}/{total_days} days")
print(f"📈 Success rate: {(simulation_successful_runs/total_days)*100:.1f}%")

if simulation_results:
    sim_df = pd.DataFrame(simulation_results)
    
    print(f"\n🎯 4-DAY SIMULATION RESULTS PREVIEW:")
    print("-" * 50)
    print(f"Average accuracy: {sim_df['accuracy_percentage'].mean():.1f}%")
    print(f"Perfect matches (100%): {(sim_df['accuracy_percentage'] == 100).sum()}")
    print(f"Good performance (≥75%): {(sim_df['accuracy_percentage'] >= 75).sum()}")
    print(f"Zero matches (0%): {(sim_df['accuracy_percentage'] == 0).sum()}")
    
    # Cost analysis
    cost_diff = sim_df['cost_difference'].dropna()
    if len(cost_diff) > 0:
        print(f"\n💰 COST IMPACT ANALYSIS:")
        print("-" * 50)
        print(f"Average cost difference: {cost_diff.mean():.2f} (predicted - actual)")
        print(f"Times predicted was cheaper: {(cost_diff < 0).sum()}")
        print(f"Times predicted was more expensive: {(cost_diff > 0).sum()}")
    
    # Save 4-day simulation results
    sim_filename = "charging_window_4day_simulation.csv"
    
    # Prepare export dataframe
    sim_export = sim_df.copy()
    sim_export['pred_window_timestamps_str'] = sim_export['pred_window_timestamps'].apply(
        lambda x: '; '.join(x) if x else ''
    )
    sim_export['actual_window_timestamps_str'] = sim_export['actual_window_timestamps'].apply(
        lambda x: '; '.join(x) if x else ''
    )
    
    # Select columns for export
    export_cols = [
        'forecast_date', 'forecast_horizon_hours', 'pred_window_start_idx', 
        'pred_window_timestamps_str', 'actual_window_start_idx', 'actual_window_timestamps_str',
        'matching_hours', 'accuracy_percentage', 'pred_total_cost', 'actual_total_cost', 'cost_difference'
    ]
    
    sim_export[export_cols].to_csv(sim_filename, index=False)
    print(f"\n✅ 4-day simulation results saved to: {sim_filename}")
    
    # Show sample results
    print(f"\n📋 SAMPLE 4-DAY SIMULATION RESULTS (First 5 days):")
    print("-" * 80)
    sample_cols = ['forecast_date', 'pred_window_timestamps_str', 'actual_window_timestamps_str', 
                   'matching_hours', 'accuracy_percentage']
    print(sim_export[sample_cols].head().to_string(index=False))
    
    # Compare with original day-ahead results
    if 'results_df' in locals() and len(results_df) > 0:
        print(f"\n📊 COMPARISON: Day-Ahead vs 4-Day Ahead Forecasting")
        print("-" * 60)
        print(f"{'Metric':<25} {'Day-Ahead':<15} {'4-Day Ahead':<15}")
        print("-" * 55)
        print(f"{'Average Accuracy':<25} {results_df['accuracy_percentage'].mean():.1f}%{'':<8} {sim_df['accuracy_percentage'].mean():.1f}%")
        print(f"{'Perfect Matches':<25} {(results_df['accuracy_percentage'] == 100).sum():<15} {(sim_df['accuracy_percentage'] == 100).sum()}")
        print(f"{'Good Performance (≥75%)':<25} {(results_df['accuracy_percentage'] >= 75).sum():<15} {(sim_df['accuracy_percentage'] >= 75).sum()}")
        print(f"{'Zero Matches':<25} {(results_df['accuracy_percentage'] == 0).sum():<15} {(sim_df['accuracy_percentage'] == 0).sum()}")
        
        # Statistical comparison
        day_ahead_mean = results_df['accuracy_percentage'].mean()
        four_day_mean = sim_df['accuracy_percentage'].mean()
        difference = four_day_mean - day_ahead_mean
        
        print(f"\n📈 INSIGHT:")
        if difference > 5:
            print(f"🚀 4-day ahead forecasting performs {difference:.1f}% better on average!")
        elif difference < -5:
            print(f"⚠️  Day-ahead forecasting performs {abs(difference):.1f}% better on average")
        else:
            print(f"📊 Both approaches perform similarly (difference: {difference:.1f}%)")
            
else:
    print("❌ No 4-day simulation results to analyze")


🔋 4-DAY AHEAD CHARGING WINDOW SIMULATION
📅 Daily forecast to find optimal 4-hour charging window in next 4 days
🎯 Simulates realistic charging planning behavior

🔍 Running simulation from Feb 1, 2025 to June 16, 2025...
⚡ Each day: forecast next 4 days, find cheapest 4-hour consecutive window
--------------------------------------------------------------------------------
Progress: 30/136 days simulated (30 successful)
Progress: 60/136 days simulated (60 successful)
Progress: 90/136 days simulated (90 successful)

✅ 4-Day Simulation complete!
📊 Successfully processed: 112/136 days
📈 Success rate: 82.4%

🎯 4-DAY SIMULATION RESULTS PREVIEW:
--------------------------------------------------
Average accuracy: 40.4%
Perfect matches (100%): 34
Good performance (≥75%): 48
Zero matches (0%): 62

💰 COST IMPACT ANALYSIS:
--------------------------------------------------
Average cost difference: 0.08 (predicted - actual)
Times predicted was cheaper: 41
Times predicted was more expensive: 71

✅