In [43]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from typing import Dict, List, Optional
import warnings
warnings.filterwarnings('ignore')

def predict_last_n_days_with_ensemble(
    df: pd.DataFrame,
    n_days: int = 4,
    context_length: int = 10,
    target_column: str = 'OPEN PRICE',
    date_column: str = 'DATE',
    ensemble_models: list = None
) -> Dict:
    """
    Back-test the last ``n_days`` rows with a weighted Chronos ensemble.

    The frame is sorted by ``date_column``; each of the final ``n_days`` rows
    is then predicted one step ahead from the ``context_length`` rows that
    immediately precede it (sliding window). If the ``chronos``/``torch``
    imports fail, the statistical fallback is used instead.

    Args:
        df: Input data containing ``target_column`` and ``date_column``.
        n_days: Number of trailing rows to predict. Values <= 0 produce an
            empty result.
        context_length: Number of preceding rows fed to the models.
        target_column: Column holding the series to forecast.
        date_column: Column holding the observation dates.
        ensemble_models: List of ``{'model_size': str, 'weight': float}``
            dicts; defaults to tiny/mini/small/base.

    Returns:
        Dict with keys ``predictions`` (one entry per predicted row with
        ``date``, ``actual`` and per-model ``predictions``), ``dates``,
        ``actuals``, ``context_length`` and ``n_days``.

    Raises:
        ValueError: If ``context_length`` is < 1 or ``df`` has fewer than
            ``n_days + context_length`` rows.
    """
    if ensemble_models is None:
        ensemble_models = [
            {'model_size': 'tiny', 'weight': 0.15},
            {'model_size': 'mini', 'weight': 0.2},
            {'model_size': 'small', 'weight': 0.25},
            {'model_size': 'base', 'weight': 0.4}
        ]

    # Nothing to predict: return the empty result without loading anything.
    if n_days <= 0:
        return {
            'predictions': [],
            'dates': [],
            'actuals': [],
            'context_length': context_length,
            'n_days': n_days
        }

    # Without this check a negative window start would silently wrap around
    # and produce a wrong (often empty) context slice.
    if context_length < 1:
        raise ValueError(f"context_length must be >= 1, got {context_length}")
    if len(df) < n_days + context_length:
        raise ValueError(
            f"Need at least n_days + context_length = {n_days + context_length} "
            f"rows, got {len(df)}"
        )

    # Prepare data
    df = df.sort_values(date_column).reset_index(drop=True)
    target_series = df[target_column].values
    dates = pd.to_datetime(df[date_column])

    # Resolve the evaluation rows up front so 'dates' and 'actuals' are
    # populated on every return path, including the statistical fallback.
    prediction_indices = list(range(len(df) - n_days, len(df)))
    prediction_dates = [dates.iloc[idx] for idx in prediction_indices]
    actual_values = [target_series[idx] for idx in prediction_indices]

    try:
        from chronos import ChronosPipeline
        import torch
    except ImportError:
        print("Chronos not available, using statistical fallback...")
        predictions_results = create_statistical_predictions_n_days(
            df, n_days, context_length, target_column, date_column
        )
        return {
            'predictions': predictions_results,
            'dates': prediction_dates,
            'actuals': actual_values,
            'context_length': context_length,
            'n_days': n_days
        }

    predictions_results = []
    # Each pipeline is loaded once and reused for every day. Loading is lazy
    # so a model that fails to load is reported (and retried) per day, as before.
    loaded_pipelines = {}

    for day_offset, prediction_idx in enumerate(prediction_indices):
        context_data = target_series[prediction_idx - context_length:prediction_idx]
        actual_value = actual_values[day_offset]
        prediction_date = prediction_dates[day_offset]

        model_predictions = {}
        ensemble_preds = []
        weights = []

        print(f"Predicting day {day_offset + 1}/{n_days}: {prediction_date.strftime('%Y-%m-%d')}")

        for model_config in ensemble_models:
            # Best effort: one failing model must not abort the ensemble.
            try:
                model_name = f"amazon/chronos-t5-{model_config['model_size']}"
                pipeline = loaded_pipelines.get(model_name)
                if pipeline is None:
                    pipeline = ChronosPipeline.from_pretrained(
                        model_name,
                        device_map="auto",
                        torch_dtype=torch.bfloat16,
                    )
                    loaded_pipelines[model_name] = pipeline

                # Prepare context tensor (adds a leading batch dimension)
                context_tensor = torch.tensor(context_data, dtype=torch.float32).unsqueeze(0)

                # Generate a one-step-ahead sample forecast
                forecast = pipeline.predict(
                    context_tensor,
                    prediction_length=1,
                    num_samples=20
                )

                # Summarise the sampled forecast
                forecast_samples = forecast[0].cpu().numpy()
                mean_pred = np.mean(forecast_samples)
                p10 = np.percentile(forecast_samples, 10)
                p50 = np.percentile(forecast_samples, 50)
                p90 = np.percentile(forecast_samples, 90)

                model_predictions[f"chronos_{model_config['model_size']}"] = {
                    'prediction': mean_pred,
                    'confidence_intervals': {'p10': p10, 'p50': p50, 'p90': p90}
                }

                ensemble_preds.append(mean_pred)
                weights.append(model_config['weight'])

                print(f"  ✓ {model_config['model_size']}: {mean_pred:.4f}")

            except Exception as e:
                print(f"  ✗ {model_config['model_size']} failed: {str(e)}")
                continue

        # Weighted average over the models that succeeded for this day
        if ensemble_preds:
            ensemble_pred = np.average(ensemble_preds, weights=weights)
            model_predictions['ensemble'] = {
                'prediction': ensemble_pred,
                'individual_predictions': ensemble_preds,
                'weights_used': weights
            }
            print(f"  🎯 Ensemble: {ensemble_pred:.4f} (Actual: {actual_value:.4f})")

        predictions_results.append({
            'date': prediction_date,
            'actual': actual_value,
            'predictions': model_predictions
        })

    return {
        'predictions': predictions_results,
        'dates': prediction_dates,
        'actuals': actual_values,
        'context_length': context_length,
        'n_days': n_days
    }


def create_statistical_predictions_n_days(
    df: pd.DataFrame,
    n_days: int,
    context_length: int,
    target_column: str,
    date_column: str
) -> List[Dict]:
    """Fallback statistical predictions when Chronos is not available.

    Each of the last ``n_days`` rows of ``df`` is predicted from the
    ``context_length`` rows directly before it, using a 5-point moving
    average, the full-window mean, exponential smoothing (alpha = 0.3), a
    linear-trend extrapolation, and the unweighted mean of those four.

    Rows are used in the order given; the frame is not re-sorted here.

    Args:
        df: Input data containing ``target_column`` and ``date_column``.
        n_days: Number of trailing rows to predict; <= 0 yields ``[]``.
        context_length: Number of preceding rows used as context.
        target_column: Column holding the series to forecast.
        date_column: Column holding the observation dates.

    Returns:
        One dict per predicted row with ``date``, ``actual`` and
        ``predictions`` (a mapping of method name to
        ``{'prediction': value}``).

    Raises:
        ValueError: If ``context_length`` is < 1 or ``df`` has fewer than
            ``n_days + context_length`` rows.
    """
    if n_days <= 0:
        return []
    if context_length < 1:
        raise ValueError(f"context_length must be >= 1, got {context_length}")
    if len(df) < n_days + context_length:
        # A negative window start would otherwise wrap around and give a
        # wrong or empty context slice.
        raise ValueError(
            f"Need at least n_days + context_length = {n_days + context_length} "
            f"rows, got {len(df)}"
        )

    target_series = df[target_column].values
    dates = pd.to_datetime(df[date_column])
    alpha = 0.3
    predictions_results = []

    for prediction_idx in range(len(df) - n_days, len(df)):
        context_data = target_series[prediction_idx - context_length:prediction_idx]
        actual_value = target_series[prediction_idx]
        prediction_date = dates.iloc[prediction_idx]

        # Moving averages. The 'moving_average_10' key is kept for
        # compatibility; it is the mean of the whole context window.
        ma_5 = np.mean(context_data[-5:])
        ma_10 = np.mean(context_data)

        # Exponential smoothing, run oldest -> newest so that the most
        # recent observations carry the largest weight.
        exp_smooth = context_data[0]
        for value in context_data[1:]:
            exp_smooth = alpha * value + (1 - alpha) * exp_smooth

        # Linear trend extrapolated one step past the window. A single
        # point cannot define a slope, so fall back to that point.
        if len(context_data) >= 2:
            x = np.arange(len(context_data))
            slope, intercept = np.polyfit(x, context_data, 1)
            trend_pred = slope * len(x) + intercept
        else:
            trend_pred = context_data[-1]

        ensemble_pred = np.mean([ma_5, ma_10, exp_smooth, trend_pred])

        model_predictions = {
            'moving_average_5': {'prediction': ma_5},
            'moving_average_10': {'prediction': ma_10},
            'exponential_smoothing': {'prediction': exp_smooth},
            'linear_trend': {'prediction': trend_pred},
            'statistical_ensemble': {'prediction': ensemble_pred}
        }

        predictions_results.append({
            'date': prediction_date,
            'actual': actual_value,
            'predictions': model_predictions
        })

    return predictions_results


def create_prediction_plots(results: Dict, save_plots: bool = True) -> go.Figure:
    """Create interactive Plotly visualizations for predictions.

    Builds a 2x2 figure: predictions vs actuals, absolute error per day,
    mean absolute error per model, and the P10/P90 bounds of every model
    that reports ``confidence_intervals``.

    Args:
        results: Dict with a ``predictions`` list (entries holding ``date``,
            ``actual`` and a per-model ``predictions`` mapping) and an
            ``n_days`` value used in the figure title.
        save_plots: When True, the figure is also written to
            ``prediction_analysis.html`` in the working directory.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    predictions_data = results['predictions']

    # Extract data for plotting
    dates = [p['date'] for p in predictions_data]
    actuals = [p['actual'] for p in predictions_data]

    # Collect every model name seen on any day; sorted for a stable
    # colour assignment and legend order.
    all_models = set()
    for p in predictions_data:
        all_models.update(p['predictions'].keys())
    model_order = sorted(all_models)

    # 2x2 grid of independent panels (no secondary y-axes are used)
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Predictions vs Actuals', 'Prediction Errors', 'Model Comparison', 'Confidence Intervals'),
        specs=[[{'secondary_y': False}, {'secondary_y': False}],
               [{'secondary_y': False}, {'secondary_y': False}]]
    )

    # Plot 1: Predictions vs Actuals
    fig.add_trace(
        go.Scatter(x=dates, y=actuals, mode='lines+markers', name='Actual',
                  line=dict(color='black', width=3), marker=dict(size=8)),
        row=1, col=1
    )

    # One colour per model, cycling through the palette when there are
    # more models than palette entries.
    colors = px.colors.qualitative.Set3
    model_colors = {
        model: colors[i % len(colors)] for i, model in enumerate(model_order)
    }
    model_errors = {}

    for model in model_order:
        predictions = []
        errors = []

        for p in predictions_data:
            if model in p['predictions']:
                pred_value = p['predictions'][model]['prediction']
                predictions.append(pred_value)
                errors.append(abs(pred_value - p['actual']))
            else:
                # None leaves a gap in the line for days this model missed
                predictions.append(None)
                errors.append(None)

        # np.nanmean cannot average a list containing None, so drop the
        # placeholders for missing days before computing the MAE.
        valid_errors = [e for e in errors if e is not None]
        model_errors[model] = np.nanmean(valid_errors) if valid_errors else 0

        # Plot predictions
        fig.add_trace(
            go.Scatter(x=dates, y=predictions, mode='lines+markers',
                      name=f'{model}', line=dict(color=model_colors[model]),
                      marker=dict(size=6)),
            row=1, col=1
        )

        # Plot errors
        fig.add_trace(
            go.Scatter(x=dates, y=errors, mode='lines+markers',
                      name=f'{model} Error', line=dict(color=model_colors[model]),
                      showlegend=False),
            row=1, col=2
        )

    # Plot 3: Model Comparison (MAE)
    model_names = list(model_errors.keys())
    mae_values = list(model_errors.values())

    fig.add_trace(
        go.Bar(x=model_names, y=mae_values, name='Mean Absolute Error',
               marker_color=[model_colors[m] for m in model_names]),
        row=2, col=1
    )

    # Plot 4: Confidence Intervals (if available). Points are grouped per
    # model so each model gets one P10 and one P90 trace with a legend
    # entry, even if it produced no forecast on the first day.
    for model in model_order:
        ci_dates = []
        lower_bounds = []
        upper_bounds = []
        for p in predictions_data:
            pred_data = p['predictions'].get(model)
            if pred_data is None or 'confidence_intervals' not in pred_data:
                continue
            ci = pred_data['confidence_intervals']
            ci_dates.append(p['date'])
            lower_bounds.append(ci.get('p10', 0))
            upper_bounds.append(ci.get('p90', 0))

        if not ci_dates:
            continue

        fig.add_trace(
            go.Scatter(x=ci_dates, y=lower_bounds,
                      mode='markers', name=f'{model} P10',
                      marker=dict(symbol='triangle-down', size=6,
                                  color=model_colors[model])),
            row=2, col=2
        )
        fig.add_trace(
            go.Scatter(x=ci_dates, y=upper_bounds,
                      mode='markers', name=f'{model} P90',
                      marker=dict(symbol='triangle-up', size=6,
                                  color=model_colors[model])),
            row=2, col=2
        )

    # Update layout
    fig.update_layout(
        title_text=f"Currency Prediction Analysis - Last {results['n_days']} Days",
        title_x=0.5,
        height=800,
        showlegend=True,
        template='plotly_white'
    )

    # Update axes labels
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_yaxes(title_text="Price", row=1, col=1)
    fig.update_xaxes(title_text="Date", row=1, col=2)
    fig.update_yaxes(title_text="Absolute Error", row=1, col=2)
    fig.update_xaxes(title_text="Model", row=2, col=1)
    fig.update_yaxes(title_text="Mean Absolute Error", row=2, col=1)
    fig.update_xaxes(title_text="Date", row=2, col=2)
    fig.update_yaxes(title_text="Confidence Bounds", row=2, col=2)

    if save_plots:
        fig.write_html("prediction_analysis.html")
        print("Interactive plot saved as 'prediction_analysis.html'")

    return fig


def calculate_prediction_metrics(results: Dict) -> pd.DataFrame:
    """Calculate detailed prediction metrics.

    For every model found in ``results['predictions']`` this computes MAE,
    MSE, RMSE and MAPE over the days on which that model produced a
    prediction.

    Args:
        results: Dict with a ``predictions`` list; each entry holds an
            ``actual`` value and a ``predictions`` mapping of model name to
            a dict containing ``prediction``.

    Returns:
        DataFrame with columns ``Model``, ``MAE``, ``MSE``, ``RMSE``,
        ``MAPE (%)`` and ``Predictions`` (count), one row per model in
        alphabetical order, rounded to 4 decimals. MAPE ignores days whose
        actual value is 0 and is NaN when every actual is 0.
    """
    columns = ['Model', 'MAE', 'MSE', 'RMSE', 'MAPE (%)', 'Predictions']
    predictions_data = results['predictions']
    metrics_list = []

    # Get all model names
    all_models = set()
    for p in predictions_data:
        all_models.update(p['predictions'].keys())

    # Sorted so the row order is reproducible (set order is not).
    for model in sorted(all_models):
        pairs = [
            (p['actual'], p['predictions'][model]['prediction'])
            for p in predictions_data
            if model in p['predictions']
        ]
        if not pairs:
            continue

        actuals = np.array([actual for actual, _ in pairs], dtype=float)
        predictions = np.array([pred for _, pred in pairs], dtype=float)
        errors = predictions - actuals

        mae = np.mean(np.abs(errors))
        mse = np.mean(errors ** 2)
        rmse = np.sqrt(mse)

        # Percentage error is undefined where the actual value is zero;
        # skip those days instead of producing inf/NaN from a division.
        nonzero = actuals != 0
        if nonzero.any():
            mape = np.mean(np.abs(errors[nonzero] / actuals[nonzero])) * 100
        else:
            mape = np.nan

        metrics_list.append({
            'Model': model,
            'MAE': mae,
            'MSE': mse,
            'RMSE': rmse,
            'MAPE (%)': mape,
            'Predictions': len(predictions)
        })

    # Passing the columns keeps the header even when there are no rows.
    return pd.DataFrame(metrics_list, columns=columns).round(4)


# Main execution function
def run_multi_day_prediction_with_plots(
    csv_file_path: str,
    n_days: int = 4,
    context_length: int = 10,
    target_column: str = 'OPEN PRICE',
    date_column: str = 'DATE',
    save_results: bool = True
) -> Dict:
    """
    Complete pipeline: predict last n days and create visualizations.

    Loads the CSV, parses ``date_column`` with the ``%d-%b-%Y`` format,
    strips thousands separators from the price/volume columns, runs the
    ensemble back-test, builds the figure and computes the metrics table.

    Args:
        csv_file_path: Path of the CSV file to load.
        n_days: Number of trailing rows to predict.
        context_length: Number of preceding rows used as model context.
        target_column: Column to forecast.
        date_column: Column holding the dates.
        save_results: When True, the plot is written to HTML and the
            per-day results and metrics are written to
            ``prediction_results_{n_days}days.csv`` and
            ``prediction_metrics_{n_days}days.csv``.

    Returns:
        Dict with ``results`` (raw prediction dict), ``figure``,
        ``metrics`` (DataFrame) and ``predictions_df`` (DataFrame of
        per-day results, or None when ``save_results`` is False).
    """
    print(f"Loading data from {csv_file_path}...")

    # Load and preprocess data
    df = pd.read_csv(csv_file_path)
    df.columns = df.columns.str.strip()
    df[date_column] = pd.to_datetime(df[date_column], format='%d-%b-%Y', errors='coerce')

    # Rows whose date failed to parse become NaT, which sorts to the end
    # and would otherwise be mistaken for the most recent days.
    unparsed_dates = df[date_column].isna()
    if unparsed_dates.any():
        print(f"Dropping {int(unparsed_dates.sum())} row(s) with unparseable dates")
        df = df.loc[~unparsed_dates].reset_index(drop=True)

    # Clean numeric columns (values may contain thousands separators).
    # The target column is included so a custom target is cleaned as well.
    numeric_cols = ['OPEN PRICE', 'HIGH PRICE', 'LOW PRICE', 'CLOSE PRICE', 'Volume']
    if target_column not in numeric_cols:
        numeric_cols.append(target_column)
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col].astype(str).str.replace(',', ''), errors='coerce')

    print(f"Predicting last {n_days} days using context of {context_length} days...")

    # Run predictions
    results = predict_last_n_days_with_ensemble(
        df=df,
        n_days=n_days,
        context_length=context_length,
        target_column=target_column,
        date_column=date_column
    )

    print("\nCreating visualizations...")

    # Create plots
    fig = create_prediction_plots(results, save_plots=save_results)

    # Calculate metrics
    metrics_df = calculate_prediction_metrics(results)
    print("\nPrediction Metrics:")
    print(metrics_df.to_string(index=False))

    # Save detailed results: one row per predicted day, one column per model
    results_df = None
    if save_results:
        results_df = pd.DataFrame([
            {
                'Date': p['date'].strftime('%Y-%m-%d'),
                'Actual': p['actual'],
                **{f"{model}_pred": pred_data['prediction']
                   for model, pred_data in p['predictions'].items()}
            }
            for p in results['predictions']
        ])

        results_df.to_csv(f'prediction_results_{n_days}days.csv', index=False)
        metrics_df.to_csv(f'prediction_metrics_{n_days}days.csv', index=False)
        print("\nResults saved to CSV files")

    return {
        'results': results,
        'figure': fig,
        'metrics': metrics_df,
        'predictions_df': results_df
    }


# Usage example with your data
if __name__ == "__main__":

    # Configuration
    config = {
        'csv_file_path': 'Data/Quote-CD-USDINR-15-09-2024-to-15-09-2025.csv',
        'n_days': 4,  # Predict last 4 days
        'context_length': 30,  # Use the 30 preceding days as context
        'target_column': 'OPEN PRICE',
        'date_column': 'DATE',
        'save_results': True
    }

    # Run complete analysis
    analysis_results = run_multi_day_prediction_with_plots(**config)

    # Display the interactive plot
    analysis_results['figure'].show()

    print("\n" + "="*60)
    print("ANALYSIS COMPLETE")
    print("="*60)
    print(f"✓ Predicted last {config['n_days']} days")
    print("✓ Interactive plot created")
    print("✓ Metrics calculated")
    # Only claim the CSV export when it was actually requested
    if config['save_results']:
        print("✓ Results saved to CSV")


Loading data from Data/Quote-CD-USDINR-15-09-2024-to-15-09-2025.csv...
Predicting last 4 days using context of 30 days...
Predicting day 1/4: 2025-09-10
  ✓ tiny: 88.2013
  ✓ mini: 88.0084
  ✓ small: 87.9763
  ✓ base: 88.0406
  🎯 Ensemble: 88.0422 (Actual: 88.2200)
Predicting day 2/4: 2025-09-11
  ✓ tiny: 87.9946
  ✓ mini: 88.0589
  ✓ small: 87.9625
  ✓ base: 87.9625
  🎯 Ensemble: 87.9866 (Actual: 88.2500)
Predicting day 3/4: 2025-09-12
  ✓ tiny: 88.1075
  ✓ mini: 88.1075
  ✓ small: 88.0109
  ✓ base: 88.1075
  🎯 Ensemble: 88.0833 (Actual: 88.3000)
Predicting day 4/4: 2025-09-15
  ✓ tiny: 88.1822
  ✓ mini: 88.1499
  ✓ small: 87.9890
  ✓ base: 88.3109
  🎯 Ensemble: 88.1789 (Actual: 88.3000)

Creating visualizations...
Interactive plot saved as 'prediction_analysis.html'

Prediction Metrics:
        Model    MAE    MSE   RMSE  MAPE (%)  Predictions
chronos_small 0.2828 0.0806 0.2839    0.3204            4
 chronos_mini 0.1863 0.0352 0.1876    0.2111            4
 chronos_tiny 0.1461 0.029


ANALYSIS COMPLETE
✓ Predicted last 4 days
✓ Interactive plot created
✓ Metrics calculated
✓ Results saved to CSV
