In [1]:
import pandas as pd
import numpy as np
import torch
from chronos import BaseChronosPipeline
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')


In [None]:
class SlidingWindowFinancialPredictor:
    def __init__(self, csv_file_path, model_name="amazon/chronos-bolt-base", device="cuda"):
        """Initialize sliding window predictor for financial data"""
        self.csv_file_path = csv_file_path
        self.model_name = model_name
        self.device = device if torch.cuda.is_available() else "cpu"
        self.pipeline = None
        self.df = None
        self.results_df = None
        
    def load_data(self, date_col="DATE", close_col="CLOSE PRICE"):
        """Load and preprocess financial data"""
        print("Loading financial data for sliding window prediction...")
        
        # Load CSV
        self.df = pd.read_csv(self.csv_file_path)
        self.df.columns = self.df.columns.str.strip()
        
        # Convert date and sort
        self.df[date_col.strip()] = pd.to_datetime(self.df[date_col.strip()], format='%d-%b-%Y')
        self.df = self.df.sort_values(date_col.strip()).reset_index(drop=True)
        
        # Convert price columns to numeric
        price_cols = ['OPEN PRICE', 'HIGH PRICE', 'LOW PRICE', 'CLOSE PRICE', 'SETTLE PRICE']
        for col in price_cols:
            if col in self.df.columns:
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
        
        # Handle volume (remove commas)
        if 'Volume' in self.df.columns:
            self.df['Volume'] = self.df['Volume'].astype(str).str.replace(',', '')
            self.df['Volume'] = pd.to_numeric(self.df['Volume'], errors='coerce')
        
        # Remove rows with missing OHLC data
        self.df = self.df.dropna(subset=[close_col])
        
        print(f"Data loaded: {len(self.df)} records from {self.df[date_col.strip()].min()} to {self.df[date_col.strip()].max()}")
        return self.df
    
    def load_model(self):
        """Load the Chronos-Bolt model"""
        print(f"Loading {self.model_name} model...")
        
        self.pipeline = BaseChronosPipeline.from_pretrained(
            self.model_name,
            device_map=self.device,
            torch_dtype=torch.bfloat16,
        )
        print("Model loaded successfully!")
    
    def sliding_window_forecast(self, window_size=7, start_index=None, end_index=None):
        """
        Perform sliding window forecasting across the entire dataset
        
        Parameters:
        - window_size: Number of days to use as context for prediction
        - start_index: Starting index for predictions (default: window_size)
        - end_index: Ending index for predictions (default: len(data))
        """
        if self.pipeline is None:
            self.load_model()
        
        close_prices = self.df['CLOSE PRICE'].values
        dates = self.df['DATE'].values
        
        # Set default indices
        if start_index is None:
            start_index = window_size
        if end_index is None:
            end_index = len(close_prices)
        
        print(f"Running sliding window forecast...")
        print(f"Window size: {window_size} days")
        print(f"Predicting from index {start_index} to {end_index}")
        print(f"Total predictions: {end_index - start_index}")
        
        predictions = []
        actual_prices = []
        prediction_dates = []
        confidence_intervals = []
        
        # Progress tracking
        total_predictions = end_index - start_index
        
        for i in range(start_index, end_index):
            try:
                # Get context window
                context = torch.tensor(close_prices[i-window_size:i], dtype=torch.float32)
                actual_next = close_prices[i]
                
                # Generate forecast
                forecast = self.pipeline.predict(
                    context=context,
                    prediction_length=1
                )
                
                # Extract prediction
                forecast_np = forecast[0].cpu().numpy()
                
                if len(forecast_np.shape) == 2:  # [prediction_length, num_quantiles]
                    num_quantiles = forecast_np.shape[1]
                    if num_quantiles >= 5:
                        # Extract different quantiles
                        q10 = forecast_np[0, 0]
                        q30 = forecast_np[0, min(2, num_quantiles-1)]
                        q50 = forecast_np[0, num_quantiles//2]  # Median
                        q70 = forecast_np[0, min(num_quantiles-2, 3*num_quantiles//4)]
                        q90 = forecast_np[0, -1]
                        
                        prediction = q50  # Use median as prediction
                        confidence_intervals.append({'q10': q10, 'q30': q30, 'q50': q50, 'q70': q70, 'q90': q90})
                    else:
                        prediction = forecast_np[0, num_quantiles//2]
                        confidence_intervals.append({'q10': prediction, 'q30': prediction, 'q50': prediction, 
                                                   'q70': prediction, 'q90': prediction})
                else:
                    prediction = float(forecast_np[0])
                    confidence_intervals.append({'q10': prediction, 'q30': prediction, 'q50': prediction, 
                                               'q70': prediction, 'q90': prediction})
                
                predictions.append(prediction)
                actual_prices.append(actual_next)
                prediction_dates.append(dates[i])
                
                # Progress update
                if (i - start_index + 1) % 10 == 0 or (i - start_index + 1) == total_predictions:
                    progress = (i - start_index + 1) / total_predictions * 100
                    print(f"Progress: {progress:.1f}% ({i - start_index + 1}/{total_predictions})")
                
            except Exception as e:
                print(f"Prediction failed for index {i}: {e}")
                continue
        
        # Create results dataframe
        self.results_df = pd.DataFrame({
            'Date': prediction_dates,
            'Actual_Price': actual_prices,
            'Predicted_Price': predictions,
            'Error': np.array(actual_prices) - np.array(predictions),
            'Error_Pct': ((np.array(actual_prices) - np.array(predictions)) / np.array(actual_prices)) * 100,
            'Abs_Error_Pct': np.abs(((np.array(actual_prices) - np.array(predictions)) / np.array(actual_prices)) * 100)
        })
        
        # Add confidence intervals
        for i, ci in enumerate(confidence_intervals):
            for key, value in ci.items():
                if f'CI_{key}' not in self.results_df.columns:
                    self.results_df[f'CI_{key}'] = np.nan
                self.results_df.loc[i, f'CI_{key}'] = value
        
        # Calculate performance metrics
        self.calculate_performance_metrics()
        
        print(f"Sliding window forecast completed!")
        print(f"Total successful predictions: {len(self.results_df)}")
        
        return self.results_df
    
    def calculate_performance_metrics(self):
        """Calculate comprehensive performance metrics"""
        if self.results_df is None:
            return None
        
        actual = self.results_df['Actual_Price'].values
        predicted = self.results_df['Predicted_Price'].values
        
        # Basic metrics
        mape = np.mean(self.results_df['Abs_Error_Pct'])
        rmse = np.sqrt(np.mean(self.results_df['Error'] ** 2))
        mae = np.mean(np.abs(self.results_df['Error']))
        
        # Direction accuracy
        actual_directions = np.diff(actual) > 0
        pred_directions = predicted[1:] > actual[:-1]
        direction_accuracy = np.mean(actual_directions == pred_directions) * 100
        
        # Additional metrics
        r_squared = np.corrcoef(actual, predicted)[0, 1] ** 2
        mean_error = np.mean(self.results_df['Error'])
        std_error = np.std(self.results_df['Error'])
        
        # Accuracy thresholds
        within_05_pct = np.sum(self.results_df['Abs_Error_Pct'] < 0.5) / len(self.results_df) * 100
        within_10_pct = np.sum(self.results_df['Abs_Error_Pct'] < 1.0) / len(self.results_df) * 100
        within_20_pct = np.sum(self.results_df['Abs_Error_Pct'] < 2.0) / len(self.results_df) * 100
        
        self.performance_metrics = {
            'MAPE': mape,
            'RMSE': rmse,
            'MAE': mae,
            'Direction_Accuracy': direction_accuracy,
            'R_Squared': r_squared,
            'Mean_Error': mean_error,
            'Std_Error': std_error,
            'Within_0.5%': within_05_pct,
            'Within_1.0%': within_10_pct,
            'Within_2.0%': within_20_pct
        }
        
        return self.performance_metrics
    
    def plot_sliding_window_results(self, save_html=True):
        """Plot comprehensive sliding window forecast results"""
        if self.results_df is None:
            print("No results to plot. Run sliding_window_forecast() first.")
            return None
        
        fig = make_subplots(
            rows=4, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.05,
            subplot_titles=[
                'Actual vs Predicted Prices (Sliding Window)',
                'Prediction Error Over Time',
                'Cumulative Error Analysis', 
                'Performance Metrics Summary'
            ],
            row_heights=[0.4, 0.25, 0.2, 0.15],
            specs=[[{"secondary_y": False}],
                   [{"secondary_y": False}], 
                   [{"secondary_y": False}],
                   [{"type": "table"}]]
        )
        
        dates = self.results_df['Date']
        actual = self.results_df['Actual_Price']
        predicted = self.results_df['Predicted_Price']
        
        # --- ROW 1: Actual vs Predicted Prices ---
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=actual,
                mode='lines',
                name='Actual Price',
                line=dict(color='blue', width=2)
            ),
            row=1, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=predicted,
                mode='lines',
                name='Predicted Price',
                line=dict(color='red', width=2, dash='dash')
            ),
            row=1, col=1
        )
        
        # Add confidence intervals if available
        if 'CI_q90' in self.results_df.columns:
            fig.add_trace(
                go.Scatter(
                    x=dates,
                    y=self.results_df['CI_q90'],
                    mode='lines',
                    name='90% Upper',
                    line=dict(color='red', width=1),
                    showlegend=False
                ),
                row=1, col=1
            )
            
            fig.add_trace(
                go.Scatter(
                    x=dates,
                    y=self.results_df['CI_q10'],
                    mode='lines',
                    name='90% Lower',
                    line=dict(color='red', width=1),
                    fill='tonexty',
                    fillcolor='rgba(255,0,0,0.1)',
                    showlegend=False
                ),
                row=1, col=1
            )
        
        # --- ROW 2: Prediction Error Over Time ---
        error_colors = ['green' if abs(e) < 0.5 else 'orange' if abs(e) < 1.0 else 'red' 
                       for e in self.results_df['Error_Pct']]
        
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=self.results_df['Error_Pct'],
                mode='markers+lines',
                name='Error %',
                marker=dict(color=error_colors, size=4),
                line=dict(color='gray', width=1)
            ),
            row=2, col=1
        )
        
        # Add error threshold lines
        fig.add_hline(y=1.0, line_dash="dash", line_color="orange", row=2, col=1)
        fig.add_hline(y=-1.0, line_dash="dash", line_color="orange", row=2, col=1)
        fig.add_hline(y=0, line_dash="solid", line_color="black", row=2, col=1)
        
        # --- ROW 3: Cumulative Error Analysis ---
        cumulative_error = np.cumsum(self.results_df['Error_Pct'])
        rolling_avg_error = pd.Series(self.results_df['Error_Pct']).rolling(window=10).mean()
        
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=cumulative_error,
                mode='lines',
                name='Cumulative Error %',
                line=dict(color='purple', width=2)
            ),
            row=3, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=dates,
                y=rolling_avg_error,
                mode='lines',
                name='10-Day Avg Error %',
                line=dict(color='orange', width=2)
            ),
            row=3, col=1
        )
        
        # --- ROW 4: Performance Metrics Table ---
        if hasattr(self, 'performance_metrics'):
            metrics_data = [
                ['MAPE', f"{self.performance_metrics['MAPE']:.2f}%"],
                ['RMSE', f"{self.performance_metrics['RMSE']:.4f}"],
                ['Direction Accuracy', f"{self.performance_metrics['Direction_Accuracy']:.1f}%"],
                ['R²', f"{self.performance_metrics['R_Squared']:.3f}"],
                ['Within ±0.5%', f"{self.performance_metrics['Within_0.5%']:.1f}%"],
                ['Within ±1.0%', f"{self.performance_metrics['Within_1.0%']:.1f}%"],
                ['Within ±2.0%', f"{self.performance_metrics['Within_2.0%']:.1f}%"],
                ['Mean Error', f"{self.performance_metrics['Mean_Error']:.4f}"],
                ['Predictions', f"{len(self.results_df)}"]
            ]
            
            fig.add_trace(
                go.Table(
                    header=dict(values=['Metric', 'Value'], 
                               fill_color='lightblue',
                               align='left'),
                    cells=dict(values=[[row[0] for row in metrics_data], 
                                     [row[1] for row in metrics_data]],
                              fill_color='white',
                              align='left')
                ),
                row=4, col=1
            )
        
        # Update layout
        fig.update_layout(
            title='Comprehensive Sliding Window Forecast Analysis',
            height=1000,
            showlegend=True,
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )
        
        # Update axes
        fig.update_xaxes(title_text="Date", row=4, col=1)
        fig.update_yaxes(title_text="Price (₹)", row=1, col=1)
        fig.update_yaxes(title_text="Error (%)", row=2, col=1)
        fig.update_yaxes(title_text="Cumulative Error (%)", row=3, col=1)
        
        if save_html:
            filename = "sliding_window_forecast_analysis.html"
            fig.write_html(filename)
            print(f"Comprehensive analysis saved to '{filename}'")
        
        fig.show()
        return fig
    
    def export_results(self, filename="sliding_window_results.csv"):
        """Export results to CSV for further analysis"""
        if self.results_df is not None:
            self.results_df.to_csv(filename, index=False)
            print(f"Results exported to '{filename}'")
        else:
            print("No results to export. Run sliding_window_forecast() first.")



In [3]:


# Main execution function
def run_complete_sliding_window_analysis(csv_file="Quote-CD-USDINR-15-09-2024-to-15-09-2025.csv", 
                                       window_size=7):
    """Run complete sliding window analysis from start to end of dataset"""
    
    print("="*60)
    print("SLIDING WINDOW FINANCIAL FORECAST ANALYSIS")
    print("="*60)
    
    # Initialize predictor
    predictor = SlidingWindowFinancialPredictor(
        csv_file_path=csv_file,
        model_name="amazon/chronos-bolt-base",
        device="cuda"
    )
    
    # Load data
    predictor.load_data()
    
    # Run sliding window forecast for entire dataset
    results_df = predictor.sliding_window_forecast(
        window_size=window_size,
        start_index=window_size,  # Start after we have enough history
        end_index=None  # Go to the end
    )
    
    # Plot comprehensive results
    fig = predictor.plot_sliding_window_results(save_html=True)
    
    # Print summary
    if hasattr(predictor, 'performance_metrics'):
        metrics = predictor.performance_metrics
        print("\n" + "="*50)
        print("SLIDING WINDOW PERFORMANCE SUMMARY")
        print("="*50)
        print(f"Total Predictions: {len(results_df)}")
        print(f"MAPE: {metrics['MAPE']:.2f}%")
        print(f"Direction Accuracy: {metrics['Direction_Accuracy']:.1f}%")
        print(f"R-Squared: {metrics['R_Squared']:.3f}")
        print(f"Predictions within ±0.5%: {metrics['Within_0.5%']:.1f}%")
        print(f"Predictions within ±1.0%: {metrics['Within_1.0%']:.1f}%")
        print(f"Mean Error: {metrics['Mean_Error']:.4f}")
        
        # Trading profitability analysis
        correct_directions = metrics['Direction_Accuracy']
        if correct_directions > 55:
            print(f"\n🟢 Model shows profitable potential (>{correct_directions:.1f}% direction accuracy)")
        elif correct_directions > 50:
            print(f"\n🟡 Model shows marginal potential ({correct_directions:.1f}% direction accuracy)")
        else:
            print(f"\n🔴 Model needs improvement ({correct_directions:.1f}% direction accuracy)")
    
    # Export results
    predictor.export_results("sliding_window_predictions.csv")
    
    return predictor, results_df


In [5]:

# Run the complete analysis
predictor, results = run_complete_sliding_window_analysis(
    csv_file="Data/Quote-CD-USDINR-15-09-2024-to-15-09-2025.csv",
    window_size=20
)


SLIDING WINDOW FINANCIAL FORECAST ANALYSIS
Loading financial data for sliding window prediction...
Data loaded: 235 records from 2024-09-26 00:00:00 to 2025-09-12 00:00:00
Loading amazon/chronos-bolt-base model...
Model loaded successfully!
Running sliding window forecast...
Window size: 20 days
Predicting from index 20 to 235
Total predictions: 215
Progress: 4.7% (10/215)
Progress: 9.3% (20/215)
Progress: 14.0% (30/215)
Progress: 18.6% (40/215)
Progress: 23.3% (50/215)
Progress: 27.9% (60/215)
Progress: 32.6% (70/215)
Progress: 37.2% (80/215)
Progress: 41.9% (90/215)
Progress: 46.5% (100/215)
Progress: 51.2% (110/215)
Progress: 55.8% (120/215)
Progress: 60.5% (130/215)
Progress: 65.1% (140/215)
Progress: 69.8% (150/215)
Progress: 74.4% (160/215)
Progress: 79.1% (170/215)
Progress: 83.7% (180/215)
Progress: 88.4% (190/215)
Progress: 93.0% (200/215)
Progress: 97.7% (210/215)
Progress: 100.0% (215/215)
Sliding window forecast completed!
Total successful predictions: 215
Comprehensive ana


SLIDING WINDOW PERFORMANCE SUMMARY
Total Predictions: 215
MAPE: 0.49%
Direction Accuracy: 72.4%
R-Squared: 0.714
Predictions within ±0.5%: 58.1%
Predictions within ±1.0%: 91.2%
Mean Error: 0.3702

🟢 Model shows profitable potential (>72.4% direction accuracy)
Results exported to 'sliding_window_predictions.csv'
