In [1]:
import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import torch

In [2]:
torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

Using device: NVIDIA GeForce GTX 1050 Ti


In [3]:

def prepare_usdinr_data(file_path):
    """Prepare USDINR data for AutoGluon TimeSeriesPredictor"""
    
    # Load processed data
    data = pd.read_csv(file_path, index_col=0, parse_dates=True)
    
    # Reset index to get timestamp as column
    data = data.reset_index()
    data['item_id'] = 'USDINR'
    data = data.rename(columns={data.columns[0]: 'timestamp'})
    
    # Set target as 'Open' (next day opening price)
    target_col = 'Open'
    
    # Select required columns: item_id, timestamp, target
    # AutoGluon requires this specific format
    ts_data = data[['item_id', 'timestamp', target_col]].copy()
    ts_data = ts_data.rename(columns={target_col: 'target'})
    
    # Known covariates (features) - we'll add these separately
    known_covariates = [
        'Close', 'High', 'Low', 'Volume',
        'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26',
        # 'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist',
        # 'BB_upper', 'BB_middle', 'BB_lower', 'ATR_14',
        # 'Price_Change', 'Volatility_20', 'HL_Ratio',
        # 'Stoch_K', 'Stoch_D',
        # 'Hour', 'DayOfWeek', 'Month'
    ]
    
    # Add covariates to the dataframe
    for covariate in known_covariates:
        if covariate in data.columns:
            ts_data[covariate] = data[covariate]
    
    # Create TimeSeriesDataFrame with correct parameters
    ts_df = TimeSeriesDataFrame.from_data_frame(
        ts_data,
        id_column='item_id',
        timestamp_column='timestamp'
    )
    
    return ts_df, known_covariates



In [None]:

def train_usdinr_forecaster(file_path):
    """Train USDINR forecasting model with Chronos-Bolt-Base"""
    
    # Load and prepare data
    ts_df, known_covariates = prepare_usdinr_data(file_path)
    
    print(f"Training data shape: {ts_df.shape}")
    print(f"Available columns: {list(ts_df.columns)}")
    print(f"Known covariates: {len(known_covariates)}")
    
    # Split data for validation (last 300 days for testing)
    all_timestamps = ts_df.index.get_level_values('timestamp').unique()
    cutoff_date = sorted(all_timestamps)[-300]  # Last 300 days for validation

    train_data = ts_df[ts_df.index.get_level_values('timestamp') < cutoff_date]
    test_data = ts_df[ts_df.index.get_level_values('timestamp') >= cutoff_date]
    
    print(f"Train data: {train_data.shape}")
    print(f"Test data: {test_data.shape}")
    
    # Configure AutoGluon TimeSeriesPredictor with path
    predictor = TimeSeriesPredictor(
        path='models/usdinr_chronos_predictor',
        prediction_length=4,          # Predict next 4 days
        target='target',              # Target column name
        eval_metric='MASE',           # Mean Absolute Scaled Error
        known_covariates_names=known_covariates,  # Technical indicators
        freq='D',                     # Daily frequency - CRITICAL FIX
        verbosity=2
    )
    
    # Optimized hyperparameters for Chronos-Bolt-Base
    hyperparameters = {
        
        'Chronos': {
            "fine_tune": True,
            'model_path': 'amazon/chronos-bolt-base',
            'context_length': 64,           # Reduced context
            'num_samples': 25,              # Fewer samples
            'learning_rate': 0.0005,        # Lower learning rate
            'max_steps': 100,              # Early stopping
            'torch_dtype': 'float32',                 # Precision
            'device_map': 'auto',                     # GPU allocation
            'ag_args': {
                'name_suffix': '_USDINR_FineTuned_base_early_stopping'
            }
        }
    }
    
    # Train the model
    print("🚀 Starting Chronos-Bolt-Base training for USDINR...")
    
    try:
        predictor.fit(
            train_data=train_data,
            hyperparameters=hyperparameters,
            time_limit=3600,              # 1 hour maximum
            presets='best_quality',       # Focus on accuracy
            enable_ensemble=False,        # Single model
            verbosity=2
        )
        
        # Evaluate on test data
        print("📊 Evaluating model performance...")
        
        if len(test_data) > 0:
            # Use evaluate for metrics
            scores = predictor.evaluate(test_data)
            print("Evaluation scores:")
            print(scores)
            
            leaderboard = predictor.leaderboard(test_data)
            print("Leaderboard:")
            print(leaderboard)
        
        # Generate future forecasts
        print("🔮 Generating future forecasts...")
        
        # Create future known covariates by forward-filling last known values
        last_timestamp = train_data.index.get_level_values('timestamp')[-1]
        future_timestamps = [last_timestamp + pd.Timedelta(days=i) for i in range(1, 5)]
        
        future_df = pd.DataFrame({
            'timestamp': future_timestamps,
            'item_id': 'USDINR'
        })
        
        # Forward-fill covariates from last row
        last_values = train_data.iloc[-1]
        for col in known_covariates:
            future_df[col] = last_values[col]
        
        # Update time-based covariates
        future_df['Hour'] = 0  # Assume market open hour
        future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
        future_df['Month'] = future_df['timestamp'].dt.month
        
        # Create TimeSeriesDataFrame for known covariates
        future_known = TimeSeriesDataFrame.from_data_frame(
            future_df,
            id_column='item_id',
            timestamp_column='timestamp'
        )
        
        future_predictions = predictor.predict(
            data=train_data,
            known_covariates=future_known
        )
        
        print("Future 4-day USDINR opening price predictions:")
        print(future_predictions.tail())
        
        return predictor, future_predictions
        
    except Exception as e:
        print(f"Training error: {str(e)}")
        raise e



Loading predictor from path /workspaces/Market/models/usdinr_chronos_predictor


🚀 Starting USDINR Inference Pipeline...
🔄 Loading model from: models/usdinr_chronos_predictor
✅ Model loaded successfully!
📊 Loading data from: Data/USDINR_day_2025-01-2025-09-Infer-processed.csv
✅ Data preprocessed: 121 rows, 13 columns
📅 Date range: 2025-03-12 00:00:00 to 2025-08-29 00:00:00
🔮 Generating predictions...
❌ Error during prediction: TimeSeriesPredictor.predict() got an unexpected keyword argument 'quantile_levels'
❌ Inference failed - check error messages above


In [5]:

# def simple_chronos_training():
#     """Simplified version focusing on core functionality"""
    
#     # Load and prepare data
#     file_path = 'Data/USDINR-train-processed.csv'
#     data = pd.read_csv(file_path, index_col=0, parse_dates=True)
    
#     # Create minimal TimeSeriesDataFrame
#     data_reset = data.reset_index()
#     data_reset['item_id'] = 'USDINR'
#     data_reset = data_reset.rename(columns={
#         data_reset.columns[0]: 'timestamp',
#         'Open': 'target'  # Use Open price as target
#     })
    
#     # Keep only essential columns
#     essential_cols = ['item_id', 'timestamp', 'target', 'Close', 'High', 'Low', 'Volume',
#                       'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26',
#                       'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist',
#                       'BB_upper', 'BB_middle', 'BB_lower', 'ATR_14',
#                       'Price_Change', 'Volatility_20', 'HL_Ratio',
#                       'Stoch_K', 'Stoch_D',
#                       'Hour', 'DayOfWeek', 'Month']
    
#     train_data = data_reset[essential_cols].dropna()
    
#     # Create TimeSeriesDataFrame
#     ts_df = TimeSeriesDataFrame.from_data_frame(
#         train_data,
#         id_column='item_id',
#         timestamp_column='timestamp'
#     )
    
#     print(f"Simplified data shape: {ts_df.shape}")
#     print(f"Columns: {list(ts_df.columns)}")
    
#     # Simple predictor configuration
#     predictor = TimeSeriesPredictor(
#         prediction_length=4,
#         target='target',
#         eval_metric='MASE',
#         freq='D',                                    # CRITICAL FIX: Added freq
#         known_covariates_names=essential_cols[3:],   # All except item_id, timestamp, target
#         verbosity=2
#     )
    
#     # Simple hyperparameters
#     hyperparameters = {
#         'Chronos': {
#             'model_path': 'amazon/chronos-bolt-base',
#             'context_length': 256,                    # Added context_length
#             'ag_args': {'name_suffix': '_Simple'}
#         }
#     }
    
#     # Train
#     print("🚀 Starting simplified Chronos training...")
    
#     predictor.fit(
#         train_data=ts_df,
#         hyperparameters=hyperparameters,
#         time_limit=2400,  # 40 minutes
#         enable_ensemble=False,  # Single model to avoid ensemble issues
#         verbosity=2
#     )
    
#     # CRITICAL FIX: Create future covariate data for prediction
#     print("🔮 Preparing future covariate data...")
    
#     # Get the last timestamp
#     last_timestamp = ts_df.index.get_level_values('timestamp')[-1]
#     future_timestamps = [last_timestamp + pd.Timedelta(days=i) for i in range(1, 5)]
    
#     future_df = pd.DataFrame({
#         'timestamp': future_timestamps,
#         'item_id': 'USDINR'
#     })
    
#     # Forward-fill covariates from last row
#     last_values = ts_df.iloc[-1]
#     for col in essential_cols[3:]:
#         future_df[col] = last_values[col]
    
#     # Update time-based features
#     future_df['Hour'] = 0  # Market opening hour
#     future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
#     future_df['Month'] = future_df['timestamp'].dt.month
    
#     # Create TimeSeriesDataFrame for known covariates
#     future_known = TimeSeriesDataFrame.from_data_frame(
#         future_df,
#         id_column='item_id',
#         timestamp_column='timestamp'
#     )
    
#     # Predict with history and future known covariates
#     predictions = predictor.predict(
#         data=ts_df,
#         known_covariates=future_known
#     )
#     print("✅ Training completed!")
#     print("Last few predictions:")
#     print(predictions.tail())
    
#     return predictor, predictions



In [6]:

try:
    # Ensure directory exists
    import os
    os.makedirs('models', exist_ok=True)
    file_name = 'Data/USDINR_day_2022-2025-Train-processed.csv'
    predictor, predictions = train_usdinr_forecaster(file_name)
        
        # Save the predictor (no path argument needed since specified in constructor)
    predictor.save()
        
    predictions.to_csv('Data/USDINR_predictions.csv')
        
    print("✅ Model and predictions saved successfully!")
        
    # Display final results
    print("\n🎯 Final Results Summary:")
    print("Model saved to: models/usdinr_chronos_predictor")
    print("Predictions saved to: Data/USDINR_predictions.csv")
    print(f"Prediction shape: {predictions.shape}")
        
    # Show sample predictions
    print("\n📈 Sample Future USDINR Opening Price Predictions:")
    if not predictions.empty:
        latest = predictions.tail(4)  # Last 4 predictions
        for idx, row in latest.iterrows():
            timestamp = idx[1] if isinstance(idx, tuple) else idx
            median_pred = row.get('0.5', row.get('mean', 'N/A'))
            print(f"  {timestamp}: {median_pred}")
        
except Exception as e:
    print(f"❌ Error: {str(e)}")
    print("\nTroubleshooting tips:")
    print("1. Ensure AutoGluon is installed: pip install autogluon.timeseries")
    print("2. Check if chronos models are available: pip install transformers>=4.30.0")
    print("3. Try the simplified training mode first")
    print("4. Check GPU memory availability for large context windows")


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to '/workspaces/Market/models/usdinr_chronos_predictor'
AutoGluon Version:  1.4.0
Python Version:     3.11.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #29~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Aug 14 16:52:50 UTC 2
CPU Count:          8
GPU Count:          1
Memory Avail:       13.93 GB / 23.31 GB (59.8%)
Disk Space Avail:   357.58 GB / 931.51 GB (38.4%)
Setting presets to: best_quality

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': MASE,
 'freq': 'D',
 'hyperparameters': {'Chronos': {'ag_args': {'name_suffix': '_USDINR_FineTuned_base_early_stopping'},
                                 'context_length': 64,
                                 'device_map': 'auto',
                                 'fine_tune': True,
                                 'learning_rate': 0.0005,
                                 'max_steps': 100,
                                 'mode

Training data shape: (3077, 9)
Available columns: ['target', 'Close', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26']
Known covariates: 8
Train data: (2777, 9)
Test data: (300, 9)
🚀 Starting Chronos-Bolt-Base training for USDINR...


Provided train_data has 3893 rows (NaN fraction=28.7%), 1 time series. Median time series length is 3893 (min=3893, max=3893). 

Provided data contains following columns:
	target: 'target'
	known_covariates:
		categorical:        []
		continuous (float): ['Close', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50', ...]

To learn how to fix incorrectly inferred types, please see documentation for TimeSeriesPredictor.fit

AutoGluon will gauge predictive performance using evaluation metric: 'MASE'
	This metric's sign has been flipped to adhere to being higher_is_better. The metric score can be multiplied by -1 to get the metric value.

Starting training. Start time is 2025-09-18 18:00:28
Models that will be trained: ['Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]']
Training timeseries model Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]. Training for up to 3597.9s of the 3597.9s of remaining time.
Chronos_USDINR_FineTuned_base_early_stopping[am

📊 Evaluating model performance...


Model not specified in predict, will default to the model with the best validation score: Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]
data with frequency 'B' has been resampled to frequency 'D'.


Evaluation scores:
{'MASE': -0.8907262894238714}


Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).
data with frequency 'IRREG' has been resampled to frequency 'D'.


Leaderboard:
                                               model  score_test  score_val  \
0  Chronos_USDINR_FineTuned_base_early_stopping[a...   -0.890726  -0.883582   

   pred_time_test  pred_time_val  fit_time_marginal  fit_order  
0        1.633249       0.101114        1418.279238          1  
🔮 Generating future forecasts...


Model not specified in predict, will default to the model with the best validation score: Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]


Future 4-day USDINR opening price predictions:
                         mean        0.1        0.2        0.3        0.4  \
item_id timestamp                                                           
USDINR  2022-11-05  82.570511  82.333870  82.405388  82.465958  82.519760   
        2022-11-06  82.529160  82.274330  82.360153  82.416649  82.473732   
        2022-11-07  82.530190  82.280724  82.375801  82.424553  82.470108   
        2022-11-08  82.575310  82.282120  82.380386  82.451096  82.514839   

                          0.5        0.6        0.7        0.8        0.9  
item_id timestamp                                                          
USDINR  2022-11-05  82.570511  82.631821  82.692719  82.771484  82.913429  
        2022-11-06  82.529160  82.592812  82.675163  82.770248  82.916649  
        2022-11-07  82.530190  82.595840  82.667358  82.764641  82.906204  
        2022-11-08  82.575310  82.640495  82.706879  82.791748  82.939308  
✅ Model and predictions saved succ

In [8]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import numpy as np
from io import StringIO

def load_real_data():
    """Load real USDINR data from the provided sample"""
    real_data_csv = '''item_id,timestamp,mean,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
USDINR,2023-12-02,83.38365,83.28604,83.318886,83.34383,83.36482,83.38365,83.40262,83.42266,83.447,83.496185
USDINR,2023-12-03,83.381645,83.27629,83.3118,83.338394,83.36119,83.381645,83.40211,83.42462,83.45216,83.505455
USDINR,2023-12-04,83.374954,83.26521,83.302635,83.330086,83.353065,83.374954,83.39622,83.41901,83.445854,83.50398
USDINR,2023-12-05,83.36032,83.244774,83.28433,83.31224,83.33689,83.36032,83.3827,83.40624,83.43437,83.500305'''
    
    real_df = pd.read_csv(StringIO(real_data_csv))
    real_df['timestamp'] = pd.to_datetime(real_df['timestamp'])
    return real_df

def load_predictions():
    """Load predictions from CSV file"""
    try:
        pred_df = pd.read_csv('Data/USDINR_predictions.csv')
        
        # Try to identify the date and prediction columns
        date_columns = [col for col in pred_df.columns if 'date' in col.lower() or 'timestamp' in col.lower()]
        pred_columns = [col for col in pred_df.columns if 'mean' in col.lower() or 'pred' in col.lower() or 'forecast' in col.lower()]
        
        if date_columns:
            date_col = date_columns[0]
        elif 'timestamp' in pred_df.columns:
            date_col = 'timestamp'
        else:
            # Assume first column is date
            date_col = pred_df.columns[0]
            
        if pred_columns:
            pred_col = pred_columns[0]
        elif 'mean' in pred_df.columns:
            pred_col = 'mean'
        else:
            # Assume second column or any numeric column is prediction
            numeric_cols = pred_df.select_dtypes(include=[np.number]).columns
            pred_col = numeric_cols[0] if len(numeric_cols) > 0 else pred_df.columns[1]
        
        pred_df['timestamp'] = pd.to_datetime(pred_df[date_col])
        pred_df = pred_df.rename(columns={pred_col: 'predicted_mean'})
        
        return pred_df[['timestamp', 'predicted_mean']]
        
    except FileNotFoundError:
        print("Predictions file not found. Creating sample predictions for demonstration.")
        # Create sample predictions based on real data dates + some future dates
        real_df = load_real_data()
        
        # Generate some sample predictions
        sample_predictions = []
        base_value = 83.37
        
        for i in range(len(real_df) + 5):  # Include some future predictions
            if i < len(real_df):
                date = real_df.iloc[i]['timestamp']
            else:
                # Add future dates
                last_date = real_df.iloc[-1]['timestamp']
                date = last_date + pd.Timedelta(days=(i - len(real_df) + 1))
            
            # Simple trend with some noise for demonstration
            predicted_value = base_value + (i * 0.01) + np.random.normal(0, 0.05)
            sample_predictions.append({'timestamp': date, 'predicted_mean': predicted_value})
        
        return pd.DataFrame(sample_predictions)

def create_visualization():
    """Create Plotly visualization comparing real vs predicted values"""
    
    # Load data
    real_df = load_real_data()
    pred_df = load_predictions()
    
    # Create the plot
    fig = go.Figure()
    
    # Add real values line
    fig.add_trace(go.Scatter(
        x=real_df['timestamp'],
        y=real_df['mean'],
        mode='lines+markers',
        name='Actual USDINR',
        line=dict(color='blue', width=2),
        marker=dict(size=6)
    ))
    
    # Add prediction values line
    fig.add_trace(go.Scatter(
        x=pred_df['timestamp'],
        y=pred_df['predicted_mean'],
        mode='lines+markers',
        name='Predicted USDINR',
        line=dict(color='red', width=2, dash='dash'),
        marker=dict(size=6)
    ))
    
    # Add confidence intervals if available in real data
    if all(col in real_df.columns for col in ['0.1', '0.9']):
        fig.add_trace(go.Scatter(
            x=real_df['timestamp'],
            y=real_df['0.9'],
            mode='lines',
            line=dict(width=0),
            showlegend=False,
            hoverinfo='skip'
        ))
        
        fig.add_trace(go.Scatter(
            x=real_df['timestamp'],
            y=real_df['0.1'],
            mode='lines',
            line=dict(width=0),
            fill='tonexty',
            fillcolor='rgba(0, 100, 200, 0.2)',
            name='Confidence Interval (10%-90%)',
            hoverinfo='skip'
        ))
    
    # Update layout
    fig.update_layout(
        title={
            'text': 'USDINR Exchange Rate: Actual vs Predicted Values',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 20}
        },
        xaxis_title='Date',
        yaxis_title='USDINR Exchange Rate',
        hovermode='x unified',
        template='plotly_white',
        width=1000,
        height=600,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )
    
    # Add grid
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    
    return fig

def calculate_metrics(real_df, pred_df):
    """Calculate prediction accuracy metrics"""
    # Merge dataframes on timestamp for common dates
    merged_df = pd.merge(real_df[['timestamp', 'mean']], 
                        pred_df[['timestamp', 'predicted_mean']], 
                        on='timestamp', how='inner')
    
    if len(merged_df) > 0:
        mae = np.mean(np.abs(merged_df['mean'] - merged_df['predicted_mean']))
        mse = np.mean((merged_df['mean'] - merged_df['predicted_mean'])**2)
        rmse = np.sqrt(mse)
        mape = np.mean(np.abs((merged_df['mean'] - merged_df['predicted_mean']) / merged_df['mean'])) * 100
        
        print(f"\nPrediction Accuracy Metrics:")
        print(f"Mean Absolute Error (MAE): {mae:.4f}")
        print(f"Mean Squared Error (MSE): {mse:.4f}")
        print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
        print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    else:
        print("No overlapping dates found between actual and predicted data.")

if __name__ == "__main__":
    # Create and display the visualization
    fig = create_visualization()
    fig.show()
    
    # Calculate and display metrics
    real_data = load_real_data()
    pred_data = load_predictions()
    calculate_metrics(real_data, pred_data)
    
    # Optional: Save the plot as HTML
    fig.write_html("USDINR_actual_vs_predicted.html")
    print(f"\nPlot saved as 'USDINR_actual_vs_predicted.html'")


No overlapping dates found between actual and predicted data.

Plot saved as 'USDINR_actual_vs_predicted.html'
