In [1]:
import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame

In [2]:


def prepare_usdinr_data(file_path):
    """Prepare USDINR data for AutoGluon TimeSeriesPredictor"""
    
    # Load processed data
    data = pd.read_csv(file_path, index_col=0, parse_dates=True)
    
    # Reset index to get timestamp as column
    data = data.reset_index()
    data['item_id'] = 'USDINR'
    data = data.rename(columns={data.columns[0]: 'timestamp'})
    
    # Set target as 'Open' (next day opening price)
    target_col = 'Open'
    
    # Select required columns: item_id, timestamp, target
    # AutoGluon requires this specific format
    ts_data = data[['item_id', 'timestamp', target_col]].copy()
    ts_data = ts_data.rename(columns={target_col: 'target'})
    
    # Known covariates (features) - we'll add these separately
    known_covariates = [
        'Close', 'High', 'Low', 'Volume',
        'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26',
        'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist',
        'BB_upper', 'BB_middle', 'BB_lower', 'ATR_14',
        'Price_Change', 'Volatility_20', 'HL_Ratio',
        'Stoch_K', 'Stoch_D',
        'Hour', 'DayOfWeek', 'Month'
    ]
    
    # Add covariates to the dataframe
    for covariate in known_covariates:
        if covariate in data.columns:
            ts_data[covariate] = data[covariate]
    
    # Create TimeSeriesDataFrame with correct parameters
    ts_df = TimeSeriesDataFrame.from_data_frame(
        ts_data,
        id_column='item_id',
        timestamp_column='timestamp'
    )
    
    return ts_df, known_covariates



In [3]:

def train_usdinr_forecaster():
    """Train USDINR forecasting model with Chronos-Bolt-Base"""
    
    # Load and prepare data
    file_path = 'Data/USDINR-train-processed.csv'
    ts_df, known_covariates = prepare_usdinr_data(file_path)
    
    print(f"Training data shape: {ts_df.shape}")
    print(f"Available columns: {list(ts_df.columns)}")
    print(f"Known covariates: {len(known_covariates)}")
    
    # Split data for validation (last 20 days for testing)
    all_timestamps = ts_df.index.get_level_values('timestamp').unique()
    cutoff_date = sorted(all_timestamps)[-20]  # Last 20 days for validation
    
    train_data = ts_df[ts_df.index.get_level_values('timestamp') < cutoff_date]
    test_data = ts_df[ts_df.index.get_level_values('timestamp') >= cutoff_date]
    
    print(f"Train data: {train_data.shape}")
    print(f"Test data: {test_data.shape}")
    
    # Configure AutoGluon TimeSeriesPredictor with path
    predictor = TimeSeriesPredictor(
        path='models/usdinr_chronos_predictor',
        prediction_length=4,          # Predict next 4 days
        target='target',              # Target column name
        eval_metric='MASE',           # Mean Absolute Scaled Error
        known_covariates_names=known_covariates,  # Technical indicators
        freq='D',                     # Daily frequency - CRITICAL FIX
        verbosity=2
    )
    
    # Optimized hyperparameters for Chronos-Bolt-Base
    hyperparameters = {
        
        'Chronos': {
            "fine_tune": True,
            'model_path': 'amazon/chronos-bolt-base',
            'context_length': 64,           # Reduced context
            'num_samples': 25,              # Fewer samples
            'learning_rate': 0.0005,        # Lower learning rate
            'max_steps': 300,              # Early stopping
            'torch_dtype': 'float32',                 # Precision
            'device_map': 'auto',                     # GPU allocation
            'ag_args': {
                'name_suffix': '_USDINR_FineTuned_base_early_stopping'
            }
        }
    }
    
    # Train the model
    print("🚀 Starting Chronos-Bolt-Base training for USDINR...")
    
    try:
        predictor.fit(
            train_data=train_data,
            hyperparameters=hyperparameters,
            time_limit=3600,              # 1 hour maximum
            presets='best_quality',       # Focus on accuracy
            enable_ensemble=False,        # Single model
            verbosity=2
        )
        
        # Evaluate on test data
        print("📊 Evaluating model performance...")
        
        if len(test_data) > 0:
            # Use evaluate for metrics
            scores = predictor.evaluate(test_data)
            print("Evaluation scores:")
            print(scores)
            
            leaderboard = predictor.leaderboard(test_data)
            print("Leaderboard:")
            print(leaderboard)
        
        # Generate future forecasts
        print("🔮 Generating future forecasts...")
        
        # Create future known covariates by forward-filling last known values
        last_timestamp = train_data.index.get_level_values('timestamp')[-1]
        future_timestamps = [last_timestamp + pd.Timedelta(days=i) for i in range(1, 5)]
        
        future_df = pd.DataFrame({
            'timestamp': future_timestamps,
            'item_id': 'USDINR'
        })
        
        # Forward-fill covariates from last row
        last_values = train_data.iloc[-1]
        for col in known_covariates:
            future_df[col] = last_values[col]
        
        # Update time-based covariates
        future_df['Hour'] = 0  # Assume market open hour
        future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
        future_df['Month'] = future_df['timestamp'].dt.month
        
        # Create TimeSeriesDataFrame for known covariates
        future_known = TimeSeriesDataFrame.from_data_frame(
            future_df,
            id_column='item_id',
            timestamp_column='timestamp'
        )
        
        future_predictions = predictor.predict(
            data=train_data,
            known_covariates=future_known
        )
        
        print("Future 4-day USDINR opening price predictions:")
        print(future_predictions.tail())
        
        return predictor, future_predictions
        
    except Exception as e:
        print(f"Training error: {str(e)}")
        raise e



In [4]:

# def simple_chronos_training():
#     """Simplified version focusing on core functionality"""
    
#     # Load and prepare data
#     file_path = 'Data/USDINR-train-processed.csv'
#     data = pd.read_csv(file_path, index_col=0, parse_dates=True)
    
#     # Create minimal TimeSeriesDataFrame
#     data_reset = data.reset_index()
#     data_reset['item_id'] = 'USDINR'
#     data_reset = data_reset.rename(columns={
#         data_reset.columns[0]: 'timestamp',
#         'Open': 'target'  # Use Open price as target
#     })
    
#     # Keep only essential columns
#     essential_cols = ['item_id', 'timestamp', 'target', 'Close', 'High', 'Low', 'Volume',
#                       'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26',
#                       'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist',
#                       'BB_upper', 'BB_middle', 'BB_lower', 'ATR_14',
#                       'Price_Change', 'Volatility_20', 'HL_Ratio',
#                       'Stoch_K', 'Stoch_D',
#                       'Hour', 'DayOfWeek', 'Month']
    
#     train_data = data_reset[essential_cols].dropna()
    
#     # Create TimeSeriesDataFrame
#     ts_df = TimeSeriesDataFrame.from_data_frame(
#         train_data,
#         id_column='item_id',
#         timestamp_column='timestamp'
#     )
    
#     print(f"Simplified data shape: {ts_df.shape}")
#     print(f"Columns: {list(ts_df.columns)}")
    
#     # Simple predictor configuration
#     predictor = TimeSeriesPredictor(
#         prediction_length=4,
#         target='target',
#         eval_metric='MASE',
#         freq='D',                                    # CRITICAL FIX: Added freq
#         known_covariates_names=essential_cols[3:],   # All except item_id, timestamp, target
#         verbosity=2
#     )
    
#     # Simple hyperparameters
#     hyperparameters = {
#         'Chronos': {
#             'model_path': 'amazon/chronos-bolt-base',
#             'context_length': 256,                    # Added context_length
#             'ag_args': {'name_suffix': '_Simple'}
#         }
#     }
    
#     # Train
#     print("🚀 Starting simplified Chronos training...")
    
#     predictor.fit(
#         train_data=ts_df,
#         hyperparameters=hyperparameters,
#         time_limit=2400,  # 40 minutes
#         enable_ensemble=False,  # Single model to avoid ensemble issues
#         verbosity=2
#     )
    
#     # CRITICAL FIX: Create future covariate data for prediction
#     print("🔮 Preparing future covariate data...")
    
#     # Get the last timestamp
#     last_timestamp = ts_df.index.get_level_values('timestamp')[-1]
#     future_timestamps = [last_timestamp + pd.Timedelta(days=i) for i in range(1, 5)]
    
#     future_df = pd.DataFrame({
#         'timestamp': future_timestamps,
#         'item_id': 'USDINR'
#     })
    
#     # Forward-fill covariates from last row
#     last_values = ts_df.iloc[-1]
#     for col in essential_cols[3:]:
#         future_df[col] = last_values[col]
    
#     # Update time-based features
#     future_df['Hour'] = 0  # Market opening hour
#     future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
#     future_df['Month'] = future_df['timestamp'].dt.month
    
#     # Create TimeSeriesDataFrame for known covariates
#     future_known = TimeSeriesDataFrame.from_data_frame(
#         future_df,
#         id_column='item_id',
#         timestamp_column='timestamp'
#     )
    
#     # Predict with history and future known covariates
#     predictions = predictor.predict(
#         data=ts_df,
#         known_covariates=future_known
#     )
#     print("✅ Training completed!")
#     print("Last few predictions:")
#     print(predictions.tail())
    
#     return predictor, predictions



In [5]:

# Main execution
if __name__ == "__main__":
    
    try:
        # Ensure directory exists
        import os
        os.makedirs('models', exist_ok=True)
        
        predictor, predictions = train_usdinr_forecaster()
        
        # Save the predictor (no path argument needed since specified in constructor)
        predictor.save()
        
        predictions.to_csv('Data/USDINR_predictions.csv')
        
        print("✅ Model and predictions saved successfully!")
        
        # Display final results
        print("\n🎯 Final Results Summary:")
        print("Model saved to: models/usdinr_chronos_predictor")
        print("Predictions saved to: Data/USDINR_predictions.csv")
        print(f"Prediction shape: {predictions.shape}")
        
        # Show sample predictions
        print("\n📈 Sample Future USDINR Opening Price Predictions:")
        if not predictions.empty:
            latest = predictions.tail(4)  # Last 4 predictions
            for idx, row in latest.iterrows():
                timestamp = idx[1] if isinstance(idx, tuple) else idx
                median_pred = row.get('0.5', row.get('mean', 'N/A'))
                print(f"  {timestamp}: {median_pred}")
        
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        print("\nTroubleshooting tips:")
        print("1. Ensure AutoGluon is installed: pip install autogluon.timeseries")
        print("2. Check if chronos models are available: pip install transformers>=4.30.0")
        print("3. Try the simplified training mode first")
        print("4. Check GPU memory availability for large context windows")


Beginning AutoGluon training... Time limit = 3600s
AutoGluon will save models to '/workspaces/Market/models/usdinr_chronos_predictor'


Training data shape: (1774, 25)
Available columns: ['target', 'Close', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50', 'EMA_12', 'EMA_26', 'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist', 'BB_upper', 'BB_middle', 'BB_lower', 'ATR_14', 'Price_Change', 'Volatility_20', 'HL_Ratio', 'Stoch_K', 'Stoch_D', 'Hour', 'DayOfWeek', 'Month']
Known covariates: 24
Train data: (1754, 25)
Test data: (20, 25)
🚀 Starting Chronos-Bolt-Base training for USDINR...


AutoGluon Version:  1.4.0
Python Version:     3.11.11
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #29~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Aug 14 16:52:50 UTC 2
CPU Count:          8
GPU Count:          1
Memory Avail:       13.20 GB / 23.31 GB (56.6%)
Disk Space Avail:   355.75 GB / 931.51 GB (38.2%)
Setting presets to: best_quality

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': MASE,
 'freq': 'D',
 'hyperparameters': {'Chronos': {'ag_args': {'name_suffix': '_USDINR_FineTuned_base_early_stopping'},
                                 'context_length': 64,
                                 'device_map': 'auto',
                                 'fine_tune': True,
                                 'learning_rate': 0.0005,
                                 'max_steps': 300,
                                 'model_path': 'amazon/chronos-bolt-base',
                                 'num_samples': 25,
                                 'torch_dtype

📊 Evaluating model performance...


Model not specified in predict, will default to the model with the best validation score: Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]
data with frequency 'B' has been resampled to frequency 'D'.


Evaluation scores:
{'MASE': -0.7747990802202221}


Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).
data with frequency 'IRREG' has been resampled to frequency 'D'.


Leaderboard:
                                               model  score_test  score_val  \
0  Chronos_USDINR_FineTuned_base_early_stopping[a...   -0.774799  -0.390568   

   pred_time_test  pred_time_val  fit_time_marginal  fit_order  
0        1.196198        0.04647        1008.687408          1  
🔮 Generating future forecasts...


Model not specified in predict, will default to the model with the best validation score: Chronos_USDINR_FineTuned_base_early_stopping[amazon__chronos-bolt-base]


Future 4-day USDINR opening price predictions:
                         mean        0.1        0.2        0.3        0.4  \
item_id timestamp                                                           
USDINR  2022-12-03  81.478821  81.160858  81.289803  81.369881  81.426895   
        2022-12-04  81.400604  81.024597  81.179703  81.271225  81.339714   
        2022-12-05  81.351952  80.944031  81.125778  81.221504  81.294975   
        2022-12-06  81.415947  80.932274  81.144341  81.266624  81.349648   

                          0.5        0.6        0.7        0.8        0.9  
item_id timestamp                                                          
USDINR  2022-12-03  81.478821  81.531578  81.597572  81.670692  81.752823  
        2022-12-04  81.400604  81.454781  81.512886  81.579025  81.665848  
        2022-12-05  81.351952  81.409035  81.467674  81.540627  81.651062  
        2022-12-06  81.415947  81.477425  81.552238  81.639618  81.772697  
✅ Model and predictions saved succ