In [13]:
#!/usr/bin/env python3
"""
AutoGluon Chronos USDINR Inference Script - Matching Training Structure
Loads trained model and runs predictions using known_covariates approach
"""

import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import os
import sys
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

def prepare_inference_data(file_path):
    """
    Load a market-data CSV and shape it into a TimeSeriesDataFrame.

    The date column is taken from 'Price' when that column exists; otherwise
    the first column whose lower-cased name contains 'date', 'time' or
    'timestamp' is used. Every row is assigned item_id 'USDINR' and the
    'Close' column is copied into 'target'.

    Args:
        file_path: Path to CSV file with market data

    Returns:
        ts_df: TimeSeriesDataFrame built from the selected columns, sorted by
            timestamp, with rows lacking a target removed
        known_covariates: The full list of covariate column names (including
            any that were reported as missing from the file)

    Raises:
        ValueError: If no date-like column can be located
    """
    frame = pd.read_csv(file_path)

    # Decide which column carries the dates, then parse it once
    if 'Price' in frame.columns:
        date_source = 'Price'
    else:
        candidates = [
            name for name in frame.columns
            if any(token in name.lower() for token in ['date', 'time', 'timestamp'])
        ]
        if not candidates:
            raise ValueError("No date column found")
        date_source = candidates[0]
    frame['timestamp'] = pd.to_datetime(frame[date_source])

    # Single-series dataset: constant id, Close is the forecasting target
    frame['item_id'] = 'USDINR'
    frame['target'] = frame['Close']

    # NOTE(review): this list is presumably identical to the one used at
    # training time - confirm against the training script.
    known_covariates = [
        'Close', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50',
        'EMA_12', 'EMA_26', 'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist'
    ]

    # Keep only the columns that actually exist; report the rest
    wanted = ['item_id', 'timestamp', 'target', *known_covariates]
    present = [name for name in wanted if name in frame.columns]
    absent = set(wanted) - set(present)
    if absent:
        print(f"⚠️  Missing columns: {absent}")

    # Chronological order, and no rows without a target value
    subset = (
        frame[present]
        .copy()
        .sort_values('timestamp')
        .dropna(subset=['target'])
    )

    ts_df = TimeSeriesDataFrame.from_data_frame(
        subset,
        id_column='item_id',
        timestamp_column='timestamp'
    )

    return ts_df, known_covariates

def inference_usdinr_forecaster(model_path, inference_data_path, prediction_length=4):
    """
    Load a saved predictor, prepare the inference data and forecast ahead.

    Each stage (model load, data preparation, prediction) reports its own
    failure on stdout and makes the function return None instead of raising.

    Args:
        model_path: Path to trained AutoGluon model
        inference_data_path: Path to inference dataset
        prediction_length: Number of future calendar days for which
            known-covariate rows are generated

    Returns:
        predictions: Whatever predictor.predict returns, or None when any
            stage failed
    """
    print("🚀 Starting USDINR Inference Pipeline...")
    print("=" * 50)

    # Stage 1: restore the trained predictor
    print(f"🔄 Loading model from: {model_path}")
    try:
        predictor = TimeSeriesPredictor.load(model_path)
    except Exception as e:
        print(f"❌ Error loading model: {str(e)}")
        return None
    else:
        print("✅ Model loaded successfully!")

    # Stage 2: read the history the forecast is conditioned on
    print(f"📊 Loading inference data from: {inference_data_path}")
    try:
        ts_df, known_covariates = prepare_inference_data(inference_data_path)
        stamps = ts_df.index.get_level_values('timestamp')
        print(f"✅ Data prepared: {ts_df.shape}")
        print(f"📊 Known covariates: {len(known_covariates)}")
        print(f"📅 Date range: {stamps.min()} to {stamps.max()}")
    except Exception as e:
        print(f"❌ Error preparing data: {str(e)}")
        return None

    # Stage 3: build future covariates and predict
    print("🔮 Generating future forecasts...")

    try:
        # One row per calendar day after the last observed timestamp.
        # NOTE(review): weekends are included; confirm this matches the
        # frequency the predictor was trained with.
        final_stamp = ts_df.index.get_level_values('timestamp')[-1]
        horizon = [
            final_stamp + pd.Timedelta(days=offset)
            for offset in range(1, prediction_length + 1)
        ]
        future_df = pd.DataFrame({'timestamp': horizon, 'item_id': 'USDINR'})

        # Every covariate is held constant at its most recent observed value
        final_row = ts_df.iloc[-1]
        for name in known_covariates:
            if name in ts_df.columns:
                future_df[name] = final_row[name]

        # Calendar features are recomputed only when the input carries them
        input_columns = ts_df.columns
        if 'Hour' in input_columns:
            future_df['Hour'] = 0  # Market open hour
        if 'DayOfWeek' in input_columns:
            future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
        if 'Month' in input_columns:
            future_df['Month'] = future_df['timestamp'].dt.month

        future_known = TimeSeriesDataFrame.from_data_frame(
            future_df,
            id_column='item_id',
            timestamp_column='timestamp'
        )

        predictions = predictor.predict(
            data=ts_df,
            known_covariates=future_known
        )

        print(f"✅ Predictions generated: {predictions.shape}")

        return predictions

    except Exception as e:
        print(f"❌ Error during prediction: {str(e)}")
        return None

def save_and_display_predictions(predictions, output_path='usdinr_inference_predictions.csv'):
    """Save and display predictions"""
    
    if predictions is None:
        print("❌ No predictions to save")
        return False
    
    try:
        # Save predictions
        predictions.to_csv(output_path)
        print(f"💾 Predictions saved to: {output_path}")
        
        # Display predictions
        print("=" * 50)
        print("🎯 Inference completed successfully!")
        print(f"\n📈 Future {len(predictions)} days USDINR Predictions:")
        
        # Show all predictions with proper formatting
        for i, (idx, row) in enumerate(predictions.iterrows()):
            if hasattr(idx, '__len__') and len(idx) > 1:
                timestamp = idx[1]  # MultiIndex (item_id, timestamp)
            else:
                timestamp = idx
            
            # Show mean prediction (0.5 quantile) and confidence interval
            if 'mean' in row.index:
                mean_pred = row['mean']
            elif '0.5' in row.index:
                mean_pred = row['0.5']
            else:
                mean_pred = row.iloc[len(row)//2]  # Middle value
            
            print(f"  📅 {timestamp.strftime('%Y-%m-%d')}: {mean_pred:.5f} INR")
        
        # Show confidence intervals if available
        if '0.1' in predictions.columns and '0.9' in predictions.columns:
            print(f"\n🎯 Prediction Ranges (10%-90% confidence):")
            for i, (idx, row) in enumerate(predictions.iterrows()):
                if hasattr(idx, '__len__') and len(idx) > 1:
                    timestamp = idx[1]
                else:
                    timestamp = idx
                
                low_pred = row['0.1']
                high_pred = row['0.9']
                print(f"  📅 {timestamp.strftime('%Y-%m-%d')}: {low_pred:.5f} - {high_pred:.5f} INR")
        
        print(f"\n📊 Available prediction quantiles: {list(predictions.columns)}")
        return True
        
    except Exception as e:
        print(f"❌ Error saving predictions: {str(e)}")
        return False

# Script entry point: forecast from the full dataset and persist the result
if __name__ == "__main__":

    # Run settings
    MODEL_PATH = "models/usdinr_chronos_predictor"
    DATA_PATH = "Data/USDINR_day_2025-01-2025-09-Infer-processed.csv"
    OUTPUT_PATH = "usdinr_inference_predictions.csv"
    PREDICTION_LENGTH = 4  # Next 4 days

    # Produce the forecast; None signals that a stage failed and was reported
    predictions = inference_usdinr_forecaster(
        model_path=MODEL_PATH,
        inference_data_path=DATA_PATH,
        prediction_length=PREDICTION_LENGTH
    )

    if predictions is None:
        print("❌ Inference failed - check error messages above")
    else:
        # Write the CSV and print the summary
        success = save_and_display_predictions(predictions, OUTPUT_PATH)
        if not success:
            print("❌ Failed to save predictions")
        else:
            print(f"\n🎯 SUCCESS! Next {PREDICTION_LENGTH} days USDINR predictions ready!")
            print(f"📊 Check '{OUTPUT_PATH}' for detailed quantile forecasts")


data with frequency 'IRREG' has been resampled to frequency 'D'.


🚀 Starting USDINR Inference Pipeline...
🔄 Loading model from: models/usdinr_chronos_predictor
✅ Model loaded successfully!
📊 Loading inference data from: Data/USDINR_day_2025-01-2025-09-Infer-processed.csv
✅ Data prepared: (121, 13)
📊 Known covariates: 12
📅 Date range: 2025-03-12 00:00:00 to 2025-08-29 00:00:00
🔮 Generating future forecasts...


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


✅ Predictions generated: (4, 10)
💾 Predictions saved to: usdinr_inference_predictions.csv
🎯 Inference completed successfully!

📈 Future 4 days USDINR Predictions:
  📅 2025-08-30: 87.51659 INR
  📅 2025-08-31: 87.49575 INR
  📅 2025-09-01: 87.47936 INR
  📅 2025-09-02: 87.50985 INR

🎯 Prediction Ranges (10%-90% confidence):
  📅 2025-08-30: 87.31823 - 87.75201 INR
  📅 2025-08-31: 87.27477 - 87.77258 INR
  📅 2025-09-01: 87.26141 - 87.76715 INR
  📅 2025-09-02: 87.24723 - 87.80813 INR

📊 Available prediction quantiles: ['mean', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']

🎯 SUCCESS! Next 4 days USDINR predictions ready!
📊 Check 'usdinr_inference_predictions.csv' for detailed quantile forecasts


In [15]:
#!/usr/bin/env python3
"""
AutoGluon Chronos USDINR Inference Script - WITH BACKTESTING
Trims last N days, predicts them, and compares with actual values
"""

import pandas as pd
import numpy as np
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import os
import sys
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

def trim_data_for_backtesting(original_path, trim_last_n_days=4, output_path='USDINR_trimmed_for_inference.csv'):
    """
    Hold out the last N distinct dates of a dataset for backtesting.

    The date column is 'Price' when present, otherwise the first column whose
    lower-cased name contains 'date', 'time' or 'timestamp'. Rows belonging to
    the last N distinct dates are returned as the actual values; all other
    rows are written, sorted by date and with their original columns, to
    output_path.

    Args:
        original_path: Path to original dataset
        trim_last_n_days: Number of distinct dates to remove from the end;
            0 removes nothing
        output_path: Path to save trimmed dataset

    Returns:
        trimmed_path: Path to trimmed dataset (same value as output_path)
        actual_data: DataFrame with columns timestamp, Close, High, Low for
            the removed dates

    Raises:
        ValueError: If trim_last_n_days is negative or no date column is found
    """
    if trim_last_n_days < 0:
        raise ValueError("trim_last_n_days must be non-negative.")

    print(f"📂 Trimming last {trim_last_n_days} days from {original_path}")

    # Load full data
    df = pd.read_csv(original_path)

    # Locate the date column
    if 'Price' in df.columns:
        date_col = 'Price'
    else:
        date_cols = [col for col in df.columns if any(x in col.lower() for x in ['date', 'time', 'timestamp'])]
        if not date_cols:
            raise ValueError("No date column found for trimming.")
        date_col = date_cols[0]

    # Parse dates into a scratch column whose name cannot collide with a real
    # column: reusing 'timestamp' would overwrite (and later drop) a source
    # column of that name, leaving the trimmed file without any dates.
    ts_col = '__backtest_timestamp__'
    while ts_col in df.columns:
        ts_col += '_'
    df[ts_col] = pd.to_datetime(df[date_col])

    # Sort by timestamp
    df = df.sort_values(ts_col)

    # A DatetimeIndex yields pandas Timestamps on every pandas version, so
    # .strftime below is always available.
    unique_dates = pd.DatetimeIndex(df[ts_col].dropna().unique()).sort_values()
    # Slicing with [-0:] would select every date, so zero is handled explicitly
    trim_dates = unique_dates[-trim_last_n_days:] if trim_last_n_days > 0 else unique_dates[:0]

    # Split data
    held_out = df[ts_col].isin(trim_dates)
    actual_data = (
        df.loc[held_out, [ts_col, 'Close', 'High', 'Low']]
        .rename(columns={ts_col: 'timestamp'})
        .copy()
    )

    # Save trimmed data without the scratch column
    df.loc[~held_out].drop(columns=ts_col).to_csv(output_path, index=False)

    print(f"✅ Trimmed data saved to {output_path}")
    print(f"📊 Removed {len(actual_data)} days for backtesting")
    if len(trim_dates) > 0:
        print(f"📅 Backtesting dates: {trim_dates[0].strftime('%Y-%m-%d')} to {trim_dates[-1].strftime('%Y-%m-%d')}")
    else:
        print("📅 Backtesting dates: none")

    return output_path, actual_data

def prepare_inference_data(file_path):
    """
    Read a market-data CSV and convert it to a TimeSeriesDataFrame.

    Dates come from the 'Price' column when it exists, otherwise from the
    first column whose lower-cased name contains 'date', 'time' or
    'timestamp'. All rows get item_id 'USDINR'; 'Close' is copied to 'target'.

    Args:
        file_path: Path to the CSV file

    Returns:
        ts_df: TimeSeriesDataFrame sorted by timestamp, without rows whose
            target is NaN
        known_covariates: Full list of covariate names, whether or not each
            one was found in the file

    Raises:
        ValueError: If no date-like column exists
    """
    raw = pd.read_csv(file_path)

    # Resolve and parse the date column
    if 'Price' in raw.columns:
        raw['timestamp'] = pd.to_datetime(raw['Price'])
    else:
        matches = []
        for column in raw.columns:
            lowered = column.lower()
            if any(marker in lowered for marker in ['date', 'time', 'timestamp']):
                matches.append(column)
        if not matches:
            raise ValueError("No date column found")
        raw['timestamp'] = pd.to_datetime(raw[matches[0]])

    raw['item_id'] = 'USDINR'
    raw['target'] = raw['Close']

    known_covariates = [
        'Close', 'High', 'Low', 'Volume', 'SMA_20', 'SMA_50',
        'EMA_12', 'EMA_26', 'RSI_14', 'MACD', 'MACD_signal', 'MACD_hist'
    ]

    # Select whatever subset of the expected columns the file provides
    expected = ['item_id', 'timestamp', 'target'] + known_covariates
    found = [column for column in expected if column in raw.columns]
    not_found = set(expected) - set(found)
    if not_found:
        print(f"⚠️  Missing columns: {not_found}")

    ordered = raw[found].copy().sort_values('timestamp')
    cleaned = ordered.dropna(subset=['target'])

    ts_df = TimeSeriesDataFrame.from_data_frame(
        cleaned,
        id_column='item_id',
        timestamp_column='timestamp'
    )

    return ts_df, known_covariates

def inference_usdinr_forecaster(model_path, inference_data_path, prediction_length=4):
    """
    Forecast USDINR from a saved predictor and a prepared dataset.

    Runs three stages - load the predictor, prepare the data, predict - and
    returns None (after printing the error) as soon as one of them fails.

    Args:
        model_path: Path to the saved AutoGluon predictor
        inference_data_path: Path to the CSV used as forecast history
        prediction_length: Number of calendar days after the last timestamp
            for which known-covariate rows are generated

    Returns:
        The result of predictor.predict, or None on failure
    """
    print("🚀 Starting USDINR Inference Pipeline...")
    print("=" * 50)

    print(f"🔄 Loading model from: {model_path}")
    try:
        predictor = TimeSeriesPredictor.load(model_path)
    except Exception as e:
        print(f"❌ Error loading model: {str(e)}")
        return None
    else:
        print("✅ Model loaded successfully!")

    print(f"📊 Loading inference data from: {inference_data_path}")
    try:
        ts_df, known_covariates = prepare_inference_data(inference_data_path)
        observed_stamps = ts_df.index.get_level_values('timestamp')
        print(f"✅ Data prepared: {ts_df.shape}")
        print(f"📊 Known covariates: {len(known_covariates)}")
        print(f"📅 Date range: {observed_stamps.min()} to {observed_stamps.max()}")
    except Exception as e:
        print(f"❌ Error preparing data: {str(e)}")
        return None

    print("🔮 Generating future forecasts...")

    try:
        # Consecutive calendar days following the last observation
        anchor = ts_df.index.get_level_values('timestamp')[-1]
        future_stamps = []
        for day in range(1, prediction_length + 1):
            future_stamps.append(anchor + pd.Timedelta(days=day))

        future_df = pd.DataFrame({
            'timestamp': future_stamps,
            'item_id': 'USDINR'
        })

        # Carry the most recent covariate values forward unchanged
        latest = ts_df.iloc[-1]
        for covariate in known_covariates:
            if covariate in ts_df.columns:
                future_df[covariate] = latest[covariate]

        # Calendar features, refreshed only if the input data contains them
        available = ts_df.columns
        if 'Hour' in available:
            future_df['Hour'] = 0
        if 'DayOfWeek' in available:
            future_df['DayOfWeek'] = future_df['timestamp'].dt.dayofweek
        if 'Month' in available:
            future_df['Month'] = future_df['timestamp'].dt.month

        future_known = TimeSeriesDataFrame.from_data_frame(
            future_df,
            id_column='item_id',
            timestamp_column='timestamp'
        )

        predictions = predictor.predict(data=ts_df, known_covariates=future_known)

        print(f"✅ Predictions generated: {predictions.shape}")
        return predictions

    except Exception as e:
        print(f"❌ Error during prediction: {str(e)}")
        return None

def create_comparison_csv(predictions, actual_data, output_path='USDINR_predictions_vs_actual.csv'):
    """
    Create comparison CSV with predictions vs actual values.

    Predictions and actuals are joined on the calendar date (outer join, so
    dates present on only one side are kept in the CSV). Error and coverage
    summaries are computed only over dates that have both a prediction and an
    actual value.

    Args:
        predictions: Model predictions DataFrame, indexed by timestamp or by
            (item_id, timestamp), with statistic columns such as 'mean',
            '0.1', '0.5', '0.9'
        actual_data: Actual values DataFrame with columns 'timestamp',
            'Close', 'High', 'Low'
        output_path: Output CSV path

    Returns:
        True when the comparison was written and printed, False when any step
        raised (the error is printed).
    """
    print("📊 Creating predictions vs actual comparison...")

    try:
        # Pick the point-forecast column once: median first, then mean, then
        # the middle column. (Resolving it lazily avoids indexing a fifth
        # column that may not exist.)
        forecast_cols = list(predictions.columns)
        if '0.5' in forecast_cols:
            point_col = '0.5'
        elif 'mean' in forecast_cols:
            point_col = 'mean'
        else:
            point_col = forecast_cols[len(forecast_cols) // 2]
        has_low = '0.1' in forecast_cols
        has_high = '0.9' in forecast_cols
        missing = float('nan')  # numeric placeholder keeps the columns float

        # Prepare predictions data
        pred_data = []
        for idx, row in predictions.iterrows():
            # (item_id, timestamp) rows arrive as tuples
            timestamp = idx[1] if isinstance(idx, tuple) and len(idx) > 1 else idx
            low = row['0.1'] if has_low else missing
            high = row['0.9'] if has_high else missing
            has_band = pd.notna(low) and pd.notna(high)
            pred_data.append({
                'Date': timestamp.strftime('%Y-%m-%d'),
                'Predicted_Close': row[point_col],
                'Predicted_Low_10%': low,
                'Predicted_High_90%': high,
                'Prediction_Range': f"{low:.4f} - {high:.4f}" if has_band else 'N/A',
            })

        pred_df = pd.DataFrame(
            pred_data,
            columns=['Date', 'Predicted_Close', 'Predicted_Low_10%', 'Predicted_High_90%', 'Prediction_Range'],
        )

        # Prepare actual data
        actual_df = actual_data.copy()
        actual_df['Date'] = actual_df['timestamp'].dt.strftime('%Y-%m-%d')
        actual_df = actual_df.rename(columns={'Close': 'Actual_Close', 'High': 'Actual_High', 'Low': 'Actual_Low'})
        actual_df = actual_df[['Date', 'Actual_Close', 'Actual_High', 'Actual_Low']]

        # Merge predictions and actuals
        comparison_df = pd.merge(pred_df, actual_df, on='Date', how='outer')

        # Calculate accuracy metrics
        predicted = comparison_df['Predicted_Close'].astype(float)
        actual = comparison_df['Actual_Close'].astype(float)
        low_bound = comparison_df['Predicted_Low_10%'].astype(float)
        high_bound = comparison_df['Predicted_High_90%'].astype(float)

        comparison_df['Absolute_Error'] = (predicted - actual).abs()
        comparison_df['Percentage_Error'] = (comparison_df['Absolute_Error'] / actual) * 100
        comparison_df['Within_Range'] = (actual >= low_bound) & (actual <= high_bound)

        # Rows that can actually be scored against the quantile band; rows
        # missing either side must not count as interval misses.
        scored = actual.notna() & low_bound.notna() & high_bound.notna()
        unmatched = int((predicted.isna() | actual.isna()).sum())

        # Save comparison
        comparison_df.to_csv(output_path, index=False)

        # Display results
        print("=" * 60)
        print("🎯 BACKTESTING RESULTS")
        print("=" * 60)

        for (_, row), is_scored in zip(comparison_df.iterrows(), scored):
            if is_scored:
                verdict = '✅ YES' if row['Within_Range'] else '❌ NO'
            else:
                verdict = 'N/A'
            print(f"📅 {row['Date']}:")
            print(f"   Predicted: {row['Predicted_Close']:.5f} INR")
            print(f"   Actual:    {row['Actual_Close']:.5f} INR")
            print(f"   Error:     {row['Absolute_Error']:.5f} INR ({row['Percentage_Error']:.2f}%)")
            print(f"   In Range:  {verdict}")
            print()

        # Summary statistics (NaN rows are skipped by mean())
        mae = comparison_df['Absolute_Error'].mean()
        mape = comparison_df['Percentage_Error'].mean()
        accuracy_rate = comparison_df.loc[scored, 'Within_Range'].mean() * 100

        print("📈 SUMMARY METRICS:")
        print(f"   Mean Absolute Error (MAE): {mae:.5f} INR")
        print(f"   Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
        print(f"   Predictions within 10%-90% range: {accuracy_rate:.1f}%")
        if unmatched:
            print(f"   ⚠️  {unmatched} date(s) lack a prediction or an actual value and are excluded from the metrics")

        print(f"\n💾 Detailed comparison saved to: {output_path}")
        return True

    except Exception as e:
        print(f"❌ Error creating comparison: {str(e)}")
        return False

# Backtesting entry point: hold out the latest days, forecast them, then score
if __name__ == "__main__":

    # Run settings
    MODEL_PATH = "models/usdinr_chronos_predictor"
    ORIGINAL_DATA_PATH = "Data/USDINR_day_2025-01-2025-09-Infer-processed.csv"  # Updated path
    TRIMMED_DATA_PATH = "USDINR_trimmed_for_inference.csv"
    COMPARISON_OUTPUT = "USDINR_predictions_vs_actual.csv"
    PREDICTION_LENGTH = 4  # Next 4 days

    # 1) Split off the held-out days and write the shortened history
    trimmed_path, actual_data = trim_data_for_backtesting(
        ORIGINAL_DATA_PATH,
        trim_last_n_days=PREDICTION_LENGTH,
        output_path=TRIMMED_DATA_PATH
    )

    # 2) Forecast from the shortened history
    predictions = inference_usdinr_forecaster(
        model_path=MODEL_PATH,
        inference_data_path=trimmed_path,
        prediction_length=PREDICTION_LENGTH
    )

    # 3) Score the forecast against the held-out values
    if predictions is None:
        print("❌ Inference failed - check error messages above")
    else:
        success = create_comparison_csv(predictions, actual_data, COMPARISON_OUTPUT)
        if not success:
            print("❌ Failed to create comparison")
        else:
            print(f"\n🎯 SUCCESS! Backtesting completed!")
            print(f"📊 Check '{COMPARISON_OUTPUT}' for detailed predictions vs actual comparison")


data with frequency 'IRREG' has been resampled to frequency 'D'.


📂 Trimming last 4 days from Data/USDINR_day_2025-01-2025-09-Infer-processed.csv
✅ Trimmed data saved to USDINR_trimmed_for_inference.csv
📊 Removed 4 days for backtesting
📅 Backtesting dates: 2025-08-26 to 2025-08-29
🚀 Starting USDINR Inference Pipeline...
🔄 Loading model from: models/usdinr_chronos_predictor
✅ Model loaded successfully!
📊 Loading inference data from: USDINR_trimmed_for_inference.csv
✅ Data prepared: (117, 13)
📊 Known covariates: 12
📅 Date range: 2025-03-12 00:00:00 to 2025-08-25 00:00:00
🔮 Generating future forecasts...


✅ Predictions generated: (4, 10)
📊 Creating predictions vs actual comparison...
🎯 BACKTESTING RESULTS
📅 2025-08-26:
   Predicted: 87.19531 INR
   Actual:    87.60860 INR
   Error:     0.41328 INR (0.47%)
   In Range:  ❌ NO

📅 2025-08-27:
   Predicted: 87.18262 INR
   Actual:    87.60070 INR
   Error:     0.41808 INR (0.48%)
   In Range:  ❌ NO

📅 2025-08-28:
   Predicted: 87.16073 INR
   Actual:    87.65910 INR
   Error:     0.49837 INR (0.57%)
   In Range:  ❌ NO

📅 2025-08-29:
   Predicted: 87.16889 INR
   Actual:    87.59170 INR
   Error:     0.42281 INR (0.48%)
   In Range:  ❌ NO

📈 SUMMARY METRICS:
   Mean Absolute Error (MAE): 0.43814 INR
   Mean Absolute Percentage Error (MAPE): 0.50%
   Predictions within 10%-90% range: 0.0%

💾 Detailed comparison saved to: USDINR_predictions_vs_actual.csv

🎯 SUCCESS! Backtesting completed!
📊 Check 'USDINR_predictions_vs_actual.csv' for detailed predictions vs actual comparison
