In [1]:
import pathlib
import os
import warnings
warnings.filterwarnings('ignore')

# Change to the project root directory
project_root = pathlib.Path("/Users/victormp/Desktop/ml/ml-project")
os.chdir(project_root)


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from darts.models import ARIMA
from darts import TimeSeries
from darts import concatenate
from darts.metrics import mse as MSE
from darts.metrics import mae as MAE
from darts.models import SKLearnModel
import torch
import torch.nn as nn
from src.train import Trainer
from src.train import CrossValidator
from src.train import DartsBridge
from src.models import Classic_TCN
from src.models import AdditiveHybrid_ARMA_TCN


# Import Data

In [2]:
data_path = pathlib.Path("data/DCOILWTICO.csv")
print(f"Loading from: {data_path.absolute()}")
print(f"File exists: {data_path.exists()}")

data = pd.read_csv(data_path)
data.rename(columns = {"observation_date" : "date", "DCOILWTICO" : "price"}, inplace  = True)
data["date"] = pd.to_datetime(data["date"])
data = data.set_index("date")
data["return"] = np.log(data["price"]) - np.log(data["price"].shift(1))
returns = data["return"].replace([np.inf, -np.inf], np.nan).dropna().astype("float32")

Loading from: /Users/victormp/Desktop/ml/ml-project/data/DCOILWTICO.csv
File exists: True


# Split Data

In [3]:
# Three-way split: 60% train, 20% validation, 20% test
train_ratio = 0.6
val_ratio = 0.2
test_ratio = 0.2

train_end = int(len(returns) * train_ratio)
val_end = int(len(returns) * (train_ratio + val_ratio))

# Split the data
y_train = returns.iloc[:train_end]
y_val = returns.iloc[train_end:val_end]
y_test = returns.iloc[val_end:]

# Convert to darts TimeSeries objects
train_series = TimeSeries.from_values(y_train)
val_series = TimeSeries.from_values(y_val)
test_series = TimeSeries.from_values(y_test)

print(f"Train size: {len(train_series)} ({train_ratio*100}%)")
print(f"Validation size: {len(val_series)} ({val_ratio*100}%)")
print(f"Test size: {len(test_series)} ({(1-train_ratio-val_ratio)*100}%)")

Train size: 715 (60.0%)
Validation size: 239 (20.0%)
Test size: 239 (20.0%)


# Train AR Model
### Calculate residuals, preditions on training set, and the MSE

In [8]:
#Flexible AR model
AR_ORDERS = [1, 5]

# Dictionary to store results
ar_results = {}


for ar_order in AR_ORDERS:

    if os.path.exists(f"weights/model_weights_ar{ar_order}.pkl"):
        with open(f"weights/model_weights_ar{ar_order}.pkl", "rb") as f:
            model = ARIMA.load(f)
            print(f"Loaded model weights from weights/model_weights_ar{ar_order}.pkl")
        
        print("="*60)
        print(f"Fitting AR({ar_order}) Model")
        print("="*60)

        model.fit(train_series)
        residuals = model.residuals(train_series)
        train_series_predict = model.predict(len(train_series))

        # To avoid nan, only compute MSE on pairs where both are not nan
        pred_vals = train_series_predict.values().flatten()
        true_vals = train_series.values().flatten()

        # Remove pairs where either is nan
        mask = ~np.isnan(pred_vals) & ~np.isnan(true_vals)
        mse = np.mean((true_vals[mask] - pred_vals[mask]) ** 2)
        mae = np.mean(np.absolute(true_vals[mask] - pred_vals[mask]))

        print(f"AR({ar_order}) MSE on training data: {mse:.6f}")
        print(f"AR({ar_order}) MAE on training data: {mae:.6f}")

        # Store results for later use
        ar_results[ar_order] = {
            'model': model,
            'residuals': residuals,
            'predictions': train_series_predict,
            'mse': mse, 
            'mae': mae, 
        }
        print()

    else:

        print(f"Model_weights_ar{ar_order}.pkl does not exist.")
        print("="*60)
        print(f"Fitting AR({ar_order}) Model")
        print("="*60)

        # Train model and calculate residuals, compute MSE
        model = ARIMA(p=ar_order, d=0, q=0)
        model.fit(train_series)
        model.save(f'weights/model_weights_ar{ar_order}.pkl')
        residuals = model.residuals(train_series)
        train_series_predict = model.predict(len(train_series))

        # To avoid nan, only compute MSE on pairs where both are not nan
        pred_vals = train_series_predict.values().flatten()
        true_vals = train_series.values().flatten()

        # Remove pairs where either is nan
        mask = ~np.isnan(pred_vals) & ~np.isnan(true_vals)
        mse = np.mean((true_vals[mask] - pred_vals[mask]) ** 2)
        mae = np.mean(np.absolute(true_vals[mask] - pred_vals[mask]))

        print(f"AR({ar_order}) MSE on training data: {mse:.6f}")
        print(f"AR({ar_order}) MAE on training data: {mae:.6f}")

        # Store results for later use
        ar_results[ar_order] = {
            'model': model,
            'residuals': residuals,
            'predictions': train_series_predict,
            'mse': mse, 
            'mae': mae, 
        }
        print()

print("="*60)
for ar_order in AR_ORDERS:
    print(f" AR({ar_order}) available in ar_results[{ar_order}]['model']")

print("="*60)


Loaded model weights from weights/model_weights_ar1.pkl
Fitting AR(1) Model
AR(1) MSE on training data: 0.000593
AR(1) MAE on training data: 0.018579

Loaded model weights from weights/model_weights_ar5.pkl
Fitting AR(5) Model
AR(5) MSE on training data: 0.000593
AR(5) MAE on training data: 0.018581

 AR(1) available in ar_results[1]['model']
 AR(5) available in ar_results[5]['model']


# Train TCN

In [5]:
#define hyperparameter grid
hyperparameter_grid = {
    'kernel_size': [2, 3, 5],
    'num_filters': [64, 128],
    'num_layers': [3, 5, 7],
    'dilation_base': [2, 4],
    'lr': [0.0001, 0.001, 0.01]
}

#run cross-validation
cv = CrossValidator(
    model_type="classic",
    hyperparameter_grid=hyperparameter_grid,
    num_epochs=100,
    seed=42
)
cv.fit(train_series, val_series)
best_config_tcn = cv.get_best_config()
results_tcn_df = cv.get_results()

Using device: mps

Testing Classic TCN configuration: {'kernel_size': 2, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 2, 'lr': 0.0001}
Epoch [20/100], Training Loss: 0.017914
Epoch [40/100], Training Loss: 0.004131
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.004677
   MAE:  0.065979
   RMSE: 0.068388

Testing Classic TCN configuration: {'kernel_size': 2, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 2, 'lr': 0.001}
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.007943
   MAE:  0.087334
   RMSE: 0.089123

Testing Classic TCN configuration: {'kernel_size': 2, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 2, 'lr': 0.01}
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.000424
   MAE:  0.016755
   RMSE: 0.020582

Testing Classic TCN configuration: {'kernel_size': 2, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 4, 'lr': 0.0001}
Epoch [20/100], Training Loss: 0.017914
Epoch [4

KeyboardInterrupt: 

In [7]:
try:
    # Only pass architecture parameters here
    model_classic_tcn = Classic_TCN(
        num_channels=[128, 128, 128], 
        kernel_size=[2, 2, 2], 
        dilations=[1, 2, 4],
        dropout=0.1
    )
    
    model_classic_tcn.load_state_dict(torch.load("weights/model_weights_classic_tcn.pth"))
    model_classic_tcn.eval()
    print(f"Loaded model weights from 'weights/model_weights_classic_tcn.pth'")

except RuntimeError as e:
    print(f"Model architecture mismatch: {e}")
    print(f"Training new model...")
    # Training parameters go here
    model_classic_tcn = Trainer(
        model_type="classic",
        num_channels=[128, 128, 128],
        kernel_size=[2, 2, 2],
        dilations=[1, 2, 4],
        num_epochs=100, 
        lr=0.0001
    ).fit(y_train.values)
    torch.save(model_classic_tcn.state_dict(), "weights/model_weights_classic_tcn.pth")

Loaded model weights from 'weights/model_weights_classic_tcn.pth'


# Train Hybrid AR-TCN

In [4]:
#define hyperparameter grid
hyperparameter_grid = {
    'kernel_size': [3, 5],
    'num_filters': [64, 128],
    'num_layers': [3, 5, 7],
    'dilation_base': [2, 4],
    'lr': [0.0001]
}

#cross validate additive hybrid ARMA
cv_add = CrossValidator(
    model_type="additive",
    ar_orders=[1, 2, 3, 4, 5],
    ma_orders=[1, 2, 3, 4, 5],
    hyperparameter_grid=hyperparameter_grid,
    num_epochs=100,
    seed=42
)
cv_add.fit(train_series, val_series)
cv_add.get_best_config()
results_add_add = cv_add.get_results()


Using device: mps

Testing Additive Hybrid ARMA(1,1) configuration: {'kernel_size': 3, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 2, 'lr': 0.0001}
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.000570
   MAE:  0.019229
   RMSE: 0.023875

Testing Additive Hybrid ARMA(1,1) configuration: {'kernel_size': 3, 'num_filters': 64, 'num_layers': 3, 'dilation_base': 4, 'lr': 0.0001}
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.000570
   MAE:  0.019229
   RMSE: 0.023875

Testing Additive Hybrid ARMA(1,1) configuration: {'kernel_size': 3, 'num_filters': 64, 'num_layers': 5, 'dilation_base': 2, 'lr': 0.0001}
Epoch [20/100], Training Loss: 0.008203
Early stopping after 10 epochs

 Validation Forecasting Metrics:
   MSE:  0.004508
   MAE:  0.064732
   RMSE: 0.067139

Testing Additive Hybrid ARMA(1,1) configuration: {'kernel_size': 3, 'num_filters': 64, 'num_layers': 5, 'dilation_base': 4, 'lr': 0.0001}
Epoch [20/100], Training Loss

In [12]:
try:
    # Only pass architecture parameters here
    model_hybrid_arma = AdditiveHybrid_ARMA_TCN(
        ar_order=4,
        ma_order=3,
        num_channels=[128, 128, 128],
        kernel_size=[3, 3, 3],
        dilations=[1, 2, 4],
        dropout=0.1

    )

    model_hybrid_arma.load_state_dict(torch.load("weights/model_weights_hybrid_arma.pth"))
    model_hybrid_arma.eval()
    print(f"Loaded model weights from 'weights/model_weights_hybrid_arma.pth'")

except (RuntimeError, FileNotFoundError) as e:
    print(f"Model architecture mismatch: {e}")
    print(f"Training new model...")
    # Training parameters go here
    model_hybrid_arma = Trainer(
        model_type="additive",
        ar_order=4,
        ma_order=3,
        num_channels=[128, 128, 128],
        kernel_size=[3, 3, 3],
        dilations=[1, 2, 4],
        num_epochs=100,
        lr=0.0001
    ).fit(y_train.values)
    model_hybrid_arma = model_hybrid_arma.get_model()
    torch.save(model_hybrid_arma.get_model().state_dict(), "weights/model_weights_hybrid_arma.pth")

Loaded model weights from 'weights/model_weights_hybrid_arma.pth'


In [16]:
# Extract models
model_ar1 = ar_results[1]["model"]
model_ar5 = ar_results[5]["model"]

# Evaluate models
model_classic_tcn.eval()
model_hybrid_arma.eval()


with torch.no_grad():
    y_train_tensor = torch.FloatTensor(y_train.values)

    predictions_class, targets_class = model_classic_tcn(y_train_tensor)
    predictions_hybrid, targets_hybrid = model_hybrid_arma(y_train_tensor)

    print(f"AR prediction shape: {pred_vals.shape}")
    print(f"Classic prediction shape: {predictions_class.shape}")
    print(f"Additive prediction shape: {predictions_hybrid.shape}")

# Calculate MSE
y_train_adj = y_train.values[1:]  # Adjust for the shift in predictions
predictions_class_np = predictions_class.numpy().flatten()
mse_class = np.mean((y_train_adj - predictions_class_np) ** 2)
mae_class = np.mean(np.absolute(y_train_adj - predictions_class_np))

y_train_adj = y_train.values[5:]  # Adjust for the shift in predictions
predictions_hybrid_np = predictions_hybrid.numpy().flatten()
mse_hybrid = np.mean((y_train_adj - predictions_hybrid_np) ** 2)
mae_hybrid = np.mean(np.absolute(y_train_adj - predictions_hybrid_np))


print("\n" + "=" * 60)
print("FINAL RESULTS - True Hybrid Models)")
print("=" * 60)
print(f"MSE AR(1):                                 {ar_results[1]["mse"]:.6f}")
print(f"MSE AR(5):                                 {ar_results[5]["mse"]:.6f}")
print(f"MSE Classic TCN:                           {mse_class:.6f}")
print(f"MSE Hybrid ARMA + TCN (L + N):             {mse_hybrid:.6f}")
print("=" * 60)
print(f"MAE AR(1):                                 {ar_results[1]["mae"]:.6f}")
print(f"MAE AR(5):                                 {ar_results[5]["mae"]:.6f}")
print(f"MAE Classic TCN:                           {mae_class:.6f}")
print(f"MAE Hybrid ARMA + TCN (L + N):             {mse_hybrid:.6f}")
print("=" * 60)


AR prediction shape: (715,)
Classic prediction shape: torch.Size([1, 714, 1])
Additive prediction shape: torch.Size([1, 710, 1])

FINAL RESULTS - True Hybrid Models)
MSE AR(1):                                 0.000593
MSE AR(5):                                 0.000593
MSE Classic TCN:                           0.000608
MSE Hybrid ARMA + TCN (L + N):             0.000590
MAE AR(1):                                 0.018579
MAE AR(5):                                 0.018581
MAE Classic TCN:                           0.018608
MAE Hybrid ARMA + TCN (L + N):             0.000590


# Forecasting

In [17]:
# Retrain on combined training and validation sets
train_val_combined = concatenate([train_series, val_series], axis=0, ignore_time_axis=True)
train_val_tensor = torch.FloatTensor(train_val_combined.values().flatten())
test_tensor = torch.FloatTensor(test_series.values().flatten())

return_series = TimeSeries.from_values(returns)
test_series_aligned = return_series[val_end:]

In [19]:
AR_ORDERS = [1, 5]
forecast_horizon = [1, 2, 3, 5, 10, 30, 50, 100, 150, 200]
results_mse_df = pd.DataFrame(columns=[f"{time}-step MSE" for time in forecast_horizon])
results_mae_df = pd.DataFrame(columns=[f"{time}-step MAE" for time in forecast_horizon])

# Retrain the models on training set + validation set
model_ar1_final = ARIMA(p=1, d=0, q=0)
model_ar1_final.fit(train_val_combined)
model_ar5_final = ARIMA(p=5, d=0, q=0)
model_ar5_final.fit(train_val_combined)



for ar in AR_ORDERS:
    model = globals()[f"model_ar{ar}_final"]
    for time in forecast_horizon:
        forecast_series = model.historical_forecasts(
            series=return_series,
            start=val_end,
            forecast_horizon=time,
            stride=1,
            retrain=False,
            last_points_only=True
        )
        
        # Compute metrics against the actual test series
        results_mse_df.loc[f"AR({ar})", f"{time}-step MSE"] = MSE(test_series_aligned, forecast_series, intersect=True)
        results_mae_df.loc[f"AR({ar})", f"{time}-step MAE"] = MAE(test_series_aligned, forecast_series, intersect=True)

In [26]:
# Specify optimal TCN model based on cross-validation and train it on train+val set
model_classic_tcn_final = Trainer(model_type="classic",
                                  num_channels=[128, 128, 128],
                                  kernel_size=[2, 2, 2],
                                  dilations=[1, 2, 4],
                                  num_epochs=100,
                                  lr=0.0001).fit(train_val_combined).get_model()

model_classic_tcn_darts = SKLearnModel(
    model=DartsBridge(model_classic_tcn_final),
    lags=15 # Choose effective lag memory fore recursive forecasting
)
model_classic_tcn_darts.fit(train_val_combined)

for time in forecast_horizon:
        forecast_series = model_classic_tcn_darts.historical_forecasts(
            series=return_series,
            start=val_end,
            forecast_horizon=time,
            stride=1,
            retrain=False,
            overlap_end=True,
            last_points_only=True
        )
        
        # Compute metrics against the actual test series
        results_mse_df.loc["Classic TCN", f"{time}-step MSE"] = MSE(test_series_aligned, forecast_series, intersect=True)
        results_mae_df.loc["Classic TCN", f"{time}-step MAE"] = MAE(test_series_aligned, forecast_series, intersect=True)

Classic TCN - Epoch [10/100], Loss: 0.005302
Classic TCN - Epoch [20/100], Loss: 0.004864
Classic TCN - Epoch [30/100], Loss: 0.004295
Classic TCN - Epoch [40/100], Loss: 0.002263
Classic TCN - Epoch [50/100], Loss: 0.002272
Classic TCN - Epoch [60/100], Loss: 0.002069
Classic TCN - Epoch [70/100], Loss: 0.002209
Classic TCN - Epoch [80/100], Loss: 0.001989
Classic TCN - Epoch [90/100], Loss: 0.001906
Classic TCN - Epoch [100/100], Loss: 0.001980


In [35]:
# Specify optimal hybrid models based on cross-validation and train it on train+val set
model_hybrid_final = Trainer(
        model_type="additive",
        ar_order=4,
        ma_order=3,
        num_channels=[128, 128, 128],
        kernel_size=[3, 3, 3],
        dilations=[1, 2, 4],
        num_epochs=10,
        lr=0.0001
    ).fit(train_val_combined).get_model()

model_hybrid_darts = SKLearnModel(
    model=DartsBridge(model_hybrid_final),
    lags=29
)

model_hybrid_darts.fit(train_series)


for time in forecast_horizon:
    forecast_series = model.historical_forecasts(
            series=return_series,
            start=val_end,
            forecast_horizon=time,
            stride=1,
            retrain=False,
            overlap_end=True,
            last_points_only=True
        )

        # Compute metrics against the actual test series
    results_mse_df.loc[f"ARMA+TCN", f"{time}-step MSE"] = MSE(test_series_aligned, forecast_series, intersect=True)
    results_mae_df.loc[f"ARMA+TCN", f"{time}-step MAE"] = MAE(test_series_aligned, forecast_series, intersect=True)

Additive Hybrid ARMA(4,3) + TCN - Epoch [10/10], Loss: 0.002223


In [36]:
results_mse_df

Unnamed: 0,1-step MSE,2-step MSE,3-step MSE,5-step MSE,10-step MSE,30-step MSE,50-step MSE,100-step MSE,150-step MSE,200-step MSE
AR(1),0.000376,0.000378,0.000379,0.000382,0.000384,0.000399,0.000416,0.000369,0.000227,0.000155
AR(5),0.000388,0.000389,0.000384,0.000384,0.000384,0.000399,0.000416,0.000369,0.000227,0.000155
Classic TCN,0.000365,0.000366,0.000369,0.000378,0.000382,0.000397,0.000415,0.000368,0.000225,0.000156
ARMA+TCN,0.000388,0.000389,0.000384,0.000384,0.000384,0.000399,0.000416,0.000369,0.000227,0.000155


In [37]:
results_mae_df

Unnamed: 0,1-step MAE,2-step MAE,3-step MAE,5-step MAE,10-step MAE,30-step MAE,50-step MAE,100-step MAE,150-step MAE,200-step MAE
AR(1),0.014347,0.014346,0.014383,0.014421,0.014411,0.014606,0.014875,0.013947,0.011883,0.010151
AR(5),0.01454,0.014531,0.014472,0.014503,0.01442,0.014605,0.014875,0.013947,0.011883,0.010152
Classic TCN,0.014226,0.014236,0.014281,0.014374,0.014417,0.014634,0.014908,0.013995,0.01196,0.010308
ARMA+TCN,0.01454,0.014531,0.014472,0.014503,0.01442,0.014605,0.014875,0.013947,0.011883,0.010152
