In [2]:
# GPU-Accelerated Forecasting: Baselines & MLPs
# Dataset: 6-variable series, sampled once per minute
# Task: Predict all 6 variables 4 hours (240 minutes) ahead

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb


In [3]:
# ========= Device =========
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")



Using device: cuda


In [4]:
import pandas as pd
import torch

# ========= Data Loading =========
df = pd.read_csv('Train_timeseries_filled.csv')
data = torch.tensor(df.values, dtype=torch.float32)

# ========= Move to device and crop =========
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)[:100_000]

# ========= Build one‐step forecast dataset =========
# X[t] → predict X[t+1]
X = data[:-6]   # (N-1, 6)
Y = data[-6:]    # (N-1, 6)

# ========= Train/test split (80/20) =========
M = X.size(0)
train_M = int(0.8 * M)
X_train, Y_train = X[:train_M], Y[:train_M]
X_test,  Y_test  = X[train_M:], Y[train_M:]

# ========= Train Normalization =========
train_mean = X_train.mean(dim=0, keepdim=True)  # (1,6)
train_std  = X_train.std(dim=0, keepdim=True)   # (1,6)
train_std[train_std == 0] = 1.0

X_train = (X_train - train_mean) / train_std
X_test  = (X_test  - train_mean) / train_std


In [5]:
num_features = X.size(1) # 6 variables * hist_len
num_targets = Y.size(1)   # 6 variables * horiz

In [6]:
num_features

6

In [7]:
# ========= Metrics =========
def compute_metrics(y_true, y_pred):
    yt = y_true.cpu().numpy()
    yp = y_pred.detach().cpu().numpy()
    return {
        'MSE': mean_squared_error(yt, yp),
        'MAE': mean_absolute_error(yt, yp),
        'R2' : r2_score(yt, yp)
    }

In [8]:
# ========= 1. Naïve Forecast =========
def predict_naive(X_batch):
    # last observed 6 values → repeat for horizon
    last = X_batch[:, -num_features:]                # (B,6)
    return last.repeat(1, num_targets).view(-1, num_targets)  # (B,6*4)

Y_pred_naive = predict_naive(X_test)

In [9]:
compute_metrics(Y_test, Y_pred_naive)

ValueError: Found input variables with inconsistent numbers of samples: [0, 119994]

In [None]:
# ========= 2. Moving Average =========
def predict_moving_avg(X_batch, window=240):
    # reshape to (B, hist_len, 6)
    X_batch = X_batch.view(-1, 240, num_features)  # (B,240,6)
    return X_batch.mean(dim=1).repeat(1, num_targets).view(-1, num_targets)  # (B,6*4)

Y_pred_ma = predict_moving_avg(X_test)

In [None]:
compute_metrics(Y_test, Y_pred_ma)

{'MSE': 1.0803263187408447,
 'MAE': 0.6927345991134644,
 'R2': 0.43251562118530273}

In [None]:
def predict_knn(X_train, Y_train, X_query, k=10, batch_size=1):
    preds = []
    for i in range(0, X_query.size(0), batch_size):
        batch = X_query[i:i+batch_size]
        dists = torch.cdist(batch, X_train)  # (Bq, Bt)
        idx = torch.topk(dists, k, largest=False).indices
        neigh = Y_train[idx]  # (Bq, k, D)
        preds.append(neigh.mean(dim=1))
    return torch.cat(preds, dim=0)

Y_pred_knn = predict_knn(X_train, Y_train, X_test, k=10)

In [None]:
compute_metrics(Y_test, Y_pred_knn)

{'MSE': 1.9872112274169922,
 'MAE': 0.9827815294265747,
 'R2': -0.056001272052526474}

In [None]:
# ========= 5. MLP (PyTorch) =========
class MLP(nn.Module):
    def __init__(self, in_dim, hidden=[128, 64], out_dim=num_features):
        super().__init__()
        layers = []
        dims = [in_dim] + hidden
        for i in range(len(hidden)):
            layers += [nn.Linear(dims[i], dims[i+1]), nn.ReLU()]
        layers.append(nn.Linear(dims[-1], out_dim))
        self.net = nn.Sequential(*layers)
    def forward(self, x): return self.net(x)



NameError: name 'nn' is not defined

In [None]:
mlp = MLP(in_dim=num_features).to(device)
opt = torch.optim.Adam(mlp.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

In [None]:
for epoch in range(100):
    mlp.train()
    opt.zero_grad()
    pred = mlp(X_train)
    loss = loss_fn(pred, Y_train)
    loss.backward()
    opt.step()
Y_pred_mlp = mlp(X_test)

In [None]:
compute_metrics(Y_test, Y_pred_mlp)

{'MSE': 1.1297732591629028,
 'MAE': 0.7185354232788086,
 'R2': 0.4065788984298706}

In [None]:
results = {
    'Naive' : compute_metrics(Y_test, Y_pred_naive),
    'MovAvg': compute_metrics(Y_test, Y_pred_ma),
    'KNN'   : compute_metrics(Y_test, Y_pred_knn),
    'MLP'   : compute_metrics(Y_test, Y_pred_mlp),
}

for name, mets in results.items():
    print(f"{name:8s} | MSE: {mets['MSE']:.4f} | MAE: {mets['MAE']:.4f} | R2: {mets['R2']:.4f}")

Naive    | MSE: 1.0510 | MAE: 0.6816 | R2: 0.4477
MovAvg   | MSE: 1.0803 | MAE: 0.6927 | R2: 0.4325
KNN      | MSE: 1.9872 | MAE: 0.9828 | R2: -0.0560
MLP      | MSE: 1.1298 | MAE: 0.7185 | R2: 0.4066
FFT-MLP  | MSE: 2.1759 | MAE: 1.1708 | R2: -0.1492
