🧠 Classical Time Series Models
Great for small datasets or explainability.

1. ARIMA / SARIMA
Univariate

Good for individual customer time series

Doesn't handle multivariate inputs

2. Exponential Smoothing (Holt-Winters)
Good for capturing trends/seasonality

Works best on individual customer series

🧠 Machine Learning Models
Good if you extract features (lags, trends, rolling stats, etc.)

4. Random Forest / Gradient Boosting (e.g., XGBoost, LightGBM)
Treat it like a regression problem

Need feature engineering (lagged values, time of day/week, etc.)

Good for tabular-style learning

5. Linear Regression (with engineered features)
Works better than you'd think if features are well-designed

🧠 Deep Learning Models
Best when you have lots of data and long-term dependencies matter.

6. LSTM / GRU (Recurrent Neural Networks)
Sequence models, good for capturing temporal dependencies

Can be univariate or multivariate

PyTorch and TensorFlow support these

7. Temporal Convolutional Networks (TCN)
Alternative to RNNs

Often train faster and are easier to tune

8. Transformer-based Models
Powerful for long-range dependencies

Libraries: PyTorch Forecasting, Hugging Face Transformers (Time Series Transformer)

In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

# ------------------------------
# 1. Load and prepare the data
# ------------------------------

# Read the metering data and order the rows chronologically
df = pd.read_csv("data/datasets2025/historical_metering_data_IT.csv", parse_dates=["DATETIME"])
df = df.sort_values("DATETIME")

# Keep only the columns that contain no NaN values at all
df = df.dropna(axis=1, how='any')

# Choose ONE customer for now (here: customerIT_6)
target_col = "VALUEMWHMETERINGDATA_customerIT_6"
# .copy() gives an independent frame, so adding the "value" column below
# never writes into (or warns about) a slice of df
series = df[["DATETIME", target_col]].dropna().copy()

# Split Data (Train/Test)
# Define test size as the last month's data
test_size = 30 * 24  # 30 days * 24 hours = 720 hours

# Normalize target values.
# The scaler is fitted on the training portion only, so the min/max of the
# held-out month do not leak into training. Test values are transformed with
# the same parameters and may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(series[[target_col]].iloc[:-test_size])
series["value"] = scaler.transform(series[[target_col]]).ravel()

# Split dataset
train_data = series.iloc[:-test_size]  # All data except the last month's data
test_data = series.iloc[-test_size:]  # Last month's data
# ------------------------------
# 2. Create Dataset
# ------------------------------

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for one-step-ahead forecasting.

    Each sample is a window of ``seq_len`` consecutive entries of the
    ``"value"`` column (the input) together with the entry that immediately
    follows the window (the target).

    Args:
        series: Object indexable by ``"value"`` whose result exposes
            ``.values`` (e.g. a pandas DataFrame with a ``"value"`` column).
        seq_len: Number of past observations in every input window.
    """

    def __init__(self, series, seq_len=24):
        self.seq_len = seq_len
        self.data = series["value"].values

    def __len__(self):
        # A series shorter than seq_len + 1 yields no complete sample.
        # Clamp to 0 so len() does not raise on a negative value.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(window, target)`` as float32 tensors for position ``idx``."""
        x = self.data[idx:idx + self.seq_len]  # input features: seq_len past values
        y = self.data[idx + self.seq_len]  # target: the value right after the window
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

seq_len = 24  # e.g., using 24 hours of history to predict the next hour
# Build the training windows from the training split only: using the full
# series here would let the model see the held-out month during training and
# make the evaluation against test_data meaningless.
dataset = TimeSeriesDataset(train_data, seq_len=seq_len)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


# ------------------------------
# 3. Define LSTM Model
# ------------------------------

class LSTMForecast(nn.Module):
    """Stacked LSTM followed by a linear head that predicts one value.

    Args:
        input_dim: Number of features per time step fed to the LSTM.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim=1, hidden_dim=64, num_layers=2):
        super(LSTMForecast, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Map a ``(batch, seq_len)`` input to a ``(batch,)`` prediction."""
        x = x.unsqueeze(-1)  # (batch, seq_len, 1)
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])  # take last time step -> (batch, 1)
        # Drop only the trailing feature axis. A bare squeeze() would also
        # remove the batch axis when batch == 1 (e.g. the last, incomplete
        # batch of an epoch) and return a 0-d tensor.
        return out.squeeze(-1)

model = LSTMForecast()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ------------------------------
# 4. Training Loop
# ------------------------------

# Progress bar over the 10 training epochs; its postfix shows the mean loss
progress_bar = tqdm(range(10), desc="Training Epochs")

for epoch in progress_bar:
    model.train()
    batch_losses = []

    # One optimisation step per mini-batch
    for x, y in train_loader:
        optimizer.zero_grad()
        output = model(x)
        loss = loss_fn(output, y)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    # Mean of the per-batch losses for this epoch
    total_loss = sum(batch_losses)
    avg_loss = total_loss / len(train_loader)

    # Show the epoch's mean loss next to the progress bar
    progress_bar.set_postfix(loss=f"{avg_loss:.5f}")


# ------------------------------
# 5. Evaluation
# ------------------------------

# Predict the hour that follows the training data from its final seq_len values
model.eval()
last_seq = torch.tensor(train_data["value"].to_numpy()[-seq_len:], dtype=torch.float32)
with torch.no_grad():
    pred = model(last_seq.unsqueeze(0))  # add a batch axis of size 1
    # The scaler expects a 2D (n_samples, 1) input
    pred_rescaled = scaler.inverse_transform(pred.reshape(-1, 1))
    print(f"Predicted next value: {pred_rescaled[0][0]}")

# Ground truth: the first hour of the held-out last month, in original units
first_actual_value = test_data[target_col].iloc[0]

# Report prediction and actual value side by side
print(f"Predicted first hour of the last month: {pred_rescaled[0][0]:.5f}")
print(f"Actual first hour of the last month: {first_actual_value:.5f}")

Training Epochs: 100%|██████████| 10/10 [00:26<00:00,  2.62s/it, loss=0.00226]

Predicted first hour of the last month: 0.29612
Actual first hour of the last month: 0.18406
Predicted next value: 0.17338642830580475
Predicted first hour of the last month: 0.17339
Actual first hour of the last month: 0.18406





In [None]:
### MULTISTEP FORECASTING

import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler

# ------------------------------
# 1. Load and prepare the data
# ------------------------------

# Read the CSV (parsing DATETIME as timestamps) and order rows chronologically
df = pd.read_csv("your_data.csv", parse_dates=["DATETIME"]).sort_values("DATETIME")

# Work with a single customer's column for now (e.g., customerES_1)
target_col = "VALUEMWHMETERINGDATA_customerES_1"
series = df[["DATETIME", target_col]].dropna()  # drop rows with missing values

# Scale the target with MinMaxScaler and keep it in a new "value" column
scaler = MinMaxScaler()
series["value"] = scaler.fit_transform(series[[target_col]])

# ------------------------------
# 2. Create Dataset for multi-step forecasting
# ------------------------------

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for multi-step forecasting.

    Each sample pairs ``seq_len`` consecutive entries of the ``"value"``
    column (the input) with the ``forecast_horizon`` entries that directly
    follow them (the target).

    Args:
        series: Object indexable by ``"value"`` whose result exposes
            ``.values`` (e.g. a pandas DataFrame with a ``"value"`` column).
        seq_len: Number of past observations in every input window.
        forecast_horizon: Number of future observations in every target.
    """

    def __init__(self, series, seq_len=24, forecast_horizon=12):
        self.seq_len = seq_len
        self.forecast_horizon = forecast_horizon
        self.data = series["value"].values

    def __len__(self):
        # The last valid start index is len - seq_len - forecast_horizon, so
        # the number of samples is that value plus one. Clamp to 0 so a
        # too-short series gives an empty dataset instead of a negative
        # length (which makes len() raise).
        return max(0, len(self.data) - self.seq_len - self.forecast_horizon + 1)

    def __getitem__(self, idx):
        """Return ``(window, future)`` as float32 tensors for position ``idx``."""
        x = self.data[idx:idx + self.seq_len]  # input window
        y = self.data[idx + self.seq_len:idx + self.seq_len + self.forecast_horizon]  # following values
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Window sizes: 24 hours of history in, 12 future steps out
seq_len = 24
forecast_horizon = 12

# Sliding-window dataset over the scaled series, served in shuffled batches of 32
dataset = TimeSeriesDataset(series, seq_len, forecast_horizon)
train_loader = DataLoader(dataset, shuffle=True, batch_size=32)

# ------------------------------
# 3. Define LSTM Model for Multi-Step Forecasting
# ------------------------------

class LSTMForecast(nn.Module):
    """Stacked LSTM whose linear head emits ``forecast_horizon`` values at once.

    Args:
        input_dim: Number of features per time step fed to the LSTM.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        forecast_horizon: Number of future steps predicted per input window.
    """

    def __init__(self, input_dim=1, hidden_dim=64, num_layers=2, forecast_horizon=12):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        # Project the final hidden state onto one output per forecast step
        self.fc = nn.Linear(in_features=hidden_dim, out_features=forecast_horizon)

    def forward(self, x):
        """Map a ``(batch, seq_len)`` input to ``(batch, forecast_horizon)``."""
        features = x[..., None]  # append a feature axis: (batch, seq_len, 1)
        hidden_states, _ = self.lstm(features)
        final_state = hidden_states[:, -1]  # hidden state of the last time step
        return self.fc(final_state)

model = LSTMForecast(forecast_horizon=forecast_horizon)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# ------------------------------
# 4. Training Loop
# ------------------------------

for epoch in range(10):  # increase epochs for better performance
    model.train()
    batch_losses = []
    # One optimisation step per mini-batch
    for x, y in train_loader:
        optimizer.zero_grad()
        output = model(x)
        loss = loss_fn(output, y)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    # Report the mean of the per-batch losses for this epoch
    total_loss = sum(batch_losses)
    print(f"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.5f}")

# ------------------------------
# 5. Evaluation
# ------------------------------    

# Feed the most recent seq_len scaled values (with a batch axis) to the model
last_seq = torch.tensor(series["value"].values[-seq_len:], dtype=torch.float32)[None]

# Run the trained model in inference mode, without gradient tracking
model.eval()
with torch.no_grad():
    forecast = model(last_seq)

# Undo the scaling; the scaler expects a 2D (n_samples, 1) array
forecast_rescaled = scaler.inverse_transform(forecast.squeeze(0).numpy().reshape(-1, 1))

# Print the forecast in original units
print(f"Predicted next {forecast_horizon} values: {forecast_rescaled.flatten()}")


import matplotlib.pyplot as plt

# Draw the full history (in original units) followed by the forecast
plt.figure(figsize=(12, 6))

# Historical values occupy x positions 0 .. len(series) - 1
history_x = np.arange(len(series))
history_y = scaler.inverse_transform(series["value"].values.reshape(-1, 1))
plt.plot(history_x, history_y, label="Historical Data")

# The forecast continues on the x axis right after the last observation
forecast_x = np.arange(len(series), len(series) + forecast_horizon)
plt.plot(forecast_x, forecast_rescaled.flatten(), label="Forecast", color='red')

plt.legend()
plt.xlabel('Time')
plt.ylabel('Value')
plt.title('Multi-Step Forecast')
plt.show()


In [None]:
import torch
import torch.nn as nn

In [None]:
class LSTMModel(nn.Module):
    """Batch-first stacked LSTM with a linear head on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map ``[batch, seq_len, input_size]`` to ``[batch, output_size]``."""
        hidden_states, _ = self.lstm(x)  # [batch, seq_len, hidden]
        final_state = hidden_states[:, -1]  # keep only the last time step
        return self.fc(final_state)


In [None]:
def train(model, dataloader, criterion, optimizer, device, epochs=10):
    """Train ``model`` for ``epochs`` passes over ``dataloader``.

    The model is moved to ``device`` and put in training mode. Every batch
    is moved to ``device`` too, and one optimizer step is taken per batch.
    The mean batch loss of each epoch is printed.

    Args:
        model: Module to optimise; called as ``model(inputs)``.
        dataloader: Iterable yielding ``(inputs, targets)`` tensor pairs.
            It does not need to implement ``__len__``.
        criterion: Callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: Optimizer holding the parameters of ``model``.
        device: Device on which the model and the batches are placed.
        epochs: Number of passes over ``dataloader``.

    Returns:
        A list with the mean batch loss of every epoch, in order. An epoch
        that yielded no batches contributes ``nan``.
    """
    model.to(device)
    model.train()

    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        # Count batches here rather than calling len(dataloader): this also
        # works for iterables without __len__ and avoids dividing by zero
        # when the loader is empty.
        num_batches = 0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        avg_loss = running_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(avg_loss)
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

    return epoch_losses


In [None]:
from torch.utils.data import Dataset, DataLoader
import numpy as np

class DummySequenceDataset(Dataset):
    """Synthetic regression dataset filled with standard-normal noise.

    Args:
        num_samples: Number of (sequence, target) pairs to generate.
        seq_len: Length of every input sequence.
        input_size: Number of features per time step.
    """

    def __init__(self, num_samples=1000, seq_len=20, input_size=1):
        # Inputs: (num_samples, seq_len, input_size); targets: (num_samples, 1)
        self.x = torch.randn(num_samples, seq_len, input_size)
        self.y = torch.randn(num_samples, 1)  # regression target

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return the ``(sequence, target)`` pair stored at ``idx``."""
        sequence = self.x[idx]
        target = self.y[idx]
        return sequence, target


In [None]:
# Model hyperparameters
input_size = 1
hidden_size = 64
num_layers = 2
output_size = 1

# Training configuration
batch_size = 32
epochs = 10
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Random data served in shuffled mini-batches
dataset = DummySequenceDataset()
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model, loss and optimizer
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

train(model, dataloader, criterion, optimizer, device, epochs=epochs)
