In [3]:
import numpy as np
import optuna
import pandas as pd

from IPython import get_ipython

import matplotlib.pyplot as plt

# matplotlib 3.6 renamed the bundled "seaborn" style to "seaborn-v0_8" and
# later releases dropped the old name; use whichever this install provides.
plt.style.use(
    "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
)

from sklearn.metrics import mean_absolute_percentage_error

import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import DataLoader


import os
import sys

sys.path.append("..")

from src.data.dataset import SequenceDataset
from src.optim.early_stopping import EarlyStopping

from src.models.lstm import LSTM
from src.model_selection.objective import LSTMObjective, split_train_test

## Load data

In [4]:
# Resolve the data directory: the mounted Google Drive folder when the
# notebook runs inside Google Colab, otherwise the folder next to the repo.
root_path = (
    "/content/drive/MyDrive/data/"
    if "google.colab" in str(get_ipython())
    else "../data/"
)

In [None]:
# Read the preprocessed load data.
# NOTE(review): unpickling can execute code embedded in the file, so this
# should only ever point at a trusted, locally produced pickle.
data = pd.read_pickle(f"{root_path}preprocessed/load.pkl")

# The "S_TOT" column serves as the target and, wrapped in a one-column
# frame, as the only input feature.
y = data["S_TOT"]
X = y.to_frame()

# Unpack the six pieces returned by the project's split helper.
(
    X_train,
    y_train,
    X_val,
    y_val,
    X_test,
    y_test,
) = split_train_test(X, y)


In [6]:
from torch.utils.tensorboard import SummaryWriter

## Manual search

In [7]:
# Window sizes handed to SequenceDataset and the mini-batch size of all loaders.
# NOTE(review): presumably 96 steps correspond to one day of 15-minute
# readings (3 days in, 1 day out) — confirm against the data resolution.
seq_length_input = 96 * 3
seq_length_output = 96
batch_size = 128


def _make_dataset_and_loader(features, target):
    """Wrap one split in a SequenceDataset and an unshuffled DataLoader.

    Uses the notebook-level ``seq_length_input``, ``seq_length_output`` and
    ``batch_size`` so that all three splits are windowed and batched alike.

    Returns:
        tuple: ``(dataset, loader)`` for the given split.
    """
    dataset = SequenceDataset(
        X=features,
        y=target,
        seq_length_input=seq_length_input,
        seq_length_prediction=seq_length_output,
    )
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return dataset, loader


training_data, train_loader = _make_dataset_and_loader(X_train, y_train)
val_data, val_loader = _make_dataset_and_loader(X_val, y_val)
test_data, test_loader = _make_dataset_and_loader(X_test, y_test)

In [None]:
# Hyperparameters of the manual run.
epochs = 2048  # upper bound; early stopping usually ends training sooner
lr = 3e-4
weight_decay = 0.001
dropout = 0
num_features = X_train.shape[1]
hidden_units = 64
num_layers = 1
seq_length = seq_length_input  # not used in this cell; kept for other cells

# use gpu if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# build the model from the hyperparameters above and move it to the device
model = LSTM(
    num_features=num_features,
    hidden_units=hidden_units,
    num_layers=num_layers,
    batch_size=batch_size,
    seq_length_input=seq_length_input,
    seq_length_output=seq_length_output,
    dropout=dropout,  # was a hard-coded 0 that ignored the setting above
).to(device)

# Adam with L2 regularisation via weight decay; no lr scheduler is used
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# mean squared error
criterion = nn.MSELoss()

# keep track of val loss and do early stopping
early_stopping = EarlyStopping(patience=5)

# Per-epoch mean losses. `test_history` holds the loss on the *validation*
# loader; the name is kept because the plotting cell reads it.
train_history, test_history = [], []

# NOTE(review): this log dir is relative to the notebook folder ("./models"),
# while the checkpoint below goes to "../models" — confirm which is intended.
writer = SummaryWriter("./models/runs/load/tensorboard")

try:
    for epoch in range(epochs):

        # ---- training pass ----
        model.train()
        loss_in_epoch_train = 0

        for inputs, targets in train_loader:

            inputs = inputs.to(device)
            targets = targets.to(device)

            # PyTorch accumulates gradients across backward passes, so
            # clear them before every mini-batch
            optimizer.zero_grad()

            outputs = model(inputs)
            batch_loss = criterion(outputs, targets)

            # backpropagate and update the parameters
            batch_loss.backward()
            optimizer.step()

            # add the mini-batch training loss to epoch loss
            loss_in_epoch_train += batch_loss.item()

        # ---- validation pass (no gradients needed) ----
        model.eval()
        loss_in_epoch_test = 0

        with torch.no_grad():
            for inputs, targets in val_loader:

                inputs = inputs.to(device)
                targets = targets.to(device)

                outputs = model(inputs)
                batch_loss = criterion(outputs, targets)
                loss_in_epoch_test += batch_loss.item()

        # average the summed mini-batch losses over the number of batches
        train_loss = loss_in_epoch_train / len(train_loader)
        test_loss = loss_in_epoch_test / len(val_loader)

        train_history.append(train_loss)
        test_history.append(test_loss)

        print(
            f"epoch : {epoch + 1}/{epochs}, loss (train) = {train_loss:.8f}, loss (test) = {test_loss:.8f}"
        )

        # stop once the validation loss has not improved for several epochs
        early_stopping(test_loss)
        if early_stopping.early_stop:
            break

    # trace the model graph with the last batch seen (already on `device`)
    writer.add_graph(model, inputs)
finally:
    # flush and release the event file even if training raised
    writer.close()


# Save the weights of the final model (state after the last epoch run).
# torch.save does not create missing directories, so make sure it exists.
os.makedirs("../models", exist_ok=True)
torch.save(
    model.state_dict(),
    f"../models/{model.__class__.__name__}.pth",
)


In [8]:
# Collect the per-epoch losses recorded during training into one frame whose
# index (one row per epoch) is labelled "epochs".
results = pd.DataFrame(
    {"train_loss": train_history, "test_loss": test_history}
).rename_axis("epochs")

# Plot both loss curves on a shared axis; the criterion was MSE.
plot_title = f"test and training loss of {model.__class__.__name__}"
ax = results.plot(title=plot_title, figsize=(16, 9))
ax.set_ylabel("MSE")

In [9]:
# make predictions with final model
predictions_all, targets_all = [], []

model.eval()

# Inference only: disable autograd (as the validation loop does) so no graph
# is built for each batch.
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        prediction = model(inputs)
        # flatten every batch to 1-D so all batches can be concatenated
        predictions_all.append(prediction.detach().cpu().numpy().flatten())
        targets_all.append(targets.numpy().flatten())

predictions_all = np.concatenate(predictions_all)
targets_all = np.concatenate(targets_all)

# NOTE(review): the index below has len(X_test) - seq_length_input + 1
# entries; this only lines up with the flattened arrays if the dataset yields
# exactly that many target values in total — confirm against SequenceDataset.
results = pd.DataFrame(
    {"ground truth": targets_all, "predictions": predictions_all},
    index=X_test.index[seq_length_input - 1 :],
)
# show only the first 1000 rows to keep the plot readable
results.head(1000).plot(title="ground truth vs. prediction", figsize=(16, 9))

In [None]:
def _group_mape(frame):
    """Return the MAPE of `predictions` against `ground truth` for one group."""
    return mean_absolute_percentage_error(
        frame["ground truth"], frame["predictions"]
    )


# Bucket the results by calendar month ("M" frequency; assumes `results` has
# a datetime-like index — TODO confirm) and score every bucket separately.
monthly_groups = results.groupby(pd.Grouper(freq="M"))[
    ["ground truth", "predictions"]
]

# sklearn returns a fraction; scale to percent for the plot.
mape_model = monthly_groups.apply(_group_mape) * 100

mape_model.plot(ylabel="MAPE in %", title="MAPE in % over time", figsize=(16, 9))


## Bayesian search
Bayesian search is built on top of [optuna](https://optuna.org/).

In [10]:
# Search budget and the tag used for the objective and the exported file.
N_TRIALS = 64
name = "load_solar"

# Same learning rate, hidden size, layer count and batch size as the manual
# run above; queued so the study evaluates this configuration as well.
# NOTE(review): the keys must match the parameter names suggested inside
# LSTMObjective — confirm.
default_params = dict(
    lr=3e-4,
    hidden_units=64,
    num_layers=1,
    batch_size=128,
)

# The objective value is minimised.
study = optuna.create_study(direction="minimize")
objective = LSTMObjective(X, y, name)

study.enqueue_trial(default_params)
study.optimize(objective, n_trials=N_TRIALS)

# Report the winning configuration and the trial it came from.
best_trial = study.best_trial
optimized_params = best_trial.params
print(f"params: {optimized_params}")
print(f"no: {best_trial.number}")

# Persist every trial (not only the best one) for later inspection.
study_df = study.trials_dataframe()
study_df.to_csv(f"../docs/study_lstm_{name}_all_parameters.csv")

In [11]:
# Matplotlib-backed optuna plots for the finished study: objective value per
# trial, per-parameter slices, and pairwise contours of the tuned parameters.
optuna_plots = optuna.visualization.matplotlib
tuned_params = ["lr", "hidden_units", "num_layers", "batch_size"]

optuna_plots.plot_optimization_history(study)
optuna_plots.plot_slice(study)
optuna_plots.plot_contour(study, tuned_params)