In [1]:
import sys
import csv
sys.path.append('..')
from BDDData import *
import numpy as np
import torch
import torch.utils.data as data

import networkx as nx
import matplotlib.pyplot as plt
from scipy.linalg import circulant
from product_graph import *
from utils import *

In [2]:
#Load dataframes
bdd_data = BDD_dataset("raw_data/BDDdata/")
#Add column with the timestep
bdd_data.add_timestep_id()
#Add flags for chaotic values
bdd_data.tag_chaotic(replace=True)
#Compute the mod for the nazelle and wind angles
bdd_data.angle_mod()

# ! Don't interpolate this data
# ! Missing values remain NaN
# #Interpolate the missing values
# bdd_data.interpolate_power()

#Values smaller than 0 are set to 0
bdd_data.cap_power_to_zero()
#Normalize Patv feature to [0,1]
bdd_data.normalize_power(min=0, max=1, method= "MinMaxScaler")
#Convert df to matrix form, where only Patv is included. Then split into train, validation and test
#The matrix contains the subset of the time series for ALL nodes, so an (TxN matrix)
train, val, test = bdd_data.split_df()

print(train.shape)

(134, 24624)


In [3]:
class CustomBDD_Dataset(data.Dataset):
    def __init__(self, dataset, observation_window=12, forecast_window=12, starting_turbine = 0,  ending_turbine=133):
        self.observation_window = observation_window
        self.forecast_window = forecast_window
        length = eval(f'len({dataset}[0])')#Retrieves length of dataset
        bdd_data.get_observation_forecasting_window(time_series_len=length, observation_steps=self.observation_window, forecast_steps=self.forecast_window)#Generates obs window
        self.window_of_interest =  bdd_data.sliding_indices[str(self.observation_window)+","+str(self.forecast_window)]#Retrieves windows
        self.starting_turbine = starting_turbine
        self.ending_turbine = ending_turbine  
        self.dataset = dataset

    def __len__(self):
        return len(self.window_of_interest)

    def __getitem__(self, idx):
        window = self.window_of_interest[idx]
        if self.dataset == "train":
            features = train[self.starting_turbine:self.ending_turbine+1,window[0]:window[1]]
            labels = train[self.starting_turbine:self.ending_turbine+1,window[1]:window[2]]
        elif self.dataset == "val":
            features = val[self.starting_turbine:self.ending_turbine+1,window[0]:window[1]]
            labels = val[self.starting_turbine:self.ending_turbine+1,window[1]:window[2]]
        elif self.dataset == "test":
            features = test[self.starting_turbine:self.ending_turbine+1,window[0]:window[1]]
            labels = test[self.starting_turbine:self.ending_turbine+1,window[1]:window[2]]
        else:
            raise NotImplementedError
        return torch.from_numpy(features).float(), torch.from_numpy(labels).float()
    
obs_window = 12
forecast_window = 12
batch_size = 1

train_dataset = CustomBDD_Dataset("train",observation_window=obs_window,forecast_window=forecast_window)
train_loader = data.DataLoader(train_dataset, shuffle=True, batch_size = batch_size)
val_dataset = CustomBDD_Dataset("val",observation_window=obs_window,forecast_window=forecast_window)
val_loader = data.DataLoader(val_dataset, shuffle=True, batch_size = batch_size)

In [4]:
x,y = next(iter(train_loader))
print(f"{x.shape=}\n{y.shape=}")

x.shape=torch.Size([1, 134, 12])
y.shape=torch.Size([1, 134, 12])


In [5]:
G = nx.read_gml('data/spatial_graph_2000.gml')
adj_mat = nx.adjacency_matrix(G)
adj_mat = nx.to_numpy_array(G)


In [6]:
S = normalize_adjacency(torch.tensor(adj_mat)).float()

In [7]:
print(adj_mat.shape)

(134, 134)


In [47]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from gmm_code.gmm_gcnn import GMMGCNN

# TODO: Should we always take the first time step?
all_features = train[0, :].reshape(-1, 1)
all_A = S

In [54]:
import time

torch.autograd.set_detect_anomaly(True)

class MaskedMSELoss(nn.Module):
    def __init__(self):
        super(MaskedMSELoss, self).__init__()

    def forward(self, prediction, target):
        # Create a mask that is 1 for non-NaN entries and 0 for NaN entries
        mask = ~torch.isnan(target)
        # Apply the mask to only keep non-NaN elements
        out = prediction[mask]
        tar = target[mask]
        # Calculate MSE Loss on non-NaN elements
        return nn.functional.mse_loss(out, tar)

def train_epoch_gcnn(model, loader, optimizer, criterion):
    model.train()
    total_loss = 0
    counter = 0
    for x, y in loader:
        counter += 1
        print(f"{counter}/{len(loader)}")
        # Remove dimension of size 1.
        x = x[0, :, :]
        y = y[0, :, :]
        out = model(all_A, x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    return total_loss / len(loader)


@torch.no_grad()
def evaluate_epoch_gcnn(model, loader, criterion):
    model.eval()
    total_loss = 0

    for x, y in loader:
        # x = sample[0].reshape(n_stations, obs_window)
        # y = sample[1].reshape(n_stations, -1)
        out = model(all_A, x)
        # print(out)
        loss = criterion(out, y)
        total_loss += loss.item()
    return total_loss / len(loader)


def train_gcnn(model, num_epochs, criterion, train_loader, test_loader):
    # TODO: Check loss function!
    optimizer = torch.optim.Adam(model.parameters(), lr=0.1, weight_decay=5e-4)

    start_time = time.time()
    train_losses = []
    val_losses = []

    for epoch in range(1, num_epochs + 1):
        # Model training
        train_loss = train_epoch_gcnn(model, train_loader, optimizer, criterion)
        print(f"loss: {train_loss}")

        # Model validation
        val_loss = evaluate_epoch_gcnn(model, test_loader, criterion)

        train_losses.append(train_loss)
        val_losses.append(val_loss)

        if epoch % 10 == 0:
            print(
                "epoch:",
                epoch,
                "\t training loss:",
                np.round(train_loss, 4),
                "\t validation loss:",
                np.round(val_loss, 4),
            )

    elapsed_time = time.time() - start_time
    print(f"Model training took {elapsed_time:.3f} seconds")

    return train_losses, val_losses

In [55]:
# train_gcnn(model, 1, torch.nn.MSELoss(), train_loader, val_loader)
n_components = 5
order = 1
model = GMMGCNN(obs_size=obs_window, pred_size=forecast_window, hid_sizes=[8], num_components=n_components, all_features=all_features, all_A=all_A, order=order)
train_gcnn(model, 1, MaskedMSELoss(), train_loader, val_loader)

1/24600
2/24600
3/24600
4/24600
5/24600
6/24600
7/24600
8/24600
9/24600
10/24600
11/24600
12/24600
13/24600
14/24600
15/24600
16/24600
17/24600
18/24600
19/24600
20/24600
21/24600
22/24600
23/24600
24/24600
25/24600
26/24600
27/24600
28/24600
29/24600
30/24600
31/24600
32/24600
33/24600
34/24600
35/24600
36/24600
37/24600
38/24600
39/24600
40/24600
41/24600
42/24600
43/24600
44/24600
45/24600
46/24600
47/24600
48/24600
49/24600
50/24600
51/24600
52/24600
53/24600
54/24600
55/24600
56/24600
57/24600
58/24600
59/24600
60/24600
61/24600
62/24600
63/24600
64/24600
65/24600
66/24600
67/24600
68/24600
69/24600
70/24600
71/24600
72/24600
73/24600
74/24600
75/24600
76/24600
77/24600
78/24600
79/24600
80/24600
81/24600
82/24600
83/24600
84/24600
85/24600
86/24600
87/24600
88/24600
89/24600
90/24600
91/24600
92/24600
93/24600
94/24600
95/24600
96/24600
97/24600
98/24600
99/24600
100/24600
101/24600
102/24600
103/24600
104/24600
105/24600
106/24600
107/24600
108/24600
109/24600
110/24600
111/2460

IndexError: too many indices for tensor of dimension 2