In [None]:
!pip install networkx
!pip install optuna

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import networkx as nx

import pandas as pd

import copy
import random

import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch_geometric.nn import GCNConv
from torch_geometric.data import Batch, Data, Dataset

from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import LambdaLR

from sklearn.preprocessing import MinMaxScaler
import time

import math

In [None]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
!python --version

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

if torch.cuda.is_available():
    print("GPU is available.")
else:
    print("No GPU detected.")

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Data Prep

In [None]:
# Stations to load; each one has its own CSV named data_<station>_5T_k-10.csv.
files = [
    "donken",
    "Holmgrens",
    "IONITY",
    "Jureskogs_Vattenfall",
    "UFC",
]

nr_of_data_points = 163618
splits = [0.8, 0.9]

# station name -> DataFrame holding only the 'Occupancy' column,
# indexed by the CSV's unnamed first column.
data_dict = {}

for station_name in files:
    data = pd.read_csv('data_' + station_name + '_5T_k-10.csv').set_index('Unnamed: 0')

    # Drop every column except 'Occupancy'.
    unwanted_columns = data.columns.difference(['Occupancy'])
    data = data.drop(columns=unwanted_columns)

    data_dict[station_name] = data

# NOTE: `data` (the last frame loaded) is still read by a later cell to size
# the sliding-window index range, so the name is intentionally left bound.

# Model Making

In [None]:
# Build the station graph: one node per station, every pair connected
# (10 edges over 5 nodes), each edge carrying a numeric weight.
G = nx.Graph()

# Add nodes
num_nodes = 5
G.add_nodes_from(range(num_nodes))

# Node ids follow the order of `files`, because that is the order in which
# `data_dict` is filled and therefore the row order of the node-feature tensor
# built by the dataset. The previous hand-numbered ids (Jureskogs=0, IONITY=1,
# donken=2, Holmgrens=3, UFC=4) attached every edge to the wrong station's
# feature row.
Jureskogs_Vattenfall = files.index("Jureskogs_Vattenfall")
IONITY = files.index("IONITY")
donken = files.index("donken")
Holmgrens = files.index("Holmgrens")
UFC = files.index("UFC")

# (node_a, node_b, weight) triples.
# NOTE(review): the weights look like pairwise distances - confirm the unit.
edges_to_connect = [
    (Jureskogs_Vattenfall, IONITY, 230),
    (Jureskogs_Vattenfall, UFC, 750),
    (Jureskogs_Vattenfall, Holmgrens, 750),
    (Jureskogs_Vattenfall, donken, 650),
    (IONITY, UFC, 550),
    (IONITY, Holmgrens, 500),
    (IONITY, donken, 450),
    (UFC, Holmgrens, 280),
    (UFC, donken, 550),
    (Holmgrens, donken, 550)
]

# Add edges with custom weights
for node_a, node_b, weight in edges_to_connect:
    G.add_edge(node_a, node_b, weight=weight)

# Dense weighted adjacency matrix (rows/columns in node-id order 0..num_nodes-1).
adj_matrix = nx.adjacency_matrix(G).todense()

torch_adj_matrix = torch.tensor(np.asarray(adj_matrix), dtype=torch.float)

# edge_index: [2, E] (source row, target row) for every non-zero entry.
edge_index = torch_adj_matrix.nonzero(as_tuple=False).t().contiguous()
# edge_attr: [E, 1], the weight of each edge in the same order as edge_index.
# Indexing with the two coordinate rows picks single entries; indexing with
# the [E, 2] nonzero() result (as before) selected whole matrix rows and
# produced 200 values instead of 20.
edge_attr = torch_adj_matrix[edge_index[0], edge_index[1]].reshape(-1, 1)


# Define colors for nodes
node_colors = ['Yellow'] * num_nodes

print(G.nodes)

# Draw the graph
pos = nx.spring_layout(G)  # positions for all nodes
nx.draw(G, pos, with_labels=True, node_color=node_colors, node_size=700, font_size=10)
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

# Show the graph
plt.show()


In [None]:
# Forecast horizons expressed in time steps (12 steps per hour).
four_hours = 48
three_hours = 36
two_hours = 24
one_hour = 12
half_hour = 6

future_steps = four_hours   # number of steps to predict
seq_len = 576               # number of past steps fed to the model
batch_size = 64

# Fixed seed so the train/val/test membership is reproducible across runs.
split_seed = 42

# Every index is the start row of one (history, future) window. Size the range
# from the shortest station frame so each window is in range for all stations,
# instead of relying on the `data` variable leaked from the loading loop.
n_rows = min(len(frame) for frame in data_dict.values())
indices = list(range(n_rows - future_steps - seq_len))
# A private Random instance keeps the global `random` state untouched.
random.Random(split_seed).shuffle(indices)
print(len(indices))

# NOTE(review): windows starting at neighbouring indices overlap almost
# entirely, so a shuffled split leaks information between train/val/test.
# A chronological split would give a more honest validation score.
train_end = int(len(indices) * 0.8)
val_end = int(len(indices) * 0.9)

train_i = indices[:train_end]
val_i = indices[train_end:val_end]
test_i = indices[val_end:]

class datasetMaker(Dataset):
    """Sliding-window dataset over several per-station time series.

    Each sample is a graph whose nodes are the stations (in the iteration
    order of ``station_data``). Node features are the ``seq_len`` rows
    starting at a window start index; labels are the following
    ``future_steps`` rows.

    Args:
        station_data: mapping station name -> DataFrame (one column is assumed
            by downstream code - TODO confirm for other inputs).
        indices_conversion: sequence of window start rows belonging to this
            split; sample ``i`` uses ``indices_conversion[i]``.
        edge_index, edge_attr: graph connectivity attached to every sample.
        seq_len: number of history rows per window.
        future_steps: number of label rows per window.
        batch_size: stored for reference only.
    """

    def __init__(self, station_data, indices_conversion, edge_index, edge_attr, seq_len, future_steps, batch_size):
        self.station_data = station_data
        self.indices_conversion = indices_conversion
        # Row count of the first station's frame (was hard-coded to "donken").
        self.size = next(iter(station_data.values())).shape[0]
        self.edge_index = edge_index
        self.edge_attr = edge_attr
        self.seq_len = seq_len
        self.future_steps = future_steps
        self.batch_size = batch_size

    def __len__(self):
        # One sample per start index. The start indices are already restricted
        # to positions with a full history + future window, so subtracting
        # seq_len and future_steps again (as before) silently dropped samples.
        return len(self.indices_conversion)

    def _slice_windows(self, start):
        """Return (node_features, labels) tensors for the window starting at row ``start``.

        node_features: float tensor [stations, seq_len, columns]
        labels:        float tensor [stations, future_steps, 1, columns]
        """
        seq_end = start + self.seq_len
        fut_end = seq_end + self.future_steps

        frames = list(self.station_data.values())
        history = np.array([frame.iloc[start:seq_end].values for frame in frames])
        future = np.array([frame.iloc[seq_end:fut_end].values for frame in frames])

        node_features = torch.tensor(history).float()
        # The extra axis at dim 2 is squeezed away again by the collate function / model.
        labels = torch.unsqueeze(torch.tensor(future), dim=2).float()
        return node_features, labels

    def __getitem__(self, index):
        # Translate the position within this split into a window start row.
        start = self.indices_conversion[index]
        node_features, labels = self._slice_windows(start)

        Gdata = Data(x=node_features, y=labels, edge_index=self.edge_index, edge_attr=self.edge_attr)

        return Gdata, labels

    
def custom_collate(batch):
    """Collate (graph, label) samples into one batched graph and one label tensor.

    Labels are concatenated along dim 0 and their axis 3 is squeezed; graphs
    are merged with ``Batch.from_data_list``.
    """
    stacked_labels = torch.cat([sample[1] for sample in batch])
    graphs = [sample[0] for sample in batch]

    return Batch.from_data_list(graphs), stacked_labels.squeeze(3)

def _make_split(split_indices):
    """Create the dataset and its shuffled, drop-last loader for one index split."""
    dataset = datasetMaker(data_dict, split_indices, edge_index, edge_attr, seq_len, future_steps, batch_size)
    loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, drop_last=True, collate_fn=custom_collate)
    return dataset, loader


train_dataset, train_loader = _make_split(train_i)
val_dataset, val_loader = _make_split(val_i)
test_dataset, test_loader = _make_split(test_i)

# Number of batches per split.
print("train len ", len(train_loader))
print("val len   ", len(val_loader))
print("test len  ", len(test_loader))

# Sanity check: show the first training batch and its label shape.
for data, label in train_loader:
    print(data)
    print(label.shape)
    break

In [None]:

def reshape_to_batches(x, batch_description):
    """Split the leading axis of ``x`` into (graphs, nodes-per-graph).

    The number of graphs is ``batch_description.max() + 1``; all remaining
    axes are kept as they are, e.g.
    torch.Size([28, 576, 64]) --> torch.Size([4, 7, 576, 64])
    """
    n_graphs = batch_description.max().item() + 1
    nodes_per_graph = x.size(0) // n_graphs
    target_shape = torch.Size([n_graphs, nodes_per_graph, *x.size()[1:]])
    return x.view(target_shape)


class GCN(torch.nn.Module):
    """Stack of ``gcn_layers`` GCNConv layers, each followed by ReLU.

    Args:
        in_channels: size of the input feature axis.
        gcn_hidden_channels: output size of every convolution.
        gcn_layers: total number of convolutions (>= 1).
    """

    def __init__(self, in_channels=1, gcn_hidden_channels=8, gcn_layers=1):
        super(GCN, self).__init__()
        self.in_conv = GCNConv(in_channels, gcn_hidden_channels)
        # ModuleList registers the extra layers as sub-modules, so their
        # weights are returned by .parameters() (i.e. actually trained),
        # follow .to()/.cuda(), and are stored in state_dict(). A plain Python
        # list did none of that. Checkpoints saved before this change contain
        # no `hidden_convs.*` keys.
        self.hidden_convs = torch.nn.ModuleList(
            GCNConv(gcn_hidden_channels, gcn_hidden_channels) for _ in range(gcn_layers - 1)
        )

    def forward(self, x, edge_index, batch):
        """Apply the convolution stack.

        Args:
            x: node features, either [nodes, features] or
               [nodes, time, features].
            edge_index: [2, E] connectivity over the node axis.
            batch: unused; kept for interface compatibility (the caller in
                this file passes the edge attributes here).

        Returns:
            Tensor shaped like ``x`` with the last axis replaced by
            ``gcn_hidden_channels``.
        """
        x = x.float()

        # GCNConv propagates along axis -2 (MessagePassing's default
        # node_dim). For [nodes, time, features] input that axis is *time*,
        # so messages were exchanged between time steps instead of stations.
        # Put the node axis at -2 for the convolutions and restore it after.
        has_time_axis = x.dim() == 3
        if has_time_axis:
            x = x.transpose(0, 1)

        x = self.in_conv(x, edge_index)
        for conv in self.hidden_convs:
            x = F.relu(x)
            x = conv(x, edge_index)
        x = F.relu(x)

        if has_time_axis:
            x = x.transpose(0, 1).contiguous()
        return x

class SimpleTransformer(nn.Module):
    """Encoder-decoder transformer that forecasts ``seq_length`` future steps.

    Args:
        input_size: feature size of the encoder input.
        hidden_layer_size: model width (d_model) of encoder and decoder.
        output_size: feature size of each predicted step.
        nhead: number of attention heads.
        seq_length: number of steps generated in inference mode.
        num_layers: number of encoder layers and of decoder layers.
        dropout: dropout used inside the transformer layers.
    """

    def __init__(self, input_size, hidden_layer_size, output_size, nhead, seq_length, num_layers=1, dropout=0.1):
        super(SimpleTransformer, self).__init__()

        self.seq_length = seq_length
        self.output_size = output_size
        self.hidden_layer_size = hidden_layer_size

        # Linear projections of encoder input / decoder input into model width.
        self.embeddingIn = nn.Linear(input_size, hidden_layer_size)
        self.embeddingTGT = nn.Linear(output_size, hidden_layer_size)

        self.PositionalEncoding = PositionalEncoding(max_len=1000, d_model=hidden_layer_size)

        encoder_layers = nn.TransformerEncoderLayer(d_model=hidden_layer_size, nhead=nhead,
                                                    dim_feedforward=4*hidden_layer_size, dropout=dropout,
                                                    activation='gelu')
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer=encoder_layers, num_layers=num_layers)

        decoder_layers = nn.TransformerDecoderLayer(d_model=hidden_layer_size, nhead=nhead,
                                                    dim_feedforward=4*hidden_layer_size, dropout=dropout,
                                                    activation='gelu')
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer=decoder_layers, num_layers=num_layers)

        # Maps decoder states back to the output feature size.
        self.linear1 = nn.Linear(hidden_layer_size, output_size)

    def generate_square_subsequent_mask(self, sz):
        """Return a [sz, sz] causal mask: -inf above the diagonal, 0 elsewhere."""
        mask = torch.triu(torch.ones(sz, sz) * float('-inf'), diagonal=1)
        return mask

    def forward(self, x, tgt=None, last_value=None, inference=False):
        """Run teacher-forced training or autoregressive inference.

        Args:
            x: encoder input [batch, src_len, input_size].
            tgt: ground-truth future [batch, steps, output_size]; required
                when ``inference`` is False, ignored otherwise.
            last_value: most recent observed value [batch, 1]; it becomes the
                first decoder input (assumes output_size == 1 - TODO confirm
                for wider outputs).
            inference: when True, generate ``seq_length`` steps
                autoregressively instead of using ``tgt``.

        Returns:
            Predictions [batch, steps, output_size].

        Raises:
            ValueError: if ``last_value`` is missing, or ``tgt`` is missing
                in training mode.
        """
        if last_value is None:
            raise ValueError("last_value is required: it seeds the decoder input")
        if not inference and tgt is None:
            raise ValueError("tgt is required when inference=False")

        start_token = torch.unsqueeze(last_value, dim=2)

        # Encoder. The layers are sequence-first, hence the permute.
        memory = self.PositionalEncoding(self.embeddingIn(x))
        enc_mask = self.generate_square_subsequent_mask(memory.size(1)).to(memory.device)
        memory = self.transformer_encoder(memory.permute(1, 0, 2), mask=enc_mask)

        if inference:
            tgt_gen = start_token
            generated_steps = []

            for _ in range(self.seq_length):
                tgt_emb = self.PositionalEncoding(self.embeddingTGT(tgt_gen)).permute(1, 0, 2)
                # Same causal mask as in training, so earlier positions never
                # attend to later ones; without it, multi-layer decoders see
                # different intermediate states than they were trained on.
                step_mask = self.generate_square_subsequent_mask(tgt_emb.size(0)).to(tgt_emb.device)
                decoder_output = self.transformer_decoder(tgt_emb, memory, tgt_mask=step_mask)

                # Only the newest position is a fresh prediction.
                output_step = self.linear1(decoder_output[-1, :, :]).unsqueeze(1)
                generated_steps.append(output_step)

                tgt_gen = torch.cat((tgt_gen, output_step), dim=1)

            if not generated_steps:
                return start_token.new_zeros((start_token.size(0), 0, self.output_size))
            return torch.cat(generated_steps, dim=1)

        # Teacher forcing: decoder input is the target shifted right by one,
        # starting from the last observed value.
        tgt_input = torch.cat([start_token, tgt[:, :-1]], dim=1)
        tgt_emb = self.PositionalEncoding(self.embeddingTGT(tgt_input)).permute(1, 0, 2)
        tgt_mask = self.generate_square_subsequent_mask(tgt_emb.size(0)).to(tgt_emb.device)

        decoder_output = self.transformer_decoder(tgt_emb, memory, tgt_mask=tgt_mask)
        output = self.linear1(decoder_output)

        return output.permute(1, 0, 2)
        
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position encodings to a [batch, seq, d_model] tensor.

    Args:
        d_model: size of the feature axis (even or odd).
        max_len: longest sequence length supported.
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term

        # Even columns hold sines, odd columns cosines.
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns; trimming avoids the shape mismatch that used to raise.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # Stored as [1, max_len, d_model] so it broadcasts over the batch axis.
        # A buffer moves with the module but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(1)`` positions.

        Raises:
            ValueError: if the sequence is longer than ``max_len``.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(1):
            raise ValueError(
                f"sequence length {seq_len} exceeds the positional encoding max_len {self.pe.size(1)}"
            )
        return x + self.pe[:, :seq_len, :]

class STGCN(nn.Module):
    """Graph convolution over stations followed by a shared per-station transformer.

    Args:
        in_channels: node feature size fed to the GCN.
        gcn_layers: number of graph convolutions.
        hidden_channels: GCN output size (= transformer input size).
        transformer_hidden_size: transformer model width.
        transformer_num_layers: encoder/decoder depth.
        transformer_nhead: number of attention heads.
        out_channels: feature size of each predicted step.
        prediction_steps: number of steps generated in inference mode
            (default 48, the value that used to be hard-coded).
    """

    def __init__(self, in_channels, gcn_layers, hidden_channels, transformer_hidden_size, transformer_num_layers, transformer_nhead, out_channels, prediction_steps=48):
        super(STGCN, self).__init__()
        print("\033[100mhidden_channels:", hidden_channels,
              "   GCN hidden layers:", gcn_layers,
              "   transformer_hidden_size:", transformer_hidden_size,
              "   transformer_num_layers:", transformer_num_layers,
              "   transformer_nhead:", transformer_nhead, "\033[0m")

        self.GCN = GCN(in_channels=in_channels, gcn_hidden_channels=hidden_channels, gcn_layers=gcn_layers)

        # No .cuda() here: device placement is the caller's decision
        # (model.cuda() / model.to(device) moves every sub-module), which also
        # lets the model be built on CPU-only machines.
        self.transformer = SimpleTransformer(input_size=hidden_channels, hidden_layer_size=transformer_hidden_size,
                                             output_size=out_channels, seq_length=prediction_steps,
                                             num_layers=transformer_num_layers, nhead=transformer_nhead)

    def forward(self, data, inference=False):
        """Predict future values for every station of every graph in the batch.

        Args:
            data: batched graph exposing ``x``, ``y``, ``edge_index``,
                ``edge_attr`` and ``batch``.
            inference: forwarded to the transformer (autoregressive decoding
                instead of teacher forcing).

        Returns:
            Tensor [graphs, stations, steps, out_channels].
        """
        batch = data.batch
        label = torch.squeeze(data.y, 2)

        # Work on local float copies instead of overwriting fields of `data`.
        node_x = data.x.float()
        edge_attr = data.edge_attr.float() if data.edge_attr is not None else None

        # Spatial processing
        x = self.GCN(node_x, data.edge_index, edge_attr)

        # Regroup the flat node axis into [graphs, stations, ...].
        x = reshape_to_batches(x, batch)
        # Last observed raw value per node; seeds the decoder.
        last_value = reshape_to_batches(node_x[:, -1, :], batch)
        label = reshape_to_batches(label, batch)

        # Run the shared transformer once per station (station axis moved to the front).
        predictions = []
        for station_data, station_label, station_last_value in zip(x.permute(1, 0, 2, 3), label.permute(1, 0, 2, 3), last_value.permute(1, 0, 2)):
            output = self.transformer(station_data, station_label, station_last_value, inference)
            predictions.append(output)

        # Put the station axis back at position 1.
        predictions = torch.stack(predictions, dim=1)
        return predictions

# Example usage:
# Define the adjacency matrix for spatial processing (A_spatial)
# Define the input size, number of layers, and number of heads for the temporal transformer


# Training

In [None]:

def train_epoch(epoch, optimizer, loss_function, model, train_loader):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        epoch: current epoch number (unused; kept for interface compatibility).
        optimizer: optimizer updating ``model``'s parameters.
        loss_function: callable ``(predictions, label) -> scalar tensor``.
        model: module called as ``model(data, inference=False)``.
        train_loader: iterable of ``(data, label)`` batches; ``data`` must
            expose ``.batch`` and ``.to(device)``.

    Returns:
        Mean loss over the batches, as a Python float.
    """
    # Follow the model's device instead of forcing CUDA, so the same code
    # runs on CPU-only machines.
    device = next(model.parameters()).device

    total_loss = 0
    model.train()
    for data, label in train_loader:
        # Regroup labels to [graphs, stations, ...] to match the model output.
        label = reshape_to_batches(label, data.batch)
        data = data.to(device)
        label = label.to(device).float()

        optimizer.zero_grad()

        predictions = model(data, inference=False)

        loss_value = loss_function(predictions, label)
        loss_value.backward()
        optimizer.step()

        total_loss += loss_value.item()
    return total_loss / len(train_loader)

def validate_epoch(epoch, loss, model, val_loader):
    """Evaluate ``model`` on ``val_loader`` in inference mode, without gradients.

    Args:
        epoch: current epoch number (unused; kept for interface compatibility).
        loss: callable ``(predictions, label) -> scalar tensor``.
        model: module called as ``model(data, inference=True)``.
        val_loader: iterable of ``(data, label)`` batches; ``data`` must
            expose ``.batch`` and ``.to(device)``.

    Returns:
        Mean loss over the batches, as a Python float.
    """
    # Follow the model's device instead of forcing CUDA.
    device = next(model.parameters()).device

    total_loss = 0
    model.eval()

    with torch.no_grad():
        for data, label in val_loader:
            # Regroup labels to [graphs, stations, ...] to match the model output.
            label = reshape_to_batches(label, data.batch)
            data = data.to(device)
            label = label.to(device).float()

            predictions = model(data, inference=True)

            loss_value = loss(predictions, label)
            total_loss += loss_value.item()
    return total_loss / len(val_loader)
import time
import copy

def a_proper_training(num_epoch, model, optimizer, loss_function, train_loader, val_loader, scheduler, patience=200):
    """Train with per-epoch validation, LR scheduling and early stopping.

    After every epoch the scheduler is stepped and the current learning rate
    recorded. A deep copy of the model is kept whenever the validation loss
    improves; training stops once ``patience`` consecutive epochs bring no
    improvement.

    Returns:
        (best_model, best_epoch, train_losses, val_losses, lrs)
    """
    best_model, best_epoch = None, None
    best_loss = float('inf')
    train_losses, val_losses, lrs = [], [], []

    # Consecutive epochs without a new best validation loss.
    epochs_without_improvement = 0

    print("Begin Training")

    for epoch in range(num_epoch):
        tic = time.time()

        train_loss = train_epoch(epoch, optimizer, loss_function, model, train_loader)
        val_loss = validate_epoch(epoch, loss_function, model, val_loader)
        train_losses.append(train_loss)
        val_losses.append(val_loss)

        scheduler.step()
        lrs.append(optimizer.param_groups[0]['lr'])

        if val_loss < best_loss:
            best_loss, best_epoch = val_loss, epoch
            best_model = copy.deepcopy(model)
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        out_of_patience = epochs_without_improvement >= patience
        if out_of_patience:
            print(f"Stopping early at epoch {epoch + 1}")

        elapsed_time = time.time() - tic
        print(f"Epoch {epoch + 1}/{num_epoch}: Train Loss = {train_loss} Val Loss = {val_loss} Elapsed_time = {elapsed_time // 60}mins")

        # Leave only after this epoch's summary line has been printed.
        if out_of_patience:
            break

    return (best_model, best_epoch, train_losses, val_losses, lrs)


In [None]:
def lr_lambda(current_step: int, d_model: int, warmup_steps: int) -> float:
    """Noam-style LR multiplier: linear warm-up, then inverse-square-root decay."""
    step = current_step + 1
    decay = step ** (-0.5)
    warmup = step * (warmup_steps ** (-1.5))
    return (d_model ** (-0.5)) * min(decay, warmup)


# Schedule settings. The scheduler is stepped once per epoch by the training loop.
epochs = 100
warmup_steps = int(epochs * 0.1)
# NOTE(review): d_model is 10 here while the model below uses
# transformer_hidden_size=12 - confirm which value is intended.
d_model = 10

model = STGCN(in_channels=1, gcn_layers=2, hidden_channels=2, transformer_hidden_size=12,
              transformer_num_layers=1, transformer_nhead=3, out_channels=1).cuda()

optimizer = optim.Adam(model.parameters(), lr=0.005)
criterion = nn.MSELoss()
scheduler = LambdaLR(optimizer, lr_lambda=lambda step: lr_lambda(step, d_model, warmup_steps))

# Train with early stopping and keep the best checkpoint.
best_model, best_epoch, train_losses, val_losses, lrs = a_proper_training(
    epochs, model, optimizer, criterion, train_loader, val_loader, scheduler
)

torch.save(best_model.state_dict(), "best_ST-GCN_Transformer_4hour.pth")

In [None]:

# Training vs. validation loss per epoch for the run above.
for loss_curve, curve_name in ((train_losses, "train"), (val_losses, "val")):
    plt.plot(loss_curve, label=curve_name)
#plt.plot(lrs, label="learning rates")

plt.title("MSE Loss")
plt.legend()


In [None]:
import optuna
from optuna.trial import TrialState
import torch.optim as optim
import torch.nn as nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
import copy
import time


NUM_EPOCHS = 250

def objective(trial):
    """Optuna objective: train one STGCN configuration and return its best validation loss.

    Hyperparameters are drawn from ``trial``. The model is trained for up to
    ``NUM_EPOCHS`` epochs with early stopping (patience 20). The loss curves
    and the best weights are written to ``models/direct_connect_transformer/``.

    Returns:
        Best validation loss, or ``float('inf')`` if the trial raised or no
        epoch produced an improvement.
    """
    print("\033[41m-------------------------------------------------------------------------------------\033[0m")
    try:
        # Suggest hyperparameters with even values
        hidden_channels = trial.suggest_int('hidden_channels', 2, 14, step=2)
        gcn_layers = trial.suggest_int('gcn_layers', 1, 4)
        transformer_num_layers = trial.suggest_int('transformer_num_layers', 1, 6)
        transformer_nhead = trial.suggest_int('transformer_nhead', 1, 6)
        factor = trial.suggest_int('factor', 2, 12, step=2)
        # A multiple of nhead keeps the width divisible by the number of heads.
        transformer_hidden_size = transformer_nhead * factor
        learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)

        model = STGCN(in_channels=1,
                      gcn_layers=gcn_layers,
                      hidden_channels=hidden_channels,
                      transformer_hidden_size=transformer_hidden_size,
                      transformer_num_layers=transformer_num_layers,
                      transformer_nhead=transformer_nhead,
                      out_channels=1).cuda()

        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        criterion = nn.MSELoss()

        # Define the lambda function for scheduling with Noam-style learning rate decay
        def lr_lambda(current_step: int, d_model: int, warmup_steps: int) -> float:
            current_step += 1
            return (d_model ** (-0.5)) * min((current_step ** (-0.5)), current_step * (warmup_steps ** (-1.5)))

        warmup_steps = NUM_EPOCHS // 3
        d_model = transformer_hidden_size
        scheduler = LambdaLR(optimizer, lr_lambda=lambda step: lr_lambda(step, d_model, warmup_steps))

        best_loss = float('inf')
        patience = 20  # Number of epochs to wait for improvement before stopping
        patience_counter = 0  # Counter for epochs without improvement
        best_model = None
        train_losses = list()
        val_losses = list()

        for epoch in range(NUM_EPOCHS):
            train_loss = train_epoch(epoch, optimizer, criterion, model, train_loader)
            val_loss = validate_epoch(epoch, criterion, model, val_loader)
            train_losses.append(train_loss)
            val_losses.append(val_loss)

            scheduler.step()

            if val_loss < best_loss:
                best_loss = val_loss
                best_model = copy.deepcopy(model)
                patience_counter = 0  # Reset counter if improvement is observed
            else:
                patience_counter += 1  # Increment counter if no improvement

            if patience_counter >= patience:
                print(f"\033[34mStopping early at epoch {epoch} due to no improvement in validation loss.\033[0m")
                break  # Exit the loop if the model hasn't improved for 'patience' epochs

        if best_model is None:
            # The validation loss never improved (e.g. it was NaN every
            # epoch), so there are no weights worth saving.
            print()
            return float('inf')

        # Create the output folder up front; otherwise a missing directory
        # makes the save fail and a fully trained trial is reported as inf.
        out_dir = 'models/direct_connect_transformer'
        os.makedirs(out_dir, exist_ok=True)
        artifact_stem = f'{out_dir}/model_{hidden_channels}{gcn_layers}{transformer_num_layers}{transformer_nhead}{transformer_hidden_size}_{str(best_loss)[2:8]}'

        # One figure per trial, closed after saving. Plotting on the implicit
        # pyplot figure piled the curves of all trials into the same axes.
        fig, ax = plt.subplots()
        ax.plot(train_losses, label="train")
        ax.plot(val_losses, label="val")
        ax.set_title("MSE Loss, lr=" + str(learning_rate))
        ax.legend()
        fig.savefig(artifact_stem + '.png')
        plt.close(fig)

        torch.save(best_model.state_dict(), artifact_stem + '.pth')

        print()
        return best_loss
    except Exception as e:
        # Deliberate best-effort: a failing configuration is reported and
        # scored as the worst possible value so the study keeps running.
        print(e)
        print()
        return float('inf')

# Optimize hyperparameters
# Run the hyperparameter search: 100 trials minimising the validation loss.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

# Report the winning trial and its hyperparameters.
print("Best trial:")
trial = study.best_trial

print(" Value: ", trial.value)
print(" Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")
