In [None]:
# Imports

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import scipy as sp
# import contextily as cx

import torch
import pygsp
import optuna
import joblib
import gc
import argparse
import os
import matplotlib

from matplotlib.ticker import ScalarFormatter, StrMethodFormatter, FormatStrFormatter, FuncFormatter
from matplotlib.animation import FuncAnimation

from sklearn.preprocessing import MinMaxScaler, StandardScaler

from optuna.samplers import TPESampler
from torch.nn import Linear
from torch_geometric.nn.models import GraphUNet
from torch_geometric.nn import GCNConv, Sequential
from torch_geometric.data import Data
from torch_geometric.utils import to_networkx, grid
from torchvision import datasets, transforms

from pyprojroot import here
ROOT_DIR = str(here())

matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# Function definitions
%run utils.ipynb

## Graph U-net

### Hyperparameter tuning

In [None]:
%run utils.ipynb

class Args(argparse.Namespace):
    """Search space and bookkeeping settings for the Graph U-Net Optuna study.

    Every list below is a categorical grid; one entry per hyperparameter is
    picked for each trial.
    """

    # Number of Optuna trials run per graph size.
    n_trials = 50

    # Optimisation grids (training length, Adam step size, Adam weight decay).
    n_epochs = [40, 60, 80, 100]
    learning_rate = [1e-3, 1e-2, 1e-1]
    penalty_rate = [1e-15, 1e-5, 1e-4, 1e-2, 1e-1]

    # Architecture grids passed to the GraphUNet constructor.
    hidden_channels = [2, 3, 5]
    depth = [2, 3, 5]
    pool_ratios = [0.2, 0.3, 0.5]

    # Directory (with trailing slash) where the pickled studies are written.
    log_dir = ROOT_DIR + '/models/outputs/optuna_gunet/'


args = Args()


for graph_size in [5, 10, 25, 50, 100, 150, 200]:
    print(f'GRAPH SIZE: {graph_size}')

    def train_model(model, n_epochs, learning_rate, penalty_rate):
        """Score one hyperparameter configuration on synthetic graphs.

        For each of 20 seeds a synthetic graph with ``graph_size`` nodes
        (``graph_size`` is read from the enclosing loop) and its data are
        generated with the helpers from ``utils.ipynb``. The model is
        re-initialised and trained as an autoencoder on every column of the
        standardised data, and the absolute reconstruction error of each entry
        is collected as a score that is compared against the labels.

        Args:
            model: Network called as ``model(x, edge_index)`` that exposes
                ``reset_parameters()`` and ``parameters()``.
            n_epochs: Number of gradient steps taken on each snapshot.
            learning_rate: ``lr`` passed to Adam.
            penalty_rate: ``weight_decay`` passed to Adam.

        Returns:
            The mean, over the 20 seeds, of the value returned by
            ``compute_auc``.
        """
        torch.manual_seed(0)
        loss_function = torch.nn.MSELoss()
        scaler = StandardScaler()
        auc = []

        for seed in range(20):

            print(f'seed:{seed}')
            np.random.seed(seed)

            # NOTE(review): G is presumably a pygsp graph whose sparse adjacency
            # matrix is `G.A` -- confirm against utils.ipynb.
            G = generate_synthetic_graph(N=graph_size)
            edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)

            data, label = generate_data(G, size=20)

            data = scaler.fit_transform(data)
            # Column-major flattening matches the column-by-column order in
            # which the errors are appended below (assumes `label` has the same
            # layout as `data` -- TODO confirm).
            label_vector = label.reshape((-1,), order='F')

            error = []

            for snap in range(data.shape[1]):

                x = torch.Tensor(data[:, snap]).reshape(-1, 1)

                model.reset_parameters()
                # Build the optimizer after the reset so that Adam's running
                # moments and step counts from the previous snapshot are not
                # applied to the freshly initialised weights.
                optimizer = torch.optim.Adam(model.parameters(),
                                             lr=learning_rate,
                                             weight_decay=penalty_rate)

                for _ in range(n_epochs):
                    reconstructed = model(x, edge_index)    # autoencoder output
                    loss = loss_function(reconstructed, x)  # reconstruction loss

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # The error uses the last forward pass, i.e. the output computed
                # just before the final parameter update.
                error_snap = np.abs(reconstructed.detach() - x).numpy().flatten()
                error.extend(error_snap)

            error = np.array(error).reshape((-1,))
            tpr, fpr, thr = roc_params(error, label_vector, interp=True)
            auc.append(compute_auc(tpr, fpr))

        return np.mean(auc)
        
        
    def objective(trial):
        """Optuna objective: sample one configuration and return its score.

        One value per hyperparameter is drawn from the categorical grids stored
        on ``args``, a ``GraphUNet`` with one input and one output channel is
        built from the architecture choices, and the value returned by
        ``train_model`` is handed back to the study.
        """
        gc.collect()

        # The sampling order is fixed by this tuple.
        grid_names = ('n_epochs', 'learning_rate', 'penalty_rate',
                      'hidden_channels', 'depth', 'pool_ratios')
        n_epochs, learning_rate, penalty_rate, hidden_channels, depth, pool_ratios = [
            trial.suggest_categorical(name, getattr(args, name)) for name in grid_names
        ]

        print(f"INFO: Trial number: {trial.number}")
        print(f"INFO: Learning rate: {learning_rate}")
        print(f"INFO: Penalty rate: {penalty_rate}")
        print(f"INFO: Hidden_channels: {hidden_channels}")
        print(f"INFO: Depth: {depth}")
        print(f"INFO: Pool ratios: {pool_ratios}")
        print(f"INFO: n_epochs: {n_epochs}")

        network = GraphUNet(in_channels=1,
                            hidden_channels=hidden_channels,
                            out_channels=1,
                            depth=depth,
                            pool_ratios=pool_ratios)

        score = train_model(network, n_epochs, learning_rate, penalty_rate)
        return score


    # Make sure the output directory exists; exist_ok=True already covers the
    # "directory is present" case, so no separate existence check is needed.
    os.makedirs(args.log_dir, exist_ok=True)

    log_file = args.log_dir + f'optimization_logs_{graph_size}.pkl'
    if os.path.isfile(log_file):
        # Resume the study saved by a previous run; n_trials more trials are
        # added on top of the ones it already holds.
        # NOTE(review): joblib.load unpickles arbitrary objects -- only load
        # files that were written by this notebook.
        study = joblib.load(log_file)
    else:
        # NOTE(review): `objective` never reports intermediate values, so the
        # MedianPruner has nothing to act on -- confirm whether it is needed.
        study = optuna.create_study(sampler=TPESampler(),
                                    direction='maximize',
                                    pruner=optuna.pruners.MedianPruner(n_startup_trials=5,
                                                                       n_warmup_steps=24,
                                                                       interval_steps=6))

    # Checkpoint after every finished trial so an interrupted run keeps the
    # trials completed so far instead of losing the whole study.
    study.optimize(objective,
                   n_trials=args.n_trials,
                   gc_after_trial=True,
                   callbacks=[lambda study, trial, path=log_file: joblib.dump(study, path)])
    joblib.dump(study, log_file)

In [None]:
# Collect the trials of every study written by the tuning cell, which saves one
# file per graph size as `optimization_logs_<graph_size>.pkl` under args.log_dir.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files that
# were written by this notebook.
trial_frames = []
for size in [5, 10, 25, 50, 100, 150, 200]:
    study_file = args.log_dir + f'optimization_logs_{size}.pkl'
    if os.path.isfile(study_file):
        trial_frames.append(
            joblib.load(study_file).trials_dataframe().assign(graph_size=size)
        )

if not trial_frames:
    raise FileNotFoundError(f'No optimization logs found in {args.log_dir}')

pd.concat(trial_frames, ignore_index=True).sort_values(['graph_size', 'value'])