In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import contextily as cx
import plotly.graph_objects as go
import geopandas as gpd
import os
import matplotlib
import subprocess
import torch
import joblib
import glob

from shapely.geometry import MultiPoint
from sklearn.cluster import KMeans
from tsmoothie import LowessSmoother, ExponentialSmoother
from pyprojroot import here
from scipy.spatial import ConvexHull
from torch.utils.data import DataLoader, TensorDataset

import source.nn.models as models
import source.utils.utils as utils
import source.utils.fault_detection as fd

from source.utils.utils import roc_params, compute_auc, get_auc, best_mcc, best_f1score, otsuThresholding
from source.utils.utils import synthetic_timeseries
from source.utils.utils import plotly_signal

# Re-import the project modules so that edits made to them on disk are picked up
# without restarting the kernel.
from importlib import reload
models = reload(models)
utils = reload(utils)
fd = reload(fd)

# Project root as resolved by pyprojroot; the project-relative paths below hang off it.
from pyprojroot import here
root_dir = str(here())

# Data locations. NOTE: several later cells rebind `dataset_path` to a different
# directory, so its value depends on which cells have been executed.
insar_dir = os.path.expanduser('~/data/raw/')
data_path = root_dir + '/data/interim/'
dataset_path = root_dir + "/data/datasets/pixel_detection/"

# Global matplotlib font settings.
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'DejaVu Serif'})

# Show up to 500 rows and every column when pandas objects are displayed.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', None)

In [None]:
def pixel_mse(output, X):
    """Return the mean squared error between ``output`` and ``X`` per row.

    The squared difference is taken element-wise (no reduction) and then
    averaged over dimension 1, so the result has one entry per index along
    dimension 0.
    """
    squared_error = torch.nn.functional.mse_loss(output, X, reduction='none')
    return squared_error.mean(dim=1)


device = 'cuda:2'
def train_model(model, X, label, lr, G=None, n_epochs=1000, n_checkpoints=10):
    """Train ``model`` to reconstruct ``X`` and record AUC/loss at log-spaced epochs.

    The model parameters are re-initialised (``model.reset_parameters()``) after
    the torch and numpy RNGs have been seeded with 0. The model is then fitted
    full-batch with Adam on the MSE between ``model(X)`` and ``X``.

    Parameters
    ----------
    model : object exposing ``parameters()``, ``train()``, ``reset_parameters()``
        and callable as ``model(X)``.
    X : torch.Tensor
        Input that the model reconstructs; also the training target.
    label : passed unchanged to ``get_auc`` together with the per-row scores
        (presumably one binary label per row of ``X`` -- confirm against ``get_auc``).
    lr : float
        Adam learning rate.
    G : optional
        Accepted for backward compatibility; it is not used by the training loop.
    n_epochs : int, default 1000
        Number of optimisation steps (must be >= 1).
    n_checkpoints : int, default 10
        Number of geometrically spaced checkpoints between epoch 1 and
        ``n_epochs`` (both included) at which metrics are recorded.

    Returns
    -------
    (auc_epoch, loss_epoch) : tuple of lists
        One entry per distinct checkpoint epoch, in increasing epoch order.
        The loss is the value computed before that epoch's optimiser step, and
        the AUC is computed from the same forward pass.
    """
    rng_seed = 0
    torch.manual_seed(rng_seed)
    torch.cuda.manual_seed(rng_seed)
    np.random.seed(rng_seed)

    loss_epoch = []
    auc_epoch = []

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    model.train()
    model.reset_parameters()

    # Checkpoint epochs are computed once, not on every iteration.
    checkpoints = set(
        np.ceil(np.geomspace(1, n_epochs, n_checkpoints)).astype(int).tolist()
    )

    # The range is inclusive of n_epochs: the last checkpoint produced by
    # geomspace is n_epochs itself, so an exclusive upper bound would skip it.
    for epoch in range(1, n_epochs + 1):

        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, X)
        loss.backward()
        optimizer.step()

        if epoch in checkpoints:
            loss_epoch.append(loss.item())

            # Per-row reconstruction error is the anomaly score.
            scores = pixel_mse(output, X).detach().cpu().numpy()
            auc = get_auc(scores, label, resolution=101).round(3)
            auc_epoch.append(auc)

    return auc_epoch, loss_epoch

def evaluate_model(model, datasets, lr, max_datasets=5):
    """Train ``model`` on several datasets and average the checkpoint curves.

    For each selected dataset the ``'data'`` entry is converted to a float
    tensor on the notebook-level ``device``, the ``'label'`` entry is reduced
    with ``max(axis=1)`` to one label per row, and ``train_model`` is run.

    Parameters
    ----------
    model : model object forwarded to ``train_model`` (which re-initialises it
        on every call).
    datasets : sequence of dicts with ``'data'`` and ``'label'`` entries.
    lr : float
        Learning rate forwarded to ``train_model``.
    max_datasets : int or None, default 5
        Only the first ``max_datasets`` datasets are evaluated; ``None`` means
        all of them.

    Returns
    -------
    (mean_auc, mean_loss) : tuple of numpy arrays
        Element-wise mean over datasets of the per-checkpoint AUC and loss
        lists returned by ``train_model``, rounded to three decimals.
    """
    auc_epoch_list = []
    loss_epoch_list = []

    selected = datasets if max_datasets is None else datasets[:max_datasets]

    for it, dataset in enumerate(selected):

        print(f'Evaluating dataset {it}', flush=True)

        data = dataset['data']
        label = dataset['label'].max(axis=1)  # label per pixel

        X = torch.tensor(data).float().to(device)

        auc, loss = train_model(model, X, label, lr)
        auc_epoch_list.append(auc)
        loss_epoch_list.append(loss)

    return np.mean(auc_epoch_list, axis=0).round(3), np.mean(loss_epoch_list, axis=0).round(3)

-----------------

In [None]:
li = [0,1,2,3,4,5,6,7,8,9]

In [None]:
# Three largest entries of `li`, largest first.
sorted(li, reverse=True)[:3]

In [None]:
# Trial value vs. trainable-parameter count: one scatter figure per Optuna study file.
device = 'cuda'
datafile = 'pixel_detection/Oslo/training/dataset.pt'
# The path is built locally instead of rebinding the notebook-wide `dataset_path`,
# which another cell combines with a different relative file name.
datasets = torch.load(root_dir + "/data/datasets/" + datafile)

dataset = datasets[0]
n_sensors = dataset['data'].shape[0]
input_dim = dataset['data'].shape[1]

pkl_files = glob.glob(root_dir + '/outputs/HP_training/Geological_anomaly/*.pkl')

for file in pkl_files:
    # NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted study files.
    study = joblib.load(file)
    # Drop the first three characters and the 4-character extension of the file name.
    model_name = os.path.basename(file)[3:-4]

    print(model_name)

    trial_values = []
    num_params_list = []

    for trial in study.trials:
        if trial.value is None:
            # A trial without an objective value has nothing to plot.
            continue

        best_params = trial.params
        if 'RAE' in model_name:
            model_params = {
                'n_features': n_sensors,
                'latent_dim': best_params['latent_dim'],
                # Everything after the 4-character 'RAE_'-style prefix names the RNN cell.
                'rnn_type': model_name[4:],
                'rnn_act': 'relu',
                'device': device
            }
            model = models.RAE(**model_params)
            model = model.to(device)

        elif model_name == 'GUNet':
            model_params = {
                'in_channels': input_dim,
                'out_channels': input_dim,
                'hidden_channels': best_params['hidden_channels'],
                'depth': best_params['depth'],
                'pool_ratios': best_params['pool_ratios']
            }
            model = getattr(models, model_name)(**model_params)

        else:
            # Layer widths: the input width followed by one tuned width per layer.
            layer_dims = [input_dim] + [
                best_params[f'layer_dim_{i}'] for i in range(best_params['n_layers'])
            ]
            model_params = {'layer_dims': layer_dims}
            model = getattr(models, model_name)(**model_params)

        num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        trial_values.append(trial.value)
        num_params_list.append(num_params)

    # The model name goes into the title so the per-study figures can be told apart.
    fig = px.scatter(
        x=num_params_list,
        y=trial_values,
        labels={'x': 'Number of Parameters', 'y': 'Trial Value'},
        title=f'Trial Value vs Number of Parameters ({model_name})',
    )
    fig.show()

In [None]:
# For every study file: build the model of each of the 10 highest-valued trials
# and print its objective value next to its trainable-parameter count.
device = 'cuda'
datafile = 'pixel_detection/Oslo/training/dataset.pt'
# The path is built locally instead of rebinding the notebook-wide `dataset_path`,
# which another cell combines with a different relative file name.
datasets = torch.load(root_dir + "/data/datasets/" + datafile)

dataset = datasets[0]
n_sensors = dataset['data'].shape[0]
input_dim = dataset['data'].shape[1]

pkl_files = glob.glob(root_dir + '/outputs/HP_training/Geological_anomaly/*.pkl')

models_dict = {}  # "<model_name>_trial_<number>" -> instantiated model

print(f"{'Model':<15} {'AUC':<10} {'Parameters':<10}")

for file in pkl_files:
    # NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted study files.
    study = joblib.load(file)
    # Drop the first three characters and the 4-character extension of the file name.
    model_name = os.path.basename(file)[3:-4]

    # Trials without a value cannot be ordered (None is not comparable with floats).
    completed_trials = [trial for trial in study.trials if trial.value is not None]
    # Highest value first; keep the ten best.
    best_trials = sorted(completed_trials, key=lambda trial: trial.value, reverse=True)[:10]

    for trial in best_trials:
        best_params = trial.params
        if 'RAE' in model_name:
            model_params = {
                'n_features': n_sensors,
                'latent_dim': best_params['latent_dim'],
                # Everything after the 4-character 'RAE_'-style prefix names the RNN cell.
                'rnn_type': model_name[4:],
                'rnn_act': 'relu',
                'device': device
            }
            model = models.RAE(**model_params)
            model = model.to(device)

        elif model_name == 'GUNet':
            model_params = {
                'in_channels': input_dim,
                'out_channels': input_dim,
                'hidden_channels': best_params['hidden_channels'],
                'depth': best_params['depth'],
                'pool_ratios': best_params['pool_ratios']
            }
            model = getattr(models, model_name)(**model_params)

        else:
            # Layer widths: the input width followed by one tuned width per layer.
            layer_dims = [input_dim] + [
                best_params[f'layer_dim_{i}'] for i in range(best_params['n_layers'])
            ]
            model_params = {'layer_dims': layer_dims}
            model = getattr(models, model_name)(**model_params)

        models_dict[f"{model_name}_trial_{trial.number}"] = model

        num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        print(f"{model_name:<15} {trial.value:<10.3f} {num_params:<10}")

In [None]:
# For every study file: build the model described by the study's best trial,
# keep it in `models_dict`, and print its best value and trainable-parameter count.
device = 'cuda'
datafile = 'pixel_detection/Oslo/training/dataset.pt'
# The path is built locally instead of rebinding the notebook-wide `dataset_path`,
# which another cell combines with a different relative file name.
datasets = torch.load(root_dir + "/data/datasets/" + datafile)

dataset = datasets[0]
n_sensors = dataset['data'].shape[0]
input_dim = dataset['data'].shape[1]

pkl_files = glob.glob(root_dir + '/outputs/HP_training/Geological_anomaly/*.pkl')

models_dict = {}  # model_name -> instantiated model

print(f"{'Model':<15} {'AUC':<10} {'Parameters':<10}")

for file in pkl_files:
    # NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted study files.
    study = joblib.load(file)
    # Drop the first three characters and the 4-character extension of the file name.
    model_name = os.path.basename(file)[3:-4]

    best_params = study.best_trial.params

    if 'RAE' in model_name:
        model_params = {
            'n_features': n_sensors,
            'latent_dim': best_params['latent_dim'],
            # Everything after the 4-character 'RAE_'-style prefix names the RNN cell.
            'rnn_type': model_name[4:],
            'rnn_act': 'relu',
            'device': device
        }
        model = models.RAE(**model_params)
        model = model.to(device)

    elif model_name == 'GUNet':
        model_params = {
            'in_channels': input_dim,
            'out_channels': input_dim,
            'hidden_channels': best_params['hidden_channels'],
            'depth': best_params['depth'],
            'pool_ratios': best_params['pool_ratios']
        }
        model = getattr(models, model_name)(**model_params)

    else:
        # Layer widths: the input width followed by one tuned width per layer.
        layer_dims = [input_dim] + [
            best_params[f'layer_dim_{i}'] for i in range(best_params['n_layers'])
        ]
        model_params = {'layer_dims': layer_dims}
        model = getattr(models, model_name)(**model_params)

    models_dict[model_name] = model

    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"{model_name:<15} {study.best_value:<10.3f} {num_params:<10}")


In [None]:
# Ten best trials of the most recently loaded study. The rest of this notebook
# reports the trial value as an AUC and ranks trials from highest to lowest, so
# the same ordering is used here. Trials without a value are left out because
# None cannot be compared with floats.
completed_trials = [trial for trial in study.trials if trial.value is not None]
best_trials = sorted(completed_trials, key=lambda trial: trial.value, reverse=True)[:10]

# Objective values of those trials, best first.
best_values = [trial.value for trial in best_trials]
best_values

In [None]:
model

In [None]:
study.best_params

In [None]:
# Load the first training dataset as a tensor and build, for every Optuna study
# file, the model described by the study's best trial.
device = 'cuda'
datafile = 'pixel_detection/Oslo/training/dataset.pt'
# The path is built locally instead of rebinding the notebook-wide `dataset_path`,
# which another cell combines with a different relative file name.
datasets = torch.load(root_dir + "/data/datasets/" + datafile)

dataset = datasets[0]
input_dim = dataset['data'].shape[1]

data = dataset['data']
label = dataset['label'].max(axis=1)  # label per pixel

X = torch.tensor(data).float().to(device)

# Optuna study files, one per model type
pkl_files = glob.glob(root_dir + '/outputs/HP_training/Geological_anomaly/*.pkl')

models_dict = {}  # model_name -> instantiated model

for file in pkl_files:
    # NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted study files.
    study = joblib.load(file)

    best_params = study.best_trial.params
    # Drop the first three characters and the 4-character extension of the file name.
    model_name = os.path.basename(file)[3:-4]

    if 'RAE' in model_name:
        model_params = {
            # NOTE(review): the sibling cells pass the dataset's first dimension
            # here; the literal 2 is kept as written -- confirm which is intended.
            'n_features': 2,
            'latent_dim': best_params['latent_dim'],
            # Everything after the 4-character 'RAE_'-style prefix names the RNN cell.
            'rnn_type': model_name[4:],
            'rnn_act': 'relu',
            'device': device
        }
        # All recurrent variants share the single `models.RAE` class (as in the
        # other cells); the variant is selected through `rnn_type`, not the class name.
        model = models.RAE(**model_params)
        model = model.to(device)

    elif model_name == 'GUNet':
        model_params = {
            'in_channels': input_dim,
            'out_channels': input_dim,
            'hidden_channels': best_params['hidden_channels'],
            'depth': best_params['depth'],
            'pool_ratios': best_params['pool_ratios']
        }
        model = getattr(models, model_name)(**model_params)

    else:
        # Layer widths: the input width followed by one tuned width per layer.
        layer_dims = [input_dim] + [
            best_params[f'layer_dim_{i}'] for i in range(best_params['n_layers'])
        ]
        model_params = {'layer_dims': layer_dims}
        model = getattr(models, model_name)(**model_params)

    models_dict[model_name] = model

# Print the created models
for model_name, model in models_dict.items():
    print(f"Model: {model_name}")
    print(model)

In [None]:
best_params

In [None]:
# Shape experiment: add a trailing singleton dimension to X, batch it, and look
# at the shape of one batch before and after reversing its dimensions.
X2 = X.clone().view(-1, X.shape[1], 1)

# Input and target are the same tensor (reconstruction). A dedicated name is
# used so the `dataset` dict loaded earlier is not overwritten.
recon_dataset = TensorDataset(X2, X2)
dataloader = DataLoader(recon_dataset, batch_size=100, shuffle=True)

# Pull a single batch
data_iter = iter(dataloader)
batch_X, batch_y = next(data_iter)

print(batch_X.shape)

if model.n_features > 1:
    # `Tensor.T` is deprecated for tensors that are not 2-D; permute(2, 1, 0)
    # is the explicit form of the same dimension reversal for this 3-D batch.
    batch_X2 = batch_X.permute(2, 1, 0).unsqueeze(0)
    # Printed inside the branch: `batch_X2` only exists when the branch ran.
    print(batch_X2.shape)


In [None]:
batch_X.unsqueeze(2).shape

In [None]:
# Shape of the batch with its three dimensions reversed and a leading singleton
# added. permute(2, 1, 0) replaces `.T`, which is deprecated for non-2-D tensors.
batch_X.permute(2, 1, 0).unsqueeze(0).shape

In [None]:
# Instantiate `models.RAE` from a fixed, hand-written configuration and move it
# to `device`.
model_params = dict(
    n_features=2,
    latent_dim=4,
    rnn_type='LSTM',
    rnn_act='relu',
    device=device,
)
batch_size = 512

model_class = models.RAE
model = model_class(**model_params).to(device)

In [None]:
# Rebuild a constructor-style parameter dict from the attributes that the
# current `model` actually exposes; names it does not have are skipped.
relevant_params = ['n_features', 'latent_dim', 'rnn_type', 'rnn_act', 'device']
model_params = dict(
    (key, getattr(model, key))
    for key in relevant_params
    if hasattr(model, key)
)


In [None]:
asd = 'all'

In [None]:
asd != 1

In [None]:
n_features = 1
batch_size = 27
seq_len = 10

# One (batch_size, n_features) block per time step, block i filled with the
# value i, laid side by side along dimension 1.
step_blocks = [i * torch.ones([batch_size, n_features]) for i in range(seq_len)]
x = torch.cat(step_blocks, dim=1)

In [None]:
x.view(-1, seq_len, n_features).shape

In [None]:
X

In [None]:
new_params

In [None]:
new_params['n_features'] = 300

---------

In [None]:
# Load the tuned AE study and the Oslo training datasets, then preview the
# label matrix of one example.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load trusted study files.
study = joblib.load(root_dir+'/outputs/pixel_detection/HP_training/TR_AE.pkl')
# The full path is spelled out from `root_dir`: `dataset_path` is rebound to a
# different directory by earlier cells, so relying on it here made the file
# that gets loaded depend on which cells had been executed.
datasets = torch.load(root_dir + '/data/datasets/pixel_detection/Oslo/training/dataset.pt')
input_dim = datasets[0]['data'].shape[1]

dataset = datasets[9]
data = dataset['data']
label = dataset['label'].max(axis=1)  # label per pixel
X = torch.tensor(data).float().to(device)

px.imshow(dataset['label'], aspect='auto', width=600, title=f'Example: {label.sum():.3g} anomalous nodes').show()

In [None]:
# Train an AE configured from the study's best hyper-parameters on one dataset
# and report the AUC of its per-row reconstruction error.
dataset = datasets[9]
print(dataset['metadata'])
data = dataset['data']
label = dataset['label'].max(axis=1)  # label per pixel
X = torch.tensor(data).float().to(device)

lr = study.best_params['lr']
n_epochs = study.best_params['n_epochs']
n_layers = study.best_params['n_layers']
# Layer widths: the input width followed by one tuned width per layer.
layer_dims = [input_dim] + [study.best_params[f'layer_dim_{i}'] for i in range(n_layers)]

# Manual alternative to the tuned values:
# dims = [177, 89, 49, 35, 17]
# layer_dims = [input_dim, *dims]
# lr = 0.000025
# n_epochs = 261

model = models.AE(layer_dims)
model = model.to(device)

# Seed before reset_parameters() so the initial weights are reproducible.
rng_seed = 0
torch.manual_seed(rng_seed)
torch.cuda.manual_seed(rng_seed)
np.random.seed(rng_seed)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.MSELoss()

model.train()
model.reset_parameters()

output_list = []

for epoch in range(n_epochs):

    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, X)
    loss.backward()
    optimizer.step()

    # Store a detached copy: keeping the raw output would also keep every
    # epoch's autograd graph alive for as long as the list exists.
    output_list.append(output.detach())

# Score each row by the reconstruction error of the final epoch's output.
scores = pixel_mse(output_list[-1], X).detach().cpu().numpy()
auc = get_auc(scores, label, resolution=101).round(3)
auc

In [None]:
# Wide frames: one row per row of X / of the last stored output, with the row
# position exposed as the "sensor_id" index.
df_X = pd.DataFrame(X.detach().cpu().numpy()).rename_axis("sensor_id")
df_output = pd.DataFrame(output_list[-1].detach().cpu().numpy()).rename_axis("sensor_id")

# Long format: one row per (sensor_id, column) pair; the original column label
# becomes "timestamp".
id_columns = ["sensor_id"]
df_X_long = df_X.reset_index().melt(id_vars=id_columns, var_name="timestamp", value_name="X")
df_output_long = df_output.reset_index().melt(id_vars=id_columns, var_name="timestamp", value_name="output")

# Input and reconstruction side by side, matched on sensor and timestamp.
df_final = df_X_long.merge(df_output_long, on=["sensor_id", "timestamp"])

# The melted column labels are cast to integers.
df_final["timestamp"] = df_final["timestamp"].astype(int)

# Positions where `label` is non-zero.
anomalous_ids = np.where(label)[0]
print(f'{anomalous_ids}')

# Settings shared by both animated input-vs-output figures.
line_kwargs = dict(
    x='timestamp',
    y=['X','output'],
    animation_frame='sensor_id',
    width=1000,
    range_y=[-10,35],
)

# Every sensor, one animation frame each.
px.line(df_final, **line_kwargs).show()

# Scores per sensor, overlaid with the labels rescaled to 75% of the top score.
scaled_label = label*scores.max()*0.75
fig = px.line(y=[scaled_label, scores], width=1000, markers=True)
fig.update_traces(line=dict(width=0.5), marker={'size':5})
fig.show()

# Same animation restricted to the labelled sensors.
labelled_rows = df_final.sensor_id.isin(anomalous_ids)
px.line(df_final[labelled_rows], **line_kwargs).show()

In [None]:
# Build a radius-15 neighbour graph from the dataset positions, then draw the
# last column of X and the labels as signals on it.
positions = pd.DataFrame(data=dataset['pos'], columns=['easting','northing'])
G = fd.NNGraph(positions, radius=15)

plot_size = dict(width=500, height=300)
utils.plotly_signal(G, X[:,-1].cpu().numpy(), **plot_size)
utils.plotly_signal(G, label, **plot_size)

In [None]:
model = models.GCNencoder([15,12,12])

In [None]:
possible_classes = [models.GCN2MLP, models.AE]

In [None]:
isinstance(model, tuple(possible_classes))

In [None]:
# NOTE(review): no `import pygsp` is visible in this notebook, so on a fresh
# kernel this line would raise NameError unless the name is bound elsewhere -- confirm.
pygsp.graphs.NNGraph(dataset['pos'])

In [None]:
G = fd.NNGraph(pd.DataFrame(dataset['pos'], columns=['easting','northing']), radius=15)

In [None]:
G.plot()

In [None]:
from torch_geometric.utils import dense_to_sparse

In [None]:
X.device

In [None]:
next(model.parameters())

In [None]:
dataset['edge_weight'].to(device)

In [None]:
getattr(models, 'AE')([2, 2, 2, 2])