In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.io as pio
import contextily as cx
import plotly.graph_objects as go
import geopandas as gpd
import matplotlib.pyplot as plt
import os
import matplotlib
import subprocess
import torch
import joblib
import calflops
import time 

from sklearn.metrics import mean_squared_error, confusion_matrix, auc, f1_score, matthews_corrcoef
from calflops.flops_counter import calculate_flops
from shapely.geometry import MultiPoint
from sklearn.cluster import KMeans
from tsmoothie import LowessSmoother, ExponentialSmoother
from pyprojroot import here
from scipy.spatial import ConvexHull
from torch.utils.data import DataLoader, TensorDataset

import source.nn.models as models
import source.utils.utils as utils
import source.utils.fault_detection as fd

from source.utils.utils import roc_params, compute_auc, get_auc, best_mcc, best_f1score, otsuThresholding
from source.utils.utils import synthetic_timeseries
from source.utils.utils import plotly_signal

# Re-import the project modules so that edits made on disk are picked up
# without restarting the kernel.
# NOTE: the names bound earlier via `from source.utils.utils import ...` were
# imported before this reload, so they keep pointing at the pre-reload objects.
from importlib import reload
models = reload(models)
utils = reload(utils)
fd = reload(fd)

# Project root as resolved by pyprojroot; the data/output paths below are built from it.
from pyprojroot import here
root_dir = str(here())

insar_dir = os.path.expanduser('~/data/raw/')
data_path = root_dir + '/data/interim/'
dataset_path = root_dir + "/data/datasets/"

# Global matplotlib font settings (rcParams apply to every figure created afterwards).
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'DejaVu Serif'})

# Let pandas display up to 500 rows and an unlimited number of columns.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', None)

### TEST METRICS

In [None]:
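# Run Test_metrics.py before executing the cells below (presumably it writes the Test_metrics_*.pkl files they load; confirm).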

In [None]:
# Load the saved test artefacts for the Geological_anomaly dataset.
dataset_name = 'Geological_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

test_metrics = torch.load(root_dir + f'/outputs/Testing/Test_metrics_{dataset_name}.pkl')

# Format one record per model, then build the summary table in a single step.
summary_rows = []
for model in test_metrics.keys():
    stats = test_metrics[model]
    summary_rows.append({
        'AUC': f"{stats['mean_auc']:.3f} ± {stats['std_auc']:.3f}",
        'Oslo': f"{stats['oslo_auc']:.3f}",
        'Malmo': f"{stats['malmo_auc']:.3f}",
        'F1': f"{stats['mean_f1']:.3f} ± {stats['std_f1']:.3f}",
        'MCC': f"{stats['mean_mcc']:.3f} ± {stats['std_mcc']:.3f}",
    })
metrics_df = pd.DataFrame(summary_rows, index=list(test_metrics.keys()))

print(metrics_df.to_string())

In [None]:
# Load the saved test artefacts for the EGMS_anomaly dataset.
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

test_metrics = torch.load(root_dir + f'/outputs/Testing/Test_metrics_{dataset_name}.pkl')

# Format one record per model, then build the summary table in a single step.
summary_rows = []
for model in test_metrics.keys():
    stats = test_metrics[model]
    summary_rows.append({
        'AUC': f"{stats['mean_auc']:.3f} ± {stats['std_auc']:.3f}",
        'Oslo': f"{stats['oslo_auc']:.3f}",
        'Malmo': f"{stats['malmo_auc']:.3f}",
        'F1': f"{stats['mean_f1']:.3f} ± {stats['std_f1']:.3f}",
        'MCC': f"{stats['mean_mcc']:.3f} ± {stats['std_mcc']:.3f}",
    })
metrics_df = pd.DataFrame(summary_rows, index=list(test_metrics.keys()))

print(metrics_df.to_string())

### ACCURACY

In [None]:
def label_accuracy(metric, labels, interp=True):
    """Fraction of rows of each anomaly class flagged at the F1-optimal threshold.

    A row is treated as anomalous when any entry of its ``labels`` row is
    non-zero. 100 candidate thresholds are taken evenly between ``metric.min()``
    and ``metric.max()``; the one with the highest F1 against that binary target
    is selected (first one on ties), and rows with ``metric`` above it count as
    detections.

    Parameters
    ----------
    metric : array
        One anomaly score per row of ``labels``.
    labels : 2-D array
        Label codes; a row belongs to class 1 (resp. 2) when any of its entries
        equals 1 (resp. 2).
    interp : bool
        Accepted but not used by this function.

    Returns
    -------
    tuple
        ``(rate_class_1, rate_class_2)``: detected rows of the class divided by
        all rows of the class, or ``0`` when the class has no rows.
    """
    any_anomaly = labels.any(axis=1)

    # The last grid point equals metric.max(); no score can exceed it, so it is skipped.
    candidates = np.linspace(metric.min(), metric.max(), 101)[:-1]
    f1_per_threshold = [f1_score(any_anomaly, metric > cut) for cut in candidates]

    best_cut = candidates[np.argmax(f1_per_threshold)]
    flagged = metric > best_cut

    def _detected_fraction(code):
        # Share of the rows carrying `code` that were flagged.
        members = (labels == code).any(axis=1)
        n_members = np.sum(members)
        if n_members > 0:
            return np.sum(flagged & members) / n_members
        return 0

    return _detected_fraction(1), _detected_fraction(2)

# For each dataset family, compute the per-class detection rates of every model
# on every test dataset and seed, then store mean/std across seeds on disk.
for dataset_name in ['Geological_anomaly', 'EGMS_anomaly']:

    datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
    model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

    model_names = ['AE', 'GCN2MLP', 'GCNAE', 'GConv2MLP', 'GConvAE', 'GUNet', 'RAE_GRU', 'RAE_LSTM']

    # Dictionary to store metrics for each model
    accuracy_dict = {}

    for model_name in model_names:
        print(f"Computing metrics for {model_name}")
        acc1_list = []  # one row per dataset, one column per seed
        acc2_list = []

        for idx, dataset in enumerate(datasets):
            print(f"\rProcessing dataset {idx+1}/{len(datasets)} for model {model_name}", end='', flush=True)

            acc1_seed = []
            acc2_seed = []
            # 25 seeds per dataset are read here; assumes that many are stored — TODO confirm.
            for seed in range(25):
                scores = model_dict[model_name]['scores'][idx][seed]

                acc1, acc2 = label_accuracy(scores, dataset['label'])
                acc1_seed.append(acc1)
                acc2_seed.append(acc2)

            acc1_list.append(acc1_seed)
            acc2_list.append(acc2_seed)

        # Average over datasets first, leaving one value per seed.
        acc1_per_seed = np.mean(acc1_list, axis=0)
        acc2_per_seed = np.mean(acc2_list, axis=0)

        # Mean and spread across seeds. 'std_acc1' previously used np.mean,
        # which made the reported "±" identical to the mean itself.
        accuracy_dict[model_name] = {
            'mean_acc1': np.mean(acc1_per_seed).round(3),
            'std_acc1': np.std(acc1_per_seed).round(3),
            'mean_acc2': np.mean(acc2_per_seed).round(3),
            'std_acc2': np.std(acc2_per_seed).round(3),
        }
        print("\n")

    torch.save(accuracy_dict, root_dir + f'/outputs/Testing/Test_accuracy_{dataset_name}.pkl')

In [None]:
# Per-class detection rates saved for the Geological_anomaly dataset.
dataset_name = 'Geological_anomaly'
accuracy_dict = torch.load(root_dir + f'/outputs/Testing/Test_accuracy_{dataset_name}.pkl')

# One formatted record per model, assembled into the table in one step.
accuracy_rows = []
for model in accuracy_dict.keys():
    entry = accuracy_dict[model]
    accuracy_rows.append({
        'Geological': f"{entry['mean_acc1']:.3f} ± {entry['std_acc1']:.3f}",
        'Phase': f"{entry['mean_acc2']:.3f} ± {entry['std_acc2']:.3f}",
    })
accuracy_df = pd.DataFrame(accuracy_rows, index=list(accuracy_dict.keys()))

print(accuracy_df.to_string())

In [None]:
# Per-class detection rates saved for the EGMS_anomaly dataset.
dataset_name = 'EGMS_anomaly'
accuracy_dict = torch.load(root_dir + f'/outputs/Testing/Test_accuracy_{dataset_name}.pkl')

# One formatted record per model, assembled into the table in one step.
accuracy_rows = []
for model in accuracy_dict.keys():
    entry = accuracy_dict[model]
    accuracy_rows.append({
        'Geological': f"{entry['mean_acc1']:.3f} ± {entry['std_acc1']:.3f}",
        'Phase': f"{entry['mean_acc2']:.3f} ± {entry['std_acc2']:.3f}",
    })
accuracy_df = pd.DataFrame(accuracy_rows, index=list(accuracy_dict.keys()))

print(accuracy_df.to_string())

### VISUALIZATION

In [None]:
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

# Select models to visualize
models_to_plot = ['AE', 'GCN2MLP', 'RAE_LSTM']
i = 20  # Index of the test dataset to plot
seed = 0

# 2x2 grid: true labels in the top-left panel, one panel per model in the rest.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(2, 2, width_ratios=[1, 1])

# Plot the true labels first
ax1 = fig.add_subplot(gs[0, 0])
labels = datasets[i]['label'].max(axis=1)

# Bind every label value to its legend name and colour explicitly. Pairing the
# sorted unique values with a fixed list by position mislabels the legend as
# soon as one label value is absent from the selected dataset.
label_styles = {
    0: ('Normal', (0.5, 0.5, 0.8)),
    1: ('Geological', (0.05, 0.25, 0.25)),
    2: ('Phase', (0.55, 0.55, 0.05)),
}
labels_unique = np.unique(labels)

for label_val in labels_unique:
    legend_name, color = label_styles.get(int(label_val), (f'Label {label_val}', 'black'))
    mask = (labels == label_val)
    ax1.scatter(
        datasets[i]['pos'][mask,0],
        datasets[i]['pos'][mask,1],
        color=color,
        s=50,
        label=legend_name,
    )

# Legend entries come from the `label=` given to each scatter call above.
ax1.legend(loc='upper left')
ax1.set_title('Labels')
ax1.axis('off')
cx.add_basemap(ax1, crs='EPSG:3035', source=cx.providers.CartoDB.Positron)

vmin, vmax = 0, 1  # Set consistent color scale
norm = plt.Normalize(vmin=vmin, vmax=vmax)

# One panel per model, filling the grid cells after the label panel row by row.
axes = []
for idx, model_name in enumerate(models_to_plot):
    row, col = divmod(idx + 1, 2)
    ax = fig.add_subplot(gs[row, col])
    axes.append(ax)

    # Min-max normalise the scores of this model so all panels share [0, 1].
    scores = model_dict[model_name]['scores'][i][seed]
    scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores))

    scatter = ax.scatter(
        datasets[i]['pos'][:,0],
        datasets[i]['pos'][:,1],
        c=scores,
        cmap='PuRd',
        s=100,
        norm=norm
    )
    ax.set_title(model_name.replace('_', ''))
    ax.axis('off')
    cx.add_basemap(ax, crs='EPSG:3035', source=cx.providers.CartoDB.Positron)

# Add colorbar (valid for every model panel because they share `norm` and cmap)
cbar_ax = fig.add_axes([1., 0.15, 0.02, 0.7])  # [left, bottom, width, height]
plt.colorbar(scatter, cax=cbar_ax, label='Normalized Anomaly Score')

plt.tight_layout(pad=1.0)
plt.savefig(root_dir + f'/outputs/figs/{dataset_name}_scores.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

# Models shown, one per panel of the 2x4 grid
models_to_plot = ['AE', 'GCN2MLP', 'GCNAE', 'GConv2MLP', 'GConvAE', 'GUNet', 'RAE_GRU', 'RAE_LSTM']
i = 20  # Dataset index
seed = 0

fig, axes = plt.subplots(2, 4, figsize=(24, 12))
axes = axes.flatten()

# Shared colour scale so panels are directly comparable
vmin, vmax = 0, 1
norm = plt.Normalize(vmin=vmin, vmax=vmax)

for idx, (ax, model_name) in enumerate(zip(axes, models_to_plot)):
    # Min-max normalise this model's scores to [0, 1]
    raw_scores = model_dict[model_name]['scores'][i][seed]
    lowest, highest = np.min(raw_scores), np.max(raw_scores)
    scores = (raw_scores - lowest) / (highest - lowest)

    scatter = ax.scatter(
        datasets[i]['pos'][:,0],
        datasets[i]['pos'][:,1],
        c=scores,
        cmap='PuRd',
        s=100,
        norm=norm
    )
    ax.set_title(model_name.replace('_', ''))
    ax.axis('off')
    cx.add_basemap(ax, crs='EPSG:3035', source=cx.providers.CartoDB.Positron)

# Single colorbar placed to the right of the grid: [left, bottom, width, height]
cbar_ax = fig.add_axes([1.0, 0.10, 0.02, 0.75])
cbar = plt.colorbar(scatter, cax=cbar_ax)
cbar.set_label('Normalized Anomaly Score', fontsize=32)

plt.tight_layout(pad=1.0)
plt.savefig(root_dir + f'/outputs/figs/{dataset_name}_scores_all.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
from plotly.subplots import make_subplots

dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

# Select models to visualize
models_to_plot = ['GUNet', 'AE', 'RAE_GRU']
i = 0  # First dataset
seed = 0

# 2x2 grid: true labels top-left, then one panel per model
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=['True Labels'] + models_to_plot,
    vertical_spacing=0.08,
    horizontal_spacing=0.05
)

# Plot the true labels first; their colorbar sits in the upper half of the figure.
labels = datasets[i]['label'].max(axis=1)
fig.add_trace(
    go.Scatter(
        x=datasets[i]['pos'][:,0],
        y=datasets[i]['pos'][:,1], 
        mode='markers',
        marker=dict(
            size=6,
            color=labels,
            colorscale='Viridis',
            showscale=True,
            colorbar=dict(len=0.4, y=0.8, title=dict(text='Label'))
        ),
        hovertemplate='<br>'.join([
            'x: %{x}',
            'y: %{y}',
            'Label: %{marker.color:.2f}'
        ]),
        name='True Labels'
    ),
    row=1, col=1
)

# Create scatter plots for each model
for idx, model_name in enumerate(models_to_plot):
    scores = model_dict[model_name]['scores'][i][seed]

    # Normalize scores to [0, 1]
    scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores))

    # Panels are filled row by row, skipping the label panel at (1, 1).
    row = (idx + 1) // 2 + 1
    col = (idx + 1) % 2 + 1

    fig.add_trace(
        go.Scatter(
            x=datasets[i]['pos'][:,0],
            y=datasets[i]['pos'][:,1], 
            mode='markers',
            marker=dict(
                size=6,
                color=scores,
                colorscale='Viridis',
                # Every model trace uses the same [0, 1] scale, so a single score
                # colorbar (lower half) is drawn. Previously each trace drew its
                # own bar at the same x position, with y chosen from the column
                # index, so bars overlapped each other and the label colorbar.
                cmin=0,
                cmax=1,
                showscale=(idx == 0),
                colorbar=dict(len=0.4, y=0.3, title=dict(text='Score'))
            ),
            hovertemplate='<br>'.join([
                'x: %{x}',
                'y: %{y}',
                'Score: %{marker.color:.3f}'
            ]),
            name=model_name
        ),
        row=row, col=col
    )

# Update layout
fig.update_layout(
    width=1000,
    height=800,
    showlegend=False,
    margin=dict(l=20, r=20, t=40, b=20),
    font=dict(size=14),
)

# Hide all axes
fig.update_xaxes(visible=False)
fig.update_yaxes(visible=False)

fig.show()


### REAL DATA ANALYSIS

In [None]:
# Scores produced by the trained models on the real (unlabelled) data.
model_results = torch.load(root_dir + f'/outputs/Testing/Scores_real_data.pkl')
# Read the Oslo time series from the project's interim-data folder (`data_path`,
# built from the pyprojroot root) instead of a hard-coded absolute home path,
# so the notebook also runs on other machines and checkouts.
df_Oslo = pd.read_parquet(data_path + 'df_Oslo.parq')

In [None]:
datasets = model_results['GCN2MLP']

# One frame per entry; scores are divided by that entry's own maximum.
df_results = []
for data in datasets:
    entry_scores = data['scores']
    df = pd.DataFrame({
        'easting': data['pos'][:,0],
        'northing': data['pos'][:,1],
        'latitude': data['coords'][:,0],
        'longitude': data['coords'][:,1],
        'pid': data['pid'],
        'scores': entry_scores/entry_scores.max(),
    })
    df_results.append(df)

# Stack all entries and keep, for each (easting, northing), the highest-scoring row.
df_results = (
    pd.concat(df_results, ignore_index=True)
    .sort_values('scores', ascending=False)
    .drop_duplicates(subset=['easting', 'northing'], keep='first')
)

# Points whose normalised score exceeds 0.75
df_gcn2mlp = df_results[df_results['scores'] > 0.75].copy()

utils.visualize_map(df_gcn2mlp, color='scores')

In [None]:
datasets = model_results['GUNet']

# One frame per entry; scores are divided by that entry's own maximum.
df_results = []
for data in datasets:
    entry_scores = data['scores']
    df = pd.DataFrame({
        'easting': data['pos'][:,0],
        'northing': data['pos'][:,1],
        'latitude': data['coords'][:,0],
        'longitude': data['coords'][:,1],
        'pid': data['pid'],
        'scores': entry_scores/entry_scores.max(),
    })
    df_results.append(df)

# Stack all entries and keep, for each (easting, northing), the highest-scoring row.
df_results = (
    pd.concat(df_results, ignore_index=True)
    .sort_values('scores', ascending=False)
    .drop_duplicates(subset=['easting', 'northing'], keep='first')
)

# Points whose normalised score exceeds 0.75
df_gunet = df_results[df_results['scores'] > 0.75].copy()

utils.visualize_map(df_gunet, color='scores')

In [None]:
# Tag each detection set with a numeric source code, then stack them.
df_gcn2mlp['type'] = 0
df_gunet['type'] = 1
df_combined = pd.concat([df_gcn2mlp, df_gunet], ignore_index=True)

# Averaging per pid leaves 0 or 1 for pids flagged by one model only and an
# in-between value for pids present in both frames.
df_type = df_combined.groupby('pid', as_index=False).mean()

mean_code = df_type['type']
df_type['type'] = np.where(
    mean_code == 0, 'GCN2MLP',
    np.where(mean_code == 1, 'GUNet', 'Both'),
)

# Min-max rescale of the averaged scores
shifted_scores = df_type['scores'] - df_type['scores'].min()
df_type['norm_scores'] = shifted_scores / shifted_scores.max()

In [None]:
# Group the detected points into subgraphs and store the id of each point's subgraph.
# NOTE(review): presumably NNGraph links points closer than `radius` and returns
# one subgraph id per row of df_type — confirm in source.utils.fault_detection.
G, subs = fd.NNGraph(df_type, radius=20, subgraphs=True)
df_type['subgraph'] = subs

plt.figure(figsize=(12, 8))

# Define colors for each type
color_dict = {
    'GCN2MLP': 'red',
    'GUNet': 'blue',
    'Both': 'limegreen'
}

# Create scatter plot for each unique type
for type_val in df_type['type'].unique():
    mask = df_type['type'] == type_val
    plt.scatter(
        df_type[mask]['easting'], 
        df_type[mask]['northing'],
        label=type_val,
        color=color_dict[type_val],
        alpha=0.6,
        s=50
    )

# Subgraph ids that have already been seen while walking G.coords
annotated = []
# annotate each position in G.coords with the value in subs
for i, pos in enumerate(G.coords):
    x_pos = pos[0]
    y_pos = pos[1]
    # Text is shifted by 20 data units in x and y away from the point.
    x_offset = 20
    y_offset = 20

    # Write the id only at the first point of each subgraph; points whose id is 0
    # are always annotated.
    if (subs[i] not in annotated) or (subs[i]==0):
        plt.annotate(f"{subs[i]:.0f}", (x_pos, y_pos), xytext=(x_pos + x_offset, y_pos + y_offset), fontsize=12, color='black', ha='center', va='center')

    annotated.append(subs[i])

plt.legend()
plt.axis('off')
cx.add_basemap(plt.gca(), crs='EPSG:3035', source=cx.providers.CartoDB.Positron)
plt.tight_layout()
plt.show()

# Interactive map of the same points, coloured by type and sized by normalised score.
# NOTE(review): the colour list order here ('blue','red','limegreen') differs from
# color_dict above; which type gets which colour depends on utils.visualize_map — confirm.
fig = utils.visualize_map(df_type, color='type', size='norm_scores', size_max=6, zoom=14, discrete_colormap=['blue','red','limegreen'], return_fig=True)


fig.update_layout(
    legend=dict(
        x=0.25,          # x position (0 = left, 1 = right)
        y=0.25,          # y position (0 = bottom, 1 = top)
        xanchor='right', # anchor the x position to the 'right' of the box
        yanchor='top',   # anchor the y position to the 'top' of the box
        bgcolor='rgba(255,255,255,0.8)',  # optional: add background
        bordercolor='black',
        borderwidth=1,
        font=dict(family="Times New Roman, Times, serif", size=24),
        title='',
    ),
    margin=dict(l=20, r=20, t=20, b=20),  # Reduced margins
    width=1200,
    height=800
)

# saving figure as pdf
# fig.write_image(root_dir + f'/outputs/figs/Types.pdf', width=800, height=600, scale=1)

fig.show()

In [None]:
# Points of the selected subgraph(s) and the type assigned to each pid
subgraph = [63]
in_subgraph = df_type['subgraph'].isin(subgraph)
df_sub = df_type.loc[in_subgraph].copy()
pid_list = df_sub['pid'].values
type_map = dict(zip(df_sub['pid'], df_sub['type']))

# Displacement records of those pids, tagged with their type
df_filtered = df_Oslo.loc[df_Oslo['pid'].isin(pid_list)].copy()
df_filtered['type'] = df_filtered['pid'].map(type_map)

color_dict = dict(GCN2MLP='red', GUNet='blue', Both='limegreen')
pid_color_map = {pid: color_dict[type_map[pid]] for pid in pid_list}

# Colouring by pid gives one LOWESS trendline per pid; the colour itself encodes the type.
fig = px.scatter(
    df_filtered,
    x='timestamp',
    y='displacement',
    color='pid',
    color_discrete_map=pid_color_map,
    trendline='lowess',
    template='simple_white',
    width=800,
    height=600,
    labels={'timestamp': 'Timestamp', 'displacement': 'Displacement (mm)'},
)

# Black frame on all four sides of the plot area, tight margins, no legend
boxed_axis = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_layout(
    font=dict(family="Times New Roman, Times, serif", size=18),
    showlegend=False,
    margin=dict(l=50, r=20, t=20, b=50),
    xaxis=boxed_axis,
    yaxis=boxed_axis,
)

fig.write_image(root_dir + f'/outputs/figs/Types_both_63.pdf', width=800, height=600, scale=1)
fig.show()


In [None]:
# Points of the selected subgraph(s) and the type assigned to each pid
subgraph = [57, 60]
in_subgraph = df_type['subgraph'].isin(subgraph)
df_sub = df_type.loc[in_subgraph].copy()
pid_list = df_sub['pid'].values
type_map = dict(zip(df_sub['pid'], df_sub['type']))

# Displacement records of those pids, tagged with their type
df_filtered = df_Oslo.loc[df_Oslo['pid'].isin(pid_list)].copy()
df_filtered['type'] = df_filtered['pid'].map(type_map)

color_dict = dict(GCN2MLP='red', GUNet='blue', Both='limegreen')
pid_color_map = {pid: color_dict[type_map[pid]] for pid in pid_list}

# Colouring by pid gives one LOWESS trendline per pid; the colour itself encodes the type.
fig = px.scatter(
    df_filtered,
    x='timestamp',
    y='displacement',
    color='pid',
    color_discrete_map=pid_color_map,
    trendline='lowess',
    template='simple_white',
    width=800,
    height=600,
    labels={'timestamp': 'Timestamp', 'displacement': 'Displacement (mm)'},
)

# Black frame on all four sides of the plot area, tight margins, no legend
boxed_axis = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_layout(
    font=dict(family="Times New Roman, Times, serif", size=18),
    showlegend=False,
    margin=dict(l=50, r=20, t=20, b=50),
    xaxis=boxed_axis,
    yaxis=boxed_axis,
)

fig.write_image(root_dir + f'/outputs/figs/Types_GCN2MLP_57.pdf', width=800, height=600, scale=1)
fig.show()


In [None]:
# Points of the selected subgraph(s) and the type assigned to each pid
subgraph = [53]
in_subgraph = df_type['subgraph'].isin(subgraph)
df_sub = df_type.loc[in_subgraph].copy()
pid_list = df_sub['pid'].values
type_map = dict(zip(df_sub['pid'], df_sub['type']))

# Displacement records of those pids, tagged with their type
df_filtered = df_Oslo.loc[df_Oslo['pid'].isin(pid_list)].copy()
df_filtered['type'] = df_filtered['pid'].map(type_map)

color_dict = dict(GCN2MLP='red', GUNet='blue', Both='limegreen')
pid_color_map = {pid: color_dict[type_map[pid]] for pid in pid_list}

# Colouring by pid gives one LOWESS trendline per pid; the colour itself encodes the type.
fig = px.scatter(
    df_filtered,
    x='timestamp',
    y='displacement',
    color='pid',
    color_discrete_map=pid_color_map,
    trendline='lowess',
    template='simple_white',
    width=800,
    height=600,
    labels={'timestamp': 'Timestamp', 'displacement': 'Displacement (mm)'},
)

# Black frame on all four sides of the plot area, tight margins, no legend
boxed_axis = dict(showline=True, linewidth=1, linecolor='black', mirror=True)
fig.update_layout(
    font=dict(family="Times New Roman, Times, serif", size=18),
    showlegend=False,
    margin=dict(l=50, r=20, t=20, b=50),
    xaxis=boxed_axis,
    yaxis=boxed_axis,
)

fig.write_image(root_dir + f'/outputs/figs/Types_GUNet_53.pdf', width=800, height=600, scale=1)

fig.show()


In [None]:
# Keep only the points with a normalised score of at least 0.89.
# Note: `df_results` is whatever the most recently executed model cell left behind.
df_plot = df_results[df_results['scores'] >= 0.89].copy()

fig, ax = plt.subplots(figsize=(12, 8))
scatter = ax.scatter(
    df_plot['easting'],
    df_plot['northing'],
    c=df_plot['scores'],
    cmap='PuRd',
    s=10*df_plot['scores'],  # marker area grows with the score
    alpha=0.6,
)

# Colorbar with explicit ticks rendered to one decimal
tick_values = np.arange(0.7, 1.1, 0.1)
cbar = fig.colorbar(scatter, ax=ax, label='Anomaly Score', ticks=tick_values)
cbar.ax.set_yticklabels([f'{t:.1f}' for t in tick_values])

ax.axis('off')

cx.add_basemap(ax, crs='EPSG:3035', source=cx.providers.CartoDB.Positron)
plt.tight_layout()
plt.show()

In [None]:
# Rows of df_type belonging to subgraph 63
subgraph = 63
subgraph_rows = df_type.query('subgraph == @subgraph')
pid_list = subgraph_rows.pid.values
type_list = subgraph_rows.type.values

color_dict = dict(GCN2MLP='red', GUNet='blue', Both='limegreen')

# Displacement over time for those pids, one LOWESS trendline per pid
df_selected = df_Oslo[df_Oslo['pid'].isin(pid_list)]
fig = px.scatter(
    df_selected,
    x='timestamp',
    y='displacement',
    color='pid',
    trendline='lowess',
    template='simple_white',
    width=1000, height=600,
    labels={'timestamp': 'Timestamp', 'displacement': 'Displacement (mm)'},
)

# Serif font, no legend
fig.update_layout(
    font=dict(family="Times New Roman, Times, serif", size=18),
    showlegend=False,
)

fig.show()

In [None]:
# Print the mean per-class detection rates of every model.
# `accuracy_dict` is the dictionary that holds the 'mean_acc1' / 'mean_acc2'
# keys in this notebook; the previously referenced `metrics_dict` is never defined.
for model_name in model_names:
    print(f"{model_name:<10}: - Geo: {accuracy_dict[model_name]['mean_acc1']:<5}, Phase: {accuracy_dict[model_name]['mean_acc2']:<5}")


In [None]:
# Load the EGMS_anomaly test datasets and the saved per-model outputs, then keep
# the first dataset and its label array for the inspection cells that follow.
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Test/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Testing/model_dict_testing_{dataset_name}.pkl')

dataset = datasets[0]
labels = dataset['label']

In [None]:
(labels==1).any(axis=1)

-----------------------------

In [None]:
len(model_dict['AE']['scores'])

In [None]:
px.line(datasets[0]['label'].max(axis=1))

In [None]:
datasets[0]['data'].shape

In [None]:
import copy  # local import: deepcopy is only needed by this cell

# Work on a deep copy so the stored AE model in model_dict stays untouched.
model_orig = copy.deepcopy(model_dict['AE']['model'])

# Replace the first encoder layer so it accepts 300 inputs (randomly initialised).
# NOTE(review): the hidden width 25 is hard-coded here; it presumably has to
# match the width of the neighbouring layers of the stored model — confirm.
model_orig.encoder[0].in_features = 300  # new input size
model_orig.encoder[0].weight = torch.nn.Parameter(torch.randn(25, 300))  # new weight matrix
model_orig.encoder[0].bias = torch.nn.Parameter(torch.randn(25))  # new bias vector

# Replace the last decoder layer so it emits 300 outputs (randomly initialised).
model_orig.decoder[-1].out_features = 300  # new output size
model_orig.decoder[-1].weight = torch.nn.Parameter(torch.randn(300, 25))  # new weight matrix
model_orig.decoder[-1].bias = torch.nn.Parameter(torch.randn(300))  # new bias vector

In [None]:
model_name = 'RAE_GRU'

In [None]:
model_name.split('_')[-1].lower()

In [None]:
model_dict['GUNet']['trial_params']

In [None]:
[150] + [model_dict['AE']['trial_params'][f'layer_dim_{i}'] for i in range(model_dict['AE']['trial_params']['n_layers'])]


In [None]:
# Scratch cell: rebuild an RAE with `n_features` set to X.shape[0], reusing the
# other constructor arguments read off an existing `model` instance.
# NOTE(review): neither `model` nor `X` is defined above this cell (both are
# created further down, where the same rebuild logic appears again), so on a
# fresh kernel this cell raises NameError — confirm whether it can be removed.
relevant_params = ['n_features', 'latent_dim', 'rnn_type', 'rnn_act', 'device']
new_model_params = {key: getattr(model, key) for key in relevant_params}
new_model_params['n_features'] = X.shape[0]
model = models.RAE(**new_model_params)
model.to(new_model_params['device'])

In [None]:
model_dict['AE']['model'].decoder[-1].out_features

### Number of parameters

In [None]:
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Training/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Optuna_analysis/model_dict_{dataset_name}.pkl')

model_names = ['AE', 'GCN2MLP', 'GCNAE', 'GConv2MLP', 'GConvAE', 'GUNet', 'RAE_GRU', 'RAE_LSTM']

bar_width = 0.75

# Each model gets a slot two units wide; its two bars sit either side of the slot centre.
x = np.arange(len(model_names))
x_positions = 2*(x)

# Trainable parameter count per model, and that count multiplied by the number of epochs
num_parameters = [model_dict[name]['num_parameters'] for name in model_names]
epochs_per_model = [model_dict[name]['trial_params']['n_epochs'] for name in model_names]
total_params = [count * epochs for count, epochs in zip(num_parameters, epochs_per_model)]

fig = go.Figure(data=[
    go.Bar(
        x=x_positions - bar_width / 2,
        y=num_parameters,
        width=bar_width,
        name='Count of Trainable Parameters',
        text=num_parameters,
        textposition='outside',
    ),
    go.Bar(
        x=x_positions + bar_width / 2,
        y=total_params,
        width=bar_width,
        name='Count of Parameter Updates',
        text=total_params,
        textposition='outside',
    ),
])

fig.update_layout(
    xaxis=dict(
        tickvals=x_positions,
        ticktext=[m.replace('_','') for m in model_names],
    ),
    yaxis=dict(
        tickformat="~s",
        tickvals=list(range(0, 1_000_001, 100_000)),  # 0, 100k, 200k, ..., 1000k
    ),
    barmode='group',
    width=1000,
    height=600,
    legend=dict(
        x=0.02,
        y=0.95,
        bgcolor='rgba(255,255,255,0.5)',  # semi-transparent background
        bordercolor='black',
        borderwidth=1,
    ),
    font=dict(family="Times New Roman, Times, serif", size=18),
    margin=dict(l=20, r=20, t=20, b=20),
)

fig.write_image(root_dir + f'/outputs/figs/complexity_parameters_EGMS.pdf')
fig.show()

In [None]:
dataset_name = 'Geological_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Training/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Optuna_analysis/model_dict_{dataset_name}.pkl')

model_names = ['AE', 'GCN2MLP', 'GCNAE', 'GConv2MLP', 'GConvAE', 'GUNet', 'RAE_GRU', 'RAE_LSTM']

bar_width = 0.75

# Each model gets a slot two units wide; its two bars sit either side of the slot centre.
x = np.arange(len(model_names))
x_positions = 2*(x)

# Trainable parameter count per model, and that count multiplied by the number of epochs
num_parameters = [model_dict[name]['num_parameters'] for name in model_names]
epochs_per_model = [model_dict[name]['trial_params']['n_epochs'] for name in model_names]
total_params = [count * epochs for count, epochs in zip(num_parameters, epochs_per_model)]

fig = go.Figure(data=[
    go.Bar(
        x=x_positions - bar_width / 2,
        y=num_parameters,
        width=bar_width,
        name='Count of Trainable Parameters',
        text=num_parameters,
        textposition='outside',
    ),
    go.Bar(
        x=x_positions + bar_width / 2,
        y=total_params,
        width=bar_width,
        name='Count of Parameter Updates',
        text=total_params,
        textposition='outside',
    ),
])

fig.update_layout(
    xaxis=dict(
        tickvals=x_positions,
        ticktext=[m.replace('_','') for m in model_names],
    ),
    yaxis=dict(
        tickformat="~s",
        tickvals=list(range(0, 1_000_001, 100_000)),  # 0, 100k, 200k, ..., 1000k
    ),
    barmode='group',
    width=1000,
    height=600,
    legend=dict(
        x=0.02,
        y=0.95,
        bgcolor='rgba(255,255,255,0.5)',  # semi-transparent background
        bordercolor='black',
        borderwidth=1,
    ),
    font=dict(family="Times New Roman, Times, serif", size=18),
    margin=dict(l=20, r=20, t=20, b=20),
)

fig.write_image(root_dir + f'/outputs/figs/complexity_parameters_Geological.pdf')
fig.show()

### Processing Time

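Run `Epoch_analysis.py` before executing the cells in this section (presumably it produces the `model_dict_times_*.pkl` files loaded below; confirm).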

In [None]:
# Load the EGMS_anomaly training datasets and the per-model dictionary that
# carries the timing entries ('time_epoch', 'time_total') read by the cells below.
dataset_name = 'EGMS_anomaly'
datasets = torch.load(dataset_path + f'{dataset_name}/Training/dataset.pt')
model_dict = torch.load(root_dir + f'/outputs/Optuna_analysis/model_dict_times_{dataset_name}.pkl')

# Models reported in the timing tables
model_names = ['AE', 'GCN2MLP', 'GCNAE', 'GConv2MLP', 'GConvAE', 'GUNet', 'RAE_GRU', 'RAE_LSTM']

In [None]:
var = 'time_epoch'

# Average along axis 1 first, then take mean and standard deviation of those row means.
mean_times, std_times = {}, {}
for model in model_names:
    row_means = np.mean(model_dict[model][var], axis=1)
    mean_times[model] = np.mean(row_means)
    std_times[model] = np.std(row_means)

for model in model_names:
    print(f"{model}:")
    print(f"  Epoch time: {mean_times[model]:.3f} +- {std_times[model]:.3f} seconds")

In [None]:
var = 'time_total'

# Average along axis 1 first, then take mean and standard deviation of those row means.
mean_times, std_times = {}, {}
for model in model_names:
    row_means = np.mean(model_dict[model][var], axis=1)
    mean_times[model] = np.mean(row_means)
    std_times[model] = np.std(row_means)

for model in model_names:
    print(f"{model}:")
    print(f"  Total time: {mean_times[model]:.3f} +- {std_times[model]:.3f} seconds")

In [None]:
next(model_dict['AE']['model'].parameters())

In [None]:
np.array(model_dict['AE']['time_epoch'])

In [None]:
for model_name, model_info in model_dict.items():
    print(f"{model_name}: {model_info['trial_params']['n_epochs']} epochs")

In [None]:
# Load the EGMS_anomaly training datasets and move the first one to the GPU.
device = 'cuda'
dataset_path = root_dir + "/data/datasets/"
datafile = 'EGMS_anomaly/Training/dataset.pt'
datasets = torch.load(dataset_path + datafile)

dataset = datasets[0]
# Size of the second axis of the data array; used below as the models' input width.
input_dim = datasets[0]['data'].shape[1]

data = dataset['data']
# Collapse the label array along axis 1 with a maximum, leaving one value per row.
label = dataset['label'].max(axis=1) #label per pixel

X = torch.tensor(data).float().to( device )
# Displayed as the cell output: size of the first axis of X.
X.shape[0]

In [None]:
len(datasets)

In [None]:
# Constructor arguments for a small GRU-based RAE
model_params = dict(
    n_features=2,
    latent_dim=4,
    rnn_type='GRU',
    rnn_act='relu',
    device=device,
)
batch_size = 512

model_class = models.RAE
model = model_class(**model_params)
model = model.to(device)

# An RAE with more than one feature is rebuilt with n_features = X.shape[0],
# keeping every other constructor argument of the current instance.
if isinstance(model, models.RAE) and (model.n_features != 1):
    relevant_params = ['n_features', 'latent_dim', 'rnn_type', 'rnn_act', 'device']
    new_model_params = {attr: getattr(model, attr) for attr in relevant_params}
    new_model_params.update(n_features=X.shape[0])
    model = models.RAE(**new_model_params)
    model.to(new_model_params['device'])

# Count only the parameters that receive gradients
num_params = sum(weight.numel() for weight in model.parameters() if weight.requires_grad)
print(f"Number of parameters: {num_params}\n")


In [None]:
# GConvAE with layer widths input_dim -> 4 -> 4
model_params = dict(layer_dims=[input_dim, 4, 4])

model_class = models.GConvAE
model = model_class(**model_params)
model = model.to(device)

# Count only the parameters that receive gradients
num_params = sum(weight.numel() for weight in model.parameters() if weight.requires_grad)
print(f"Number of parameters: {num_params}")

In [None]:
model

In [None]:
# GCNAE with layer widths input_dim -> 4 -> 4
model_params = dict(layer_dims=[input_dim, 4, 4])

model_class = models.GCNAE
model = model_class(**model_params)
model = model.to(device)

# Count only the parameters that receive gradients
num_params = sum(weight.numel() for weight in model.parameters() if weight.requires_grad)
print(f"Number of parameters: {num_params}")

In [None]:
# GUNet mapping input_dim channels back to input_dim channels
model_params = dict(
    in_channels=input_dim,
    out_channels=input_dim,
    hidden_channels=300,
    depth=1,
    pool_ratios=0.7,
)

model_class = models.GUNet
model = model_class(**model_params)
model = model.to(device)

# Count only the parameters that receive gradients
num_params = sum(weight.numel() for weight in model.parameters() if weight.requires_grad)
print(f"Number of parameters: {num_params}")

In [None]:
def pixel_mse(output,X):
    """Return the squared reconstruction error averaged over dimension 1.

    The element-wise squared difference between `output` and `X` is computed
    without reduction and then averaged along axis 1, giving one score per
    entry of the leading dimension.
    """
    squared_error = torch.nn.functional.mse_loss(output, X, reduction='none')
    return squared_error.mean(dim=1)


# Prefer the third GPU, as originally hard-coded, but fall back gracefully so
# the notebook still runs on machines with fewer GPUs or without CUDA.
if torch.cuda.is_available():
    device = 'cuda:2' if torch.cuda.device_count() > 2 else 'cuda:0'
else:
    device = 'cpu'
def train_model(model, X, label, lr, G=None, n_epochs=1000, n_checkpoints=10):
    """Train `model` to reconstruct `X`, recording AUC and loss at log-spaced epochs.

    The model parameters are re-initialised (`model.reset_parameters()`) after
    seeding the torch / CUDA / NumPy RNGs with 0, then optimised full-batch with
    Adam on an MSE reconstruction loss.

    Parameters
    ----------
    model : module exposing `parameters()`, `train()`, `reset_parameters()` and
        callable as `model(X)`.
    X : torch.Tensor
        Input that the model reconstructs; scores are the squared error
        averaged over dimension 1.
    label : passed unchanged to `get_auc` together with the scores.
    lr : float
        Adam learning rate.
    G : unused. Kept only for backward compatibility; the previous version
        built a dense adjacency from `G.W` that was never consumed.
    n_epochs : int, default 1000
        Number of optimisation steps (epochs 1..n_epochs inclusive).
    n_checkpoints : int, default 10
        Number of geometrically spaced epochs at which metrics are recorded.

    Returns
    -------
    (auc_epoch, loss_epoch) : tuple of lists
        One AUC (rounded to 3 decimals) and one loss value per checkpoint.

    Raises
    ------
    ValueError
        If `n_epochs` or `n_checkpoints` is smaller than 1.
    """
    if n_epochs < 1 or n_checkpoints < 1:
        raise ValueError("n_epochs and n_checkpoints must both be >= 1")

    rng_seed = 0
    torch.manual_seed(rng_seed)
    torch.cuda.manual_seed(rng_seed)
    np.random.seed(rng_seed)

    loss_epoch = []
    auc_epoch = []

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.MSELoss()

    # Checkpoint epochs are loop-invariant, so compute them once. Rounding
    # before ceil guards against float noise (e.g. 10.000000000000002 -> 11).
    spaced = np.round(np.geomspace(1, n_epochs, n_checkpoints), 9)
    checkpoints = set(np.ceil(spaced).astype(int).tolist())

    model.train()
    model.reset_parameters()

    # The range is inclusive of n_epochs so the final checkpoint is reached;
    # the previous range(1, 1000) silently skipped the checkpoint at 1000.
    for epoch in range(1, n_epochs + 1):

        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, X)
        loss.backward()
        optimizer.step()

        if epoch in checkpoints:
            loss_epoch.append(loss.item())

            # Per-row reconstruction error from the pre-step forward pass.
            squared_error = torch.nn.functional.mse_loss(output, X, reduction='none')
            scores = squared_error.mean(dim=1).detach().cpu().numpy()

            auc_value = get_auc(scores, label, resolution=101).round(3)
            auc_epoch.append(auc_value)

    return auc_epoch, loss_epoch

def evaluate_model(model, datasets, lr, max_datasets=5):
    """Train `model` on several datasets and average the per-checkpoint metrics.

    Parameters
    ----------
    model : forwarded to `train_model`, which re-initialises it on every call.
    datasets : sequence of dicts with at least the keys 'data' and 'label'.
        The per-row label is `dataset['label'].max(axis=1)`.
    lr : float
        Learning rate forwarded to `train_model`.
    max_datasets : int or None, default 5
        Only the first `max_datasets` entries are evaluated (the previous
        version hard-coded 5). `None` evaluates all of them.

    Returns
    -------
    (mean_auc, mean_loss)
        Element-wise means over the evaluated datasets of the AUC and loss
        curves returned by `train_model`, rounded to 3 decimals.
    """
    auc_epoch_list = []
    loss_epoch_list = []

    selected = datasets if max_datasets is None else datasets[:max_datasets]

    for it, dataset in enumerate(selected):

        print(f'Evaluating dataset {it}', flush=True)

        data = dataset['data']
        label = dataset['label'].max(axis=1)  # label per pixel

        # `device` is the notebook-level device setting.
        X = torch.tensor(data).float().to(device)

        auc_curve, loss_curve = train_model(model, X, label, lr)
        auc_epoch_list.append(auc_curve)
        loss_epoch_list.append(loss_curve)

    return np.mean(auc_epoch_list, axis=0).round(3), np.mean(loss_epoch_list, axis=0).round(3)

-----------------

In [None]:
X.shape

In [None]:
# Reshape a copy of X to (rows, X.shape[1], 1) so each row becomes a
# sequence with one feature per step.
X2 = X.clone().view(-1, X.shape[1], 1)

# Input and target are the same tensor: the goal is reconstruction.
# NOTE(review): this rebinds the notebook-level name `dataset`, which other
# cells use for a dict-like sample; re-run the cell that assigns
# `dataset = datasets[...]` before using those cells again.
dataset = TensorDataset(X2, X2)
dataloader = DataLoader(dataset, batch_size=100, shuffle=True)

# Pull the first batch only.
data_iter = iter(dataloader)
batch_X, batch_y = next(data_iter)

print(batch_X.shape)

if model.n_features > 1:
    # batch_X is 3-D; `.T` on non-2-D tensors is deprecated, so reverse the
    # dimensions explicitly (identical result) before adding a batch axis.
    batch_X2 = batch_X.permute(2, 1, 0).unsqueeze(0)
    # Printed only when it was actually computed; the unconditional print
    # raised NameError (or showed a stale value) for single-feature models.
    print(batch_X2.shape)


In [None]:
batch_X.unsqueeze(2).shape

In [None]:
batch_X.T.unsqueeze(0).shape

In [None]:
# Hyper-parameters for an LSTM-based recurrent auto-encoder (RAE).
model_params = dict(
    n_features=2,
    latent_dim=4,
    rnn_type='LSTM',
    rnn_act='relu',
    device=device,
)
batch_size = 512

model_class = models.RAE
model = model_class(**model_params).to(device)

In [None]:
# Read back whichever of these constructor arguments the current model
# exposes as attributes; names it lacks are skipped.
relevant_params = ['n_features', 'latent_dim', 'rnn_type', 'rnn_act', 'device']
model_params = dict(
    (name, getattr(model, name))
    for name in relevant_params
    if hasattr(model, name)
)


In [None]:
asd = 'all'

In [None]:
asd != 1

In [None]:
n_features = 1
batch_size = 27
seq_len = 10

# Toy input: step i contributes a (batch_size, n_features) block filled with
# the value i, and the blocks are laid side by side along dimension 1.
# A single concatenation replaces the previous grow-in-a-loop version, which
# depended on torch.cat skipping an empty 1-D seed tensor and carried a
# `dim() == 1` branch that could never run for 2-D blocks.
x = torch.cat(
    [i * torch.ones(batch_size, n_features) for i in range(seq_len)],
    dim=1,
)

In [None]:
x.view(-1, seq_len, n_features).shape

In [None]:
X

In [None]:
new_params

In [None]:
new_params['n_features'] = 300

---------

In [None]:
# Load the saved hyper-parameter search result and the Oslo training datasets.
# NOTE(review): joblib.load and torch.load unpickle their input; only load
# files from a trusted source.
# `study` is presumably an Optuna study (later cells read `study.best_params`) — TODO confirm.
study = joblib.load(root_dir+'/outputs/pixel_detection/HP_training/TR_AE.pkl')
datasets = torch.load(dataset_path + 'Oslo/training/dataset.pt')
# Width of the model input = number of columns of one sample's 'data' array.
input_dim = datasets[0]['data'].shape[1]

# Inspect one sample (index 9 is an arbitrary, hard-coded choice).
dataset = datasets[9]
data = dataset['data']
label = dataset['label'].max(axis=1) # label per pixel: row-wise max of the label matrix
X = torch.tensor(data).float().to(device)

# Show the full label matrix; the title reports the sum of the per-row labels.
px.imshow(dataset['label'], aspect='auto', width=600, title=f'Example: {label.sum():.3g} anomalous nodes').show()

In [None]:
# Select one sample and move it to the training device.
dataset = datasets[9]
print(dataset['metadata'])
data = dataset['data']
label = dataset['label'].max(axis=1)  # label per pixel
X = torch.tensor(data).float().to(device)

# Hyper-parameters are read from `study.best_params`.
lr = study.best_params['lr']
n_epochs = study.best_params['n_epochs']
n_layers = study.best_params['n_layers']
layer_dims = [input_dim]
for i in range(n_layers):
    layer_dims.append(study.best_params[f'layer_dim_{i}'])

# Alternative manual configuration:
# dims = [177, 89, 49, 35, 17]
# layer_dims = [input_dim, *dims]
# lr = 0.000025
# n_epochs = 261

model = models.AE(layer_dims)
model = model.to(device)

# Seed before reset_parameters() so the re-initialisation below is reproducible.
rng_seed = 0
torch.manual_seed(rng_seed)
torch.cuda.manual_seed(rng_seed)
np.random.seed(rng_seed)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.MSELoss()

model.train()
model.reset_parameters()

output_list = []

for epoch in range(n_epochs):

    optimizer.zero_grad()
    output = model(X)
    loss = criterion(output, X)
    loss.backward()
    optimizer.step()

    # Store a detached copy: appending `output` itself kept every epoch's
    # autograd graph alive, so memory grew with n_epochs for no benefit.
    output_list.append(output.detach())

# Score each row by its mean squared reconstruction error at the last epoch.
scores = pixel_mse(output_list[-1], X).detach().cpu().numpy()
# NOTE(review): this rebinds `auc`, which the import cell also imports from
# sklearn.metrics; later calls to sklearn's auc() in this kernel will fail.
auc = get_auc(scores, label, resolution=101).round(3)
auc

In [None]:
# Create DataFrames
# Wide frames (input and last-epoch reconstruction) with the row index named "sensor_id".
df_X = pd.DataFrame(X.detach().cpu().numpy()).rename_axis(index="sensor_id")
df_output = pd.DataFrame(output_list[-1].detach().cpu().numpy()).rename_axis(index="sensor_id")

# Long format: one row per (sensor_id, timestamp) pair.
df_X_long = df_X.reset_index().melt(id_vars=["sensor_id"], var_name="timestamp", value_name="X")
df_output_long = df_output.reset_index().melt(id_vars=["sensor_id"], var_name="timestamp", value_name="output")

# Join input and reconstruction on the shared keys, then cast the former
# column labels to integers.
df_final = df_X_long.merge(df_output_long, on=["sensor_id", "timestamp"])
df_final["timestamp"] = df_final["timestamp"].astype(int)

# Indices of the rows whose label is non-zero.
print(f'{np.where(label)[0]}')

# Animated comparison of input vs. reconstruction, one frame per sensor_id.
px.line(
    df_final,
    x='timestamp',
    y=['X','output'],
    animation_frame='sensor_id',
    width=1000,
    range_y=[-10,35],
).show()

# Scores per row, overlaid with the label scaled to 75% of the maximum score.
fig = px.line(y=[label*scores.max()*0.75, scores], width=1000, markers=True)
fig.update_traces(line=dict(width=0.5), marker={'size':5})
fig.show()

# Same animation restricted to the rows flagged by the label.
px.line(
    df_final[df_final.sensor_id.isin(np.where(label)[0])],
    x='timestamp',
    y=['X','output'],
    animation_frame='sensor_id',
    width=1000,
    range_y=[-10,35],
).show()

In [None]:
# Build a graph from the sample's positions with the project's fd.NNGraph helper
# (radius=15; presumably in the same units as easting/northing — TODO confirm).
G = fd.NNGraph(pd.DataFrame(data=dataset['pos'], columns=['easting','northing']), radius=15)
# Plot the last column of X on the graph, then the per-row label.
utils.plotly_signal(G, X[:,-1].cpu().numpy(), width=500, height=300)
utils.plotly_signal(G, label, width=500, height=300)

In [None]:
model = models.GCNencoder([15,12,12])

In [None]:
possible_classes = [models.GCN2MLP, models.AE]

In [None]:
isinstance(model, tuple(possible_classes))

In [None]:
# NOTE(review): `pygsp` does not appear in the notebook's import cell, so this
# line raises NameError on a fresh kernel unless it is imported elsewhere — confirm.
pygsp.graphs.NNGraph(dataset['pos'])

In [None]:
G = fd.NNGraph(pd.DataFrame(dataset['pos'], columns=['easting','northing']), radius=15)

In [None]:
G.plot()

In [None]:
from torch_geometric.utils import dense_to_sparse

In [None]:
X.device

In [None]:
next(model.parameters())

In [None]:
dataset['edge_weight'].to(device)

In [None]:
getattr(models, 'AE')([2, 2, 2, 2])