In [1]:
# Imports

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import scipy as sp
# import contextily as cx

import torch
import pygsp
import optuna
import joblib
import gc
import argparse
import os
import matplotlib

from matplotlib.ticker import ScalarFormatter, StrMethodFormatter, FormatStrFormatter, FuncFormatter
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

from sklearn.metrics import mean_squared_error, confusion_matrix, auc
from sklearn.preprocessing import MinMaxScaler, StandardScaler

from optuna.samplers import TPESampler
from torch.nn import Linear
from torch_geometric.nn.models import GraphUNet
from torch_geometric.nn import GCNConv, Sequential
from torch_geometric.data import Data
from torch_geometric.datasets import KarateClub
from torch_geometric.utils import to_networkx, grid
from torchvision import datasets, transforms

from importlib import reload
from pyprojroot import here
# Project root as reported by pyprojroot's `here()`; every data/model path below is built from it.
ROOT_DIR = str(here())
# Folder holding the raw InSAR CSV exports (note the trailing slash: callers concatenate file names onto it).
insar_path = ROOT_DIR + "/data/raw/insar/"

# Notebook-wide figure typography.
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# import dario.models.mismatch_analysis as mma
# mma = reload(mma)

# Function definitions

def plot_anim(outputs, epochs):
    """Build an animation comparing inputs with their reconstructions.

    Parameters
    ----------
    outputs : sequence
        One entry per frame. ``entry[1]`` is the input tensor and ``entry[2]``
        the reconstructed tensor; both are reshaped to 28x28, so each must
        hold exactly 784 values.
    epochs : int
        Number of frames; frames ``0 .. epochs-1`` index into ``outputs``.

    Returns
    -------
    matplotlib.animation.FuncAnimation
        Animation running at 2 frames per second, looping.
    """
    fig, ax = plt.subplots()

    def generate_matrix(epoch):
        """Return a 28x56 image: input on the left, reconstruction on the right."""
        out = outputs[epoch][2].detach().numpy().reshape(28, 28)
        inp = outputs[epoch][1].numpy().reshape(28, 28)
        return np.c_[inp, out]

    def init():
        ax.clear()
        # Close this figure explicitly (not "the current one") so the notebook
        # does not also render a static copy of it.
        plt.close(fig)

    def update(frame):
        # Start from an empty axes: without this every frame stacked one more
        # AxesImage on top of the previous ones.
        ax.clear()
        ax.imshow(generate_matrix(frame), cmap='gray', vmin=0, vmax=1)
        # Hide all ticks and tick labels
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_title(f'{frame}', fontdict={'color':'white'})
        plt.close(fig)

    fps = 2
    ani = FuncAnimation(fig, update, frames=range(epochs), interval=1000/fps, repeat=True, blit=False, init_func=init)
    return ani

def roc_params(metric, label, interp=True):
    """Sweep thresholds over ``metric`` and collect ROC points.

    Thresholds are 1001 evenly spaced values between 0 and ``metric.max()``.
    Points are recorded for increasing thresholds until the first one with no
    false positives; the sweep then continues (without recording) until the
    true positives vanish as well, and that last threshold is attached to the
    (0, 0) point that is prepended to the curve.

    Parameters
    ----------
    metric : numpy.ndarray
        Anomaly score per sample; a sample is flagged when ``metric > threshold``.
    label : array-like
        Ground-truth labels passed to ``confusion_matrix`` as ``y_true``.
    interp : bool, default True
        When true, TPR and thresholds are linearly interpolated onto an FPR
        grid of 101 points in [0, 1].

    Returns
    -------
    tuple of list
        ``(tpr, fpr, thr)`` ordered by increasing FPR (note: TPR comes first).
    """
    thresholds = list(np.linspace(0, metric.max(),1001))

    def counts_at(cutoff):
        # Flattened confusion matrix, in sklearn's (tn, fp, fn, tp) order.
        return confusion_matrix(label, (metric>cutoff)).ravel()

    fpr, tpr, thr = [], [], []
    position = 0

    # Phase 1: record ROC points until a threshold yields zero false positives.
    while True:
        threshold = thresholds[position]
        position += 1

        tn, fp, fn, tp = counts_at(threshold)

        fpr.append( fp/(tn + fp) )
        tpr.append( tp/(tp + fn) )
        thr.append( threshold )

        if not fp > 0:
            break

    # Phase 2: keep raising the threshold until no true positive is left.
    while tp > 0:
        threshold = thresholds[position]
        position += 1
        tn, fp, fn, tp = counts_at(threshold)

    # Thresholds were visited in increasing order, i.e. decreasing FPR; flip.
    fpr.reverse()
    tpr.reverse()
    thr.reverse()

    if interp:
        grid = np.linspace(0, 1, 101)
        interp_tpr = np.interp(grid, fpr, tpr)
        interp_thr = np.interp(grid, fpr, thr)
        fpr, tpr, thr = list(grid), list(interp_tpr), list(interp_thr)

    # Anchor the curve at the origin, paired with the threshold found in phase 2.
    fpr = [0] + fpr
    tpr = [0] + tpr
    thr = [threshold] + thr

    return tpr, fpr, thr

def compute_auc(tpr, fpr):
    """Area under a ROC curve by the trapezoidal rule.

    Parameters
    ----------
    tpr, fpr : sequence of float
        True/false positive rates of the same length, ordered along the curve.

    Returns
    -------
    float or int
        Sum of the trapezoid areas between consecutive points; 0 when fewer
        than two points are given.
    """
    area = 0
    right = 1
    while right < len(fpr):
        left = right - 1
        width = fpr[right] - fpr[left]
        heights = tpr[right] + tpr[left]
        area += width * heights / 2
        right += 1
    return area

# def detection(df_metrics, column_name='wse', threshold_min=1000, threshold_max=np.inf, selector='group',
#               detection_param='detection_sum', detection_param_threshold=None):
#     # df_relevant contains data from nodes that, at some point, have lower<=wse<=upper, and their neighbors.
#     # nodes are put into groups if they are close to each other.

#     if detection_param_threshold is None:
#         detection_param_threshold = df_metrics.timestamp.nunique()//2

#     df_relevant = mma.relevant_neighborhood(df_metrics, column_name=column_name,
#                                             lower=threshold_min, upper=threshold_max,
#                                             only_relevant=True, return_df=True, plot=False, filter_dates=False)

#     # Treating disconnected nodes as individual groups. Assining new values
#     new_group_values = df_relevant.query('group==0').pid.factorize()[0] + df_relevant.group.max()+1
#     df_relevant.loc[df_relevant.group==0, 'group'] = new_group_values


#     df_relevant['detection'] = (df_relevant[column_name]>=threshold_min) & (df_relevant[column_name]<=threshold_max)
#     df_detection = df_relevant.groupby('pid').agg({column_name:['max','mean'],
#                                                     'detection':['sum',mma.consecutive_ones],
#                                                     'group':'mean'}).reset_index()

#     df_detection.columns = [f"{level1}_{level2}" if level2 else level1 for level1, level2 in df_detection.columns]
#     df_detection.rename({'group_mean':'group'}, axis=1, inplace=True)

#     query = f'{detection_param}>{detection_param_threshold}'
#     selected = df_detection.query(query)[selector].unique()

#     return df_relevant, selected

# def skew(df):
#     return np.abs(sp.stats.skew(df.mean_velocity))


# def compute_metric(df_test, cut=2, radius=15):

#     df_metrics = []
#     for cluster in sorted(df_test.cluster.unique()):

#         df, nodes = mma.treat_nodes(df_test.query('cluster==@cluster'))
#         G, nodes['subgraph'] = mma.NNGraph(nodes, radius=radius, subgraphs=True)

#         df_metrics_cluster = []
#         for sub_index in sorted(nodes.subgraph.unique())[1:]:

#             subnodes = nodes.query('subgraph==@sub_index').copy()
#             subdf = df[df.pid.isin(subnodes.pid)].copy()

#             G = mma.NNGraph(subnodes, radius=radius)

#             w, V = np.linalg.eigh(G.L.toarray())
#             wh = np.ones(G.N)
#             wh[w<cut] = 0
#             Hh = V @ np.diag(wh) @ V.T

#             smoothed = subdf[['pid', 'timestamp', 'smoothed' ]].pivot(index='pid', columns='timestamp')

#             subdf['hf'] = np.abs((Hh @ smoothed.values).reshape((-1,), order='C'))

#             df_metrics_cluster.append(subdf)

#         df_metrics_cluster = pd.concat(df_metrics_cluster)
#         df_metrics.append(df_metrics_cluster)

#     df_metrics = pd.concat(df_metrics)
#     return df_metrics


# def hfilter(G, cut=2):
#     L = G.L.toarray()
#     w, V = np.linalg.eigh(L)
#     wh = np.ones(G.N)
#     wh[w<cut] = 0
#     Hh = V @ np.diag(wh) @ V.T
#     return Hh

# def matplotlib_roc(save=None, ax=None):
#     matplotlib.rcParams.update({'font.size': 20})
#     matplotlib.rcParams.update({'font.family': 'Times New Roman'})

#     if ax is None:
#         fig, ax = plt.subplots(figsize=(12,5))
#     # sc = ax.scatter(fpr, tpr, c=thr, cmap='viridis', label='Threshold')
#     sc = ax.plot(fpr, tpr, linestyle='dotted', linewidth=1, color='black')

#     # # Colorbar
#     # cbar = plt.colorbar(sc, ax=ax)
#     # cbar.set_label('Threshold', rotation=270, labelpad=15)

#     plt.xlabel('False Positive Rate')
#     plt.ylabel('True Positive Rate')
#     # plt.grid()
#     # plt.tight_layout()

#     if save is not None:
#         plt.savefig(save, transparent=True)


  if kerneltype is 'sf':
  elif kerneltype is 'wavelet':


ModuleNotFoundError: No module named 'torchvision'

## Reading results

In [None]:
# Show the trials of the stored WSE hyperparameter search, sorted by objective value (ascending).
# NOTE(review): joblib.load unpickles arbitrary objects - only open log files produced by this project.
joblib.load(ROOT_DIR+"/models/outputs/optuna_wse/optimization_logs_2.pkl").trials_dataframe().sort_values('value')

## Preprocessing

In [None]:
# Load the already-preprocessed Porsgrunn series and the raw InSAR export to compare against.
df_proc = pd.read_parquet(ROOT_DIR+"/data/interim/df_Porsgrunn_A1L2B.parq")
df_orig = pd.read_csv(insar_path+"/A1/L2B_117_0350_IW3_VV.csv") # New Porsgrunn A1
df = df_orig.copy()

# Keep only the pixels inside the bounding box of interest.
lat_min, lat_max, lon_min, lon_max = (59.10, 59.20, 9.55, 9.74) # 1 - Porsgrunn
df = df[ (df.longitude>lon_min) & (df.longitude<=lon_max) &
            (df.latitude>lat_min) & (df.latitude<=lat_max)  ]

# Select relevant columns
date_cols = sorted([col for col in df.columns if "20" in col]) #columns named after timestamps
id_cols = ['pid', 'latitude', 'longitude', 'easting', 'northing', 'mean_velocity']
# Build a NEW list here. Previously keep_cols was an alias of date_cols, so
# extending it also appended the id columns to date_cols, which then leaked
# into melt's value_vars below.
keep_cols = date_cols + id_cols #list with variables to keep from dataframe
df = df[keep_cols]  #replacing old df for memory efficiency
# df_originals.append(df)

# Formatting from wide to tall dataframe
# Uses a single column for timestamp and a column for displacement
# Number of rows = number of pixels * number of timestamps
df = df.melt(id_vars=id_cols, value_vars=date_cols,
                var_name='timestamp', value_name='displacement').sort_values('pid')
df.timestamp = pd.to_datetime(df.timestamp)

# RETRO: based on gap before 2016.06
df = df[df.timestamp>='2016-06-01'].copy()
df.reset_index(drop=True, inplace=True)
df.sort_values(['pid','timestamp'], inplace=True)

In [None]:
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# Compare raw and preprocessed displacement for the first pixel listed in df_proc.
# NOTE(review): `id` shadows the builtin; the name is kept in case other cells read it.
id = df_proc.pid.unique()[0]
# Filter each frame once instead of re-running the same query for x and y.
raw_series = df[df.pid == id]
proc_series = df_proc[df_proc.pid == id]

fig, ax = plt.subplots(figsize=(12,5))
ax.scatter(raw_series.timestamp, raw_series.displacement, s=15)
ax.plot(proc_series.timestamp, proc_series.smoothed, color='red')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Ground displacement [mm]')
ax.legend(['Original','Preprocessed'])
ax.grid(which='both')
fig.tight_layout()
fig.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/preprocessing", transparent=True)
plt.show()

In [None]:
# Load the already-preprocessed Malmo series and the raw InSAR export to compare against.
df_proc = pd.read_parquet(ROOT_DIR+"/data/interim/df_Malmo_D1old.parq")
df_orig = pd.read_csv(insar_path+"066_0742_iw1_vv.csv") # Old Malmo
df = df_orig.copy()

# Keep only the pixels inside the bounding box of interest.
lat_min, lat_max, lon_min, lon_max = (55.55, 55.58, 12.9,13.1) # Malmo
# lat_min, lat_max, lon_min, lon_max = (59.10, 59.20, 9.55, 9.74) # 1 - Porsgrunn
df = df[ (df.longitude>lon_min) & (df.longitude<=lon_max) &
            (df.latitude>lat_min) & (df.latitude<=lat_max)  ]

# Select relevant columns
date_cols = sorted([col for col in df.columns if "20" in col]) #columns named after timestamps
id_cols = ['pid', 'latitude', 'longitude', 'easting', 'northing', 'mean_velocity']
# Build a NEW list here. Previously keep_cols was an alias of date_cols, so
# extending it also appended the id columns to date_cols, which then leaked
# into melt's value_vars below.
keep_cols = date_cols + id_cols #list with variables to keep from dataframe
df = df[keep_cols]  #replacing old df for memory efficiency
# df_originals.append(df)

# Formatting from wide to tall dataframe
# Uses a single column for timestamp and a column for displacement
# Number of rows = number of pixels * number of timestamps
df = df.melt(id_vars=id_cols, value_vars=date_cols,
                var_name='timestamp', value_name='displacement').sort_values('pid')
df.timestamp = pd.to_datetime(df.timestamp)

# RETRO: based on gap before 2016.06
df = df[df.timestamp>='2016-06-01'].copy()
df.reset_index(drop=True, inplace=True)
df.sort_values(['pid','timestamp'], inplace=True)

In [None]:
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# Compare raw and preprocessed displacement for the first pixel listed in df_proc.
# NOTE(review): `id` shadows the builtin; the name is kept in case other cells read it.
id = df_proc.pid.unique()[0]
# Filter each frame once instead of re-running the same query for x and y.
raw_series = df[df.pid == id]
proc_series = df_proc[df_proc.pid == id]

fig, ax = plt.subplots(figsize=(12,5))
ax.scatter(raw_series.timestamp, raw_series.displacement, s=15)
ax.plot(proc_series.timestamp, proc_series.smoothed, color='red')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Ground displacement [mm]')
ax.legend(['Original','Preprocessed'])
ax.grid(which='both')
fig.tight_layout()
fig.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/preprocessing_malmo", transparent=True)
plt.show()

### Plotting graph

In [None]:
# Display df_test.
# NOTE(review): within this notebook df_test is first assigned in the following cell,
# so this cell raises NameError on a fresh top-to-bottom run.
df_test

In [None]:
# Draw the nearest-neighbour graph of all pixels on top of an OpenStreetMap basemap.
# NOTE(review): `mma` and `cx` are only imported in commented-out lines of the first cell;
# this cell relies on them already being present in the kernel.
np.random.seed(1)  # only matters for the (commented-out) random subsampling below
df_test = df.drop_duplicates('pid')  # one row per pixel
# df_test = df_test.query(('latitude>58.147 and latitude<58.149 and longitude>8.02 and longitude<8.024'))
# df_test = df_test[df_test.pid.isin(np.random.choice(df_test.pid.unique(), size=200))]


Graph = mma.NNGraph(df_test, radius=15, plotting_params= {'edge_color':'darkgray', 'edge_width':1.5,'vertex_color':'black', 'vertex_size':5})
# Plot in geographic coordinates so the graph lines up with the epsg:4326 basemap.
Graph.coords = df_test[['longitude','latitude']].values
fig, ax = plt.subplots(figsize=(16,9))
# ax.scatter(df_test.longitude, df_test.latitude)

ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
Graph.plot(ax=ax, plot_name='')
cx.add_basemap(ax, crs='epsg:4326', source=cx.providers.OpenStreetMap.Mapnik)
# plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/graph.png", transparent=True)

In [None]:
# Display the per-pixel frame (one row per pid) built by the graph-plotting cell.
df_test

In [None]:
# Frequency of each colour code.
# NOTE(review): `colors` is not assigned anywhere above this cell (only in a commented-out
# line and in a much later cell), so this depends on leftover kernel state.
colors.value_counts()

In [None]:
# Scatter every pixel location on top of an OpenStreetMap basemap and save the figure.
# NOTE(review): `cx` (contextily) is only imported in a commented-out line of the first cell.
# NOTE(review): the output is named Porsgrunn.png, but `df` holds whichever dataset was
# preprocessed last - confirm the intended cell order before regenerating the figure.
np.random.seed(1)
df_test = df.drop_duplicates('pid')  # one row per pixel

fig, ax = plt.subplots(figsize=(16,9))
# colors = df_test.mean_velocity.astype('category').cat.codes
# No `c=` data is given, so a colormap would be ignored (matplotlib warns about it);
# the points are drawn in the default colour exactly as before.
ax.scatter(df_test.longitude, df_test.latitude, s=0.1)

ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
# Graph.plot(ax=ax, plot_name='')
cx.add_basemap(ax, crs='epsg:4326', source=cx.providers.OpenStreetMap.Mapnik)
plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/Porsgrunn.png", transparent=True)

## GRAPH UNET

### Hyperparameter tuning

In [None]:
class Args(argparse.Namespace):
    # Search space and bookkeeping for the Graph U-Net hyperparameter study.
    # Every list holds the categorical choices offered to Optuna in `objective`.
    n_epochs = [20,40,60,80]  # training epochs per snapshot
    n_trials = 45  # trials added by each run of this cell
    learning_rate = [1e-3, 1e-2, 1e-1]  # Adam learning rate
    penalty_rate = [1e-15, 1e-5, 1e-1]  # Adam weight_decay
    hidden_channels = [2, 3, 5]
    depth = [2, 3, 5]
    pool_ratios = [0.2, 0.5, 0.7]

    # Folder where the pickled study is stored (trailing slash required by the path concatenation below).
    log_dir=ROOT_DIR + '/models/outputs/optuna_gunet/'

args = Args()

def roc_params(metric, label, interp=True):
    """Sweep thresholds over ``metric`` and collect ROC points.

    Thresholds are 1001 evenly spaced values between 0 and ``metric.max()``.
    Points are recorded for increasing thresholds until the first one with no
    false positives; the sweep then continues (without recording) until the
    true positives vanish as well, and that last threshold is attached to the
    (0, 0) point that is prepended to the curve.

    Parameters
    ----------
    metric : numpy.ndarray
        Anomaly score per sample; a sample is flagged when ``metric > threshold``.
    label : array-like
        Ground-truth labels passed to ``confusion_matrix`` as ``y_true``.
    interp : bool, default True
        When true, TPR and thresholds are linearly interpolated onto an FPR
        grid of 101 points in [0, 1].

    Returns
    -------
    tuple of list
        ``(tpr, fpr, thr)`` ordered by increasing FPR (note: TPR comes first).
    """
    thresholds = list(np.linspace(0, metric.max(),1001))

    def counts_at(cutoff):
        # Flattened confusion matrix, in sklearn's (tn, fp, fn, tp) order.
        return confusion_matrix(label, (metric>cutoff)).ravel()

    fpr, tpr, thr = [], [], []
    position = 0

    # Phase 1: record ROC points until a threshold yields zero false positives.
    while True:
        threshold = thresholds[position]
        position += 1

        tn, fp, fn, tp = counts_at(threshold)

        fpr.append( fp/(tn + fp) )
        tpr.append( tp/(tp + fn) )
        thr.append( threshold )

        if not fp > 0:
            break

    # Phase 2: keep raising the threshold until no true positive is left.
    while tp > 0:
        threshold = thresholds[position]
        position += 1
        tn, fp, fn, tp = counts_at(threshold)

    # Thresholds were visited in increasing order, i.e. decreasing FPR; flip.
    fpr.reverse()
    tpr.reverse()
    thr.reverse()

    if interp:
        grid = np.linspace(0, 1, 101)
        interp_tpr = np.interp(grid, fpr, tpr)
        interp_thr = np.interp(grid, fpr, thr)
        fpr, tpr, thr = list(grid), list(interp_tpr), list(interp_thr)

    # Anchor the curve at the origin, paired with the threshold found in phase 2.
    fpr = [0] + fpr
    tpr = [0] + tpr
    thr = [threshold] + thr

    return tpr, fpr, thr

def compute_auc(tpr, fpr):
    """Area under a ROC curve by the trapezoidal rule.

    Parameters
    ----------
    tpr, fpr : sequence of float
        True/false positive rates of the same length, ordered along the curve.

    Returns
    -------
    float or int
        Sum of the trapezoid areas between consecutive points; 0 when fewer
        than two points are given.
    """
    area = 0
    right = 1
    while right < len(fpr):
        left = right - 1
        width = fpr[right] - fpr[left]
        heights = tpr[right] + tpr[left]
        area += width * heights / 2
        right += 1
    return area

def train_model(model, n_epochs, learning_rate, penalty_rate):
    """Score a graph autoencoder as an anomaly detector on synthetic graphs.

    For each of 10 seeds a synthetic graph and data set are generated through
    ``mma``; the model is re-initialised and fitted to every snapshot (column
    of the data matrix) on its own, and the absolute reconstruction error per
    node is used as anomaly score. The scores of all snapshots are evaluated
    against the labels with ``roc_params``/``compute_auc``.

    Parameters
    ----------
    model : torch.nn.Module
        Model called as ``model(x, edge_index)`` and exposing ``reset_parameters()``.
    n_epochs : int
        Optimisation steps per snapshot.
    learning_rate : float
        Adam learning rate.
    penalty_rate : float
        Adam ``weight_decay``.

    Returns
    -------
    float
        Mean AUC over the 10 seeds.
    """
    loss_function = torch.nn.MSELoss()
    # NOTE(review): the optimizer is created once, so Adam's moment estimates carry
    # over when the weights are re-initialised for the next snapshot. Behaviour is
    # kept as-is so results stay comparable with the testing cell - confirm intent.
    optimizer = torch.optim.Adam(model.parameters(),
                                lr = learning_rate,
                                weight_decay = penalty_rate)

    scaler = StandardScaler()
    auc = []

    for seed in range(10):

        print(f'seed:{seed}')
        np.random.seed(seed)
        # Seed torch as well (as the testing cell does) so the weight
        # re-initialisation below is reproducible.
        torch.manual_seed(seed)

        G = mma.synth_graph(seed=seed)
        # 2 x n_edges index array built from the non-zero entries of the adjacency matrix.
        edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)

        data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                    noise_var=1e-5, eigs=1, signal_power=1e-6)

        data = scaler.fit_transform(data)
        # Column-major flattening matches the snapshot-by-snapshot order of `error`
        # (assumes label has the same layout as data - TODO confirm).
        label_vector = label.reshape((-1,), order='F')

        error = []

        for snap in range(data.shape[1]):

            # One node feature per node: the snapshot value.
            x = torch.Tensor(data[:,snap]).reshape(-1,1)

            model.reset_parameters()

            # Per-epoch losses/outputs are no longer accumulated: they were never
            # read and kept every autograd graph of the snapshot alive.
            for _ in range(n_epochs):

                reconstructed = model(x, edge_index)     # Output of Autoencoder
                loss = loss_function(reconstructed, x)    # Calculating the loss function

                optimizer.zero_grad() # The gradients are set to zero,
                loss.backward() # the gradient is computed and stored.
                optimizer.step() # .step() performs parameter update

            # Score = absolute error of the reconstruction from the last forward pass.
            error_snap = (reconstructed.detach() - x).abs().numpy().flatten()
            error.extend(error_snap)

        error = np.array(error).reshape((-1,))
        tpr, fpr, _ = roc_params(error, label_vector, interp=True)
        auc.append(compute_auc(tpr,fpr))

    return np.mean(auc)
    
    
def objective(trial):
    """Optuna objective for the Graph U-Net: sample one configuration and return its mean AUC.

    Each hyperparameter is drawn as a categorical choice from the list of the
    same name on the module-level ``args``.
    """
    gc.collect()

    # Sampled in this fixed order; each name doubles as the attribute on `args`.
    search_space = ('n_epochs', 'learning_rate', 'penalty_rate',
                    'hidden_channels', 'depth', 'pool_ratios')
    n_epochs, learning_rate, penalty_rate, hidden_channels, depth, pool_ratios = (
        trial.suggest_categorical(name, getattr(args, name)) for name in search_space
    )

    print(f"INFO: Trial number: {trial.number}")
    print(f"INFO: Learning rate: {learning_rate}")
    print(f"INFO: Penalty rate: {penalty_rate}")
    print(f"INFO: Hidden_channels: {hidden_channels}")
    print(f"INFO: Depth: {depth}")
    print(f"INFO: Pool ratios: {pool_ratios}")
    print(f"INFO: n_epochs: {n_epochs}")

    # Single input and output channel: one scalar per node.
    model = GraphUNet(1, hidden_channels, 1, depth, pool_ratios)
    score = train_model(model, n_epochs, learning_rate, penalty_rate)
    return score


# exist_ok=True already covers the "folder is there" case, no prior check needed.
os.makedirs(args.log_dir, exist_ok=True)

log_file = args.log_dir + 'optimization_logs.pkl'
if os.path.isfile(log_file):
    # Resume the stored study; this run appends n_trials more trials to it.
    # NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
    study = joblib.load(log_file)
else:
    # NOTE(review): `objective` never reports intermediate values, so the pruner
    # configured here cannot prune anything.
    study = optuna.create_study(sampler=TPESampler(),
                                direction='maximize',
                                pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=24, interval_steps=6))

study.optimize(objective, n_trials=args.n_trials, gc_after_trial=True)
# The study is only persisted once all trials of this run have finished.
joblib.dump(study, log_file)

In [None]:
# Load the stored Graph U-Net study from the project tree (no machine-specific absolute path)
# and show the five best 80-epoch trials, best last.
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
study = joblib.load(ROOT_DIR + '/models/outputs/optuna_gunet/optimization_logs.pkl')
study.trials_dataframe().query('params_n_epochs==80').sort_values('value').tail()

### Testing

In [None]:
# Held-out evaluation of the Graph U-Net detector on 10 synthetic graphs (seeds 1001-1010).
# NOTE(review): relies on kernel state - `mma` is only imported in a commented-out line and
# `matplotlib_roc` only exists as commented-out code in the first cell.
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# Hyperparameters fixed for this evaluation.
learning_rate = 0.1
penalty_rate = 1e-5
n_epochs = 80

hidden_channels = 5
depth = 2
pool_ratios = 0.5

# 1 input channel and 1 output channel: a single scalar per node.
model = GraphUNet(1, hidden_channels, 1, depth, pool_ratios)

loss_function = torch.nn.MSELoss() 
# NOTE(review): created once; Adam's internal state is not reset when the model
# parameters are re-initialised for each snapshot below.
optimizer = torch.optim.Adam(model.parameters(),
                            lr = learning_rate,
                            weight_decay = penalty_rate)

scaler = StandardScaler()
auc = []  # one AUC per seed

test_seed = 1001  # offset keeping test seeds apart from the 0-9 used during tuning
fig, ax = plt.subplots(figsize=(12,5))

for seed in range(10):

    print(f'seed:{seed}')

    # From here on `seed` is the shifted test seed.
    seed = seed+test_seed
    np.random.seed(seed)
    torch.manual_seed(seed)

    G = mma.synth_graph(seed=seed)
    # 2 x n_edges index array from the non-zero entries of the adjacency matrix.
    edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)

    data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                noise_var=1e-5, eigs=1, signal_power=1e-6)

    data = scaler.fit_transform(data)
    # Column-major flattening matches the snapshot-by-snapshot order of `error`.
    label_vector = label.reshape((-1,), order='F')

    error = []
    
    for snap in range(data.shape[1]):

        # GRAPH UNET PART
        x = torch.Tensor(data[:,snap]).reshape(-1,1)

        # Every snapshot is fitted from freshly initialised weights.
        model.reset_parameters()

        epochs = n_epochs
        outputs = []
        losses = []
        for epoch in range(epochs):
                    
            reconstructed = model(x, edge_index)     # Output of Autoencoder
            loss = loss_function(reconstructed, x)    # Calculating the loss function
            
            optimizer.zero_grad() # The gradients are set to zero,
            loss.backward() # the gradient is computed and stored.
            optimizer.step() # .step() performs parameter update

            # Storing the losses in a list for plotting
            # NOTE(review): these are tensors still attached to their autograd graph.
            losses.append(loss)
            outputs.append((epoch, x, reconstructed))

        # Anomaly score: absolute reconstruction error of the last forward pass.
        error_snap = np.abs(reconstructed.detach() - x).numpy().flatten()
        error.extend(error_snap)
        
    error = np.array(error).reshape((-1,))
    tpr, fpr, thr = roc_params(error, label_vector, interp=True)
    auc.append(compute_auc(tpr,fpr))
    # Presumably draws this seed's ROC curve from the global fpr/tpr, as the
    # commented-out definition in the first cell does - TODO confirm.
    matplotlib_roc(ax=ax)

# fig = px.scatter(x=fpr, y=tpr, color=thr,
#            labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
#            width=700, height=500)
# fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
# fig.show()

# matplotlib_roc(save=ROOT_DIR+"/models/outputs/figs/ReportESA_synth_roc_gunet.png")
plt.grid()
plt.tight_layout()
plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/synth_roc_gunet.png", transparent=True)
# Mean AUC over the 10 test seeds.
print(np.mean(auc))

## GRAPH FILTER

### Hyperparameter tuning

In [None]:
class Args(argparse.Namespace):
    # Search space for the graph high-pass filter study.
    cut = np.arange(0,8,0.5).tolist()  # candidate cut-offs 0.0, 0.5, ..., 7.5
    # As many trials as candidates; the sampler is not guaranteed to visit each value exactly once.
    n_trials = len(cut)

    # Folder where the pickled study is stored (trailing slash required by the path concatenation below).
    log_dir=ROOT_DIR + '/models/outputs/optuna_gfilter/'

args = Args()

def train_model_gsp(cut):
    """Score the graph high-pass filter detector for one cut-off value.

    For each of 10 seeds a synthetic graph and data set are generated through
    ``mma``, every snapshot (column of the data matrix) is passed through the
    filter returned by ``hfilter(G, cut)`` and the absolute filter output per
    node is used as anomaly score.

    NOTE(review): ``hfilter`` only appears as commented-out code in the first
    cell of the notebook; it has to be defined in the kernel for this to run.

    Parameters
    ----------
    cut : float
        Cut-off handed to ``hfilter``.

    Returns
    -------
    float
        Mean AUC over the 10 seeds.
    """
    scaler = StandardScaler()
    auc = []

    for seed in range(10):

        print(f'seed:{seed}')
        np.random.seed(seed)

        G = mma.synth_graph(seed=seed)
        # The torch edge_index built here before was never used by the filter
        # and required densifying the adjacency matrix; it has been dropped.

        data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                    noise_var=1e-5, eigs=1, signal_power=1e-6)

        data = scaler.fit_transform(data)
        # Column-major flattening matches the snapshot-by-snapshot order of `error`
        # (assumes label has the same layout as data - TODO confirm).
        label_vector = label.reshape((-1,), order='F')

        Hh = hfilter(G, cut)

        error = []

        for snap in range(data.shape[1]):

            error.extend(np.abs(Hh @ data[:,snap]))

        error = np.array(error).reshape((-1,))
        tpr, fpr, _ = roc_params(error, label_vector, interp=True)
        auc.append(compute_auc(tpr,fpr))

    return np.mean(auc)
    
    
def objective(trial):
    """Optuna objective for the graph filter: sample a cut-off and return its mean AUC."""
    gc.collect()

    cut = trial.suggest_categorical('cut',args.cut)

    for message in (f"INFO: Trial number: {trial.number}",
                    f"INFO: cut: {cut}"):
        print(message)

    score = train_model_gsp(cut)
    return score


# exist_ok=True already covers the "folder is there" case, no prior check needed.
os.makedirs(args.log_dir, exist_ok=True)

log_file = args.log_dir + 'optimization_logs.pkl'
if os.path.isfile(log_file):
    # Resume the stored study; this run appends n_trials more trials to it.
    # NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
    study = joblib.load(log_file)
else:
    # NOTE(review): `objective` never reports intermediate values, so the pruner
    # configured here cannot prune anything.
    study = optuna.create_study(sampler=TPESampler(),
                                direction='maximize',
                                pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=24, interval_steps=6))

study.optimize(objective, n_trials=args.n_trials, gc_after_trial=True)
# The study is only persisted once all trials of this run have finished.
joblib.dump(study, log_file)

In [None]:
# Load the stored graph-filter study from the project tree (no machine-specific absolute path)
# and list all of its trials.
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
study = joblib.load(ROOT_DIR + '/models/outputs/optuna_gfilter/optimization_logs.pkl')
study.trials_dataframe()

### Testing

In [None]:
# Held-out evaluation of the graph high-pass filter detector on 10 synthetic graphs (seeds 1001-1010).
# NOTE(review): relies on kernel state - `mma` is only imported in a commented-out line, and
# `hfilter` / `matplotlib_roc` only exist as commented-out code in the first cell.
cut = 1.5  # cut-off handed to hfilter

scaler = StandardScaler()
auc = []  # one AUC per seed

test_seed = 1001  # offset keeping test seeds apart from the 0-9 used during tuning

fig, ax = plt.subplots(figsize=(12,5))

for seed in range(10):

    print(f'seed:{seed}')

    # From here on `seed` is the shifted test seed.
    seed = seed+test_seed
    np.random.seed(seed)

    G = mma.synth_graph(seed=seed)
    # Not used by the filter itself in this cell.
    edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)

    data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                noise_var=1e-5, eigs=1, signal_power=1e-6)

    data = scaler.fit_transform(data)
    # Column-major flattening matches the snapshot-by-snapshot order of `error`.
    label_vector = label.reshape((-1,), order='F')

    Hh = hfilter(G, cut)

    error = []
    
    for snap in range(data.shape[1]):

        # Anomaly score: magnitude of the filtered snapshot at each node.
        filtered = np.abs(Hh @ data[:,snap])
        error.extend(filtered)
                    
    error = np.array(error).reshape((-1,))
    tpr, fpr, thr = roc_params(error, label_vector, interp=True)
    auc.append(compute_auc(tpr,fpr))
    # Presumably draws this seed's ROC curve from the global fpr/tpr, as the
    # commented-out definition in the first cell does - TODO confirm.
    matplotlib_roc(ax=ax)

# fig = px.scatter(x=fpr, y=tpr, color=thr,
#            labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
#            width=700, height=500)
# fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
# fig.show()
# matplotlib_roc(save=ROOT_DIR+"/models/outputs/figs/ReportESA_synth_roc_gfilter.png")
plt.grid()
plt.tight_layout()
plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/synth_roc_gfilter.png", transparent=True)
# Mean AUC over the 10 test seeds.
print(np.mean(auc))


### Filtering real data

In [None]:
# Apply the high-frequency metric to a real (preprocessed) data set and run a first detection round.
# NOTE(review): `compute_metric` and `detection` only exist as commented-out code in the
# first cell; they must already be defined in the kernel for this cell to run.
dataset = 'df_Trondheim_A1L2B'
df_orig = pd.read_parquet(ROOT_DIR+f"/data/interim/{dataset}.parq")

th1 = 70 # round 1 threshold
th2 = 40 # round 2 threshold (not used in this cell)
th_hits = 10 # number of timestamps hitting threshold to assign anomaly

cut = 2 # frequency cut
radius = 20 # radius for constructing NNGraph

df = df_orig.copy()

# round 1
df_metrics = compute_metric(df, cut, radius)
df_detection, selected = detection(df_metrics, column_name='hf', threshold_min=th1, selector='pid',
                                   detection_param='detection_sum', detection_param_threshold=th_hits)

In [None]:
# Collect the neighbourhoods of nodes selected through the 'hf' bounds (lower=10, upper=14);
# the returned frame is consumed by the plotting cell further down.
# NOTE(review): `mma` is only imported in a commented-out line of the first cell.
df_relevant = mma.relevant_neighborhood(df_metrics, column_name='hf', lower=10, upper=14, zoom=11, range_meters=15,
                          only_relevant=False, filter_dates=False, by_max=True, return_df=True) #lower 60

In [None]:
import matplotlib.dates as mdates


def plot_pixel_data(df, y='smoothed', animation_frame=None, range_y=None, figsize=(12, 8)):
    """Plot one time series per pixel (``pid``) on a single axes.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with columns ``pid``, ``timestamp`` and ``y``.
    y : str, default 'smoothed'
        Column plotted on the vertical axis.
    animation_frame : object, optional
        Only its truthiness is used: when truthy, each pid is drawn as a
        marked line with a legend and a title; otherwise the frame is pivoted
        and drawn without legend, with one x tick per year.
    range_y : sequence, optional
        Limits passed to ``ax.set_ylim``.
    figsize : tuple, default (12, 8)
        Figure size in inches.

    Returns
    -------
    None
        The figure is left open as the current pyplot figure. Global
        rcParams (font size and family) are updated as a side effect.
    """
    matplotlib.rcParams.update({'font.size': 20})
    matplotlib.rcParams.update({'font.family': 'Times New Roman'})

    if animation_frame:
        fig, ax = plt.subplots(figsize=figsize)
        cmap = plt.get_cmap('tab10')

        for position, pid in enumerate(df['pid'].unique()):
            subset = df[df['pid'] == pid]
            # Colour by position in the palette, wrapping around: indexing the
            # colormap with the raw pid value does not give one colour per series.
            ax.plot(subset['timestamp'], subset[y], label=f'PID {pid}', marker='o',
                    color=cmap(position % cmap.N))

        ax.set_xlabel('Timestamp')
        ax.set_ylabel(y)
        ax.legend(title='PID', loc='upper right')
        ax.grid(True)
        ax.set_title('Pixel Data Over Time')

    else:
        # One column per pid, indexed by timestamp.
        pivot_df = df.pivot(index='timestamp', columns='pid', values=y)

        fig, ax = plt.subplots(figsize=figsize)
        # legend=False: the legend used to be created and then removed straight away.
        pivot_df.plot(ax=ax, colormap='tab10', legend=False)

        ax.set_xlabel('Timestamp')
        ax.set_ylabel('Ground displacement [mm]')
        ax.grid(True)

        # Format x-axis labels to show only the year
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

        # Set major locator to show only one tick per year
        ax.xaxis.set_major_locator(mdates.YearLocator(base=1))

        # Set minor locator to show ticks for every month (adjust interval as needed)
        # ax.xaxis.set_minor_locator(mdates.MonthLocator(interval=1))

    if range_y:
        ax.set_ylim(range_y)
    plt.tight_layout()
    # plt.show()

In [None]:
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'Times New Roman'})

# Earlier hand-written version of the plot, superseded by plot_pixel_data below.
# df_plot = df_relevant.query('group==248')

# legend = []
# fig, ax = plt.subplots(figsize=(12,9))
# for pid in df_plot.pid.unique():
#     ax.plot(df_plot.query('pid==@pid').timestamp, df_plot.query('pid==@pid').smoothed)
#     legend.append(pid)

# plt.grid(which='both')
# ax.set_xlabel('Timestamp')
# ax.set_ylabel('Ground displacement [mm]')
# # ax.legend(legend)
# # plt.savefig(ROOT_DIR+f"/models/outputs/figs/ReportESA/fault_example.png", transparent=True)
# plt.show() 

# Smoothed displacement of every pixel in group 248 (group id picked by hand).
plot_pixel_data(df_relevant.query('group==248'), y='smoothed', animation_frame=None, range_y=None, figsize=(5, 5))
# plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/IGARSS_fault_example.png", transparent=True)


## WSE

### Hyperparameter tuning

In [None]:
# Preview of the `cut` candidates used by the WSE search below: 2.0, 2.5, ..., 5.0.
np.arange(2, 5.5, 0.5).tolist()

In [None]:
class Args(argparse.Namespace):
    # Search space for the WSE study; both lists are offered to Optuna as categorical choices.
    cut = np.arange(2, 5.5, 0.5).tolist()  # 2.0, 2.5, ..., 5.0
    decay = [1, 1.5, 2, 2.5, 3, 3.5]
    n_trials = 20  # trials added by each run of this cell (fewer than the 42 combinations)

    # Folder where the pickled study is stored (trailing slash required by the path concatenation below).
    log_dir=ROOT_DIR + '/models/outputs/optuna_wse/'

args = Args()

def train_model_gsp(cut, decay):
    """Score the WSE detector for one (cut, decay) pair.

    For each of 10 seeds a synthetic graph and data set are generated through
    ``mma``; ``mma.wse`` is evaluated on every snapshot (column of the data
    matrix) and its output is used as anomaly score.

    Parameters
    ----------
    cut, decay : float
        Forwarded unchanged to ``mma.wse``.

    Returns
    -------
    float
        Mean AUC over the 10 seeds.
    """
    scaler = StandardScaler()
    seed_scores = []

    for seed in range(10):
        print(f'seed:{seed}')
        np.random.seed(seed)

        G = mma.synth_graph(seed=seed)
        # Node table with the planar coordinates expected by mma.wse.
        nodes = pd.DataFrame({'easting':G.coords[:,0], 'northing':G.coords[:,1]})

        data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                    noise_var=1e-5, eigs=1, signal_power=1e-6)
        data = scaler.fit_transform(data)

        # Column-major flattening matches the snapshot-by-snapshot order of the scores.
        label_vector = label.reshape((-1,), order='F')

        scores = []
        snap = 0
        while snap < data.shape[1]:
            column = data[:,snap].reshape((-1,1))
            scores.extend(mma.wse(nodes, x=column, decay=decay, cut=cut))
            snap += 1

        flat_scores = np.array(scores).reshape((-1,))
        tpr, fpr, thr = roc_params(flat_scores, label_vector, interp=True)
        seed_scores.append(compute_auc(tpr,fpr))

    return np.mean(seed_scores)
    
    
def objective(trial):
    """Optuna objective for WSE: sample (cut, decay) and return the mean AUC."""
    gc.collect()

    # Sampled in this fixed order; each name doubles as the attribute on `args`.
    cut, decay = (trial.suggest_categorical(name, getattr(args, name))
                  for name in ('cut', 'decay'))

    for message in (f"INFO: Trial number: {trial.number}",
                    f"INFO: cut: {cut}",
                    f"INFO: decay: {decay}"):
        print(message)

    score = train_model_gsp(cut, decay)
    return score


# exist_ok=True already covers the "folder is there" case, no prior check needed.
os.makedirs(args.log_dir, exist_ok=True)

log_file = args.log_dir + 'optimization_logs_2.pkl'
if os.path.isfile(log_file):
    # Resume the stored study; this run appends n_trials more trials to it.
    # NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
    study = joblib.load(log_file)
else:
    # NOTE(review): `objective` never reports intermediate values, so the pruner
    # configured here cannot prune anything.
    study = optuna.create_study(sampler=TPESampler(),
                                direction='maximize',
                                pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=24, interval_steps=6))

study.optimize(objective, n_trials=args.n_trials, gc_after_trial=True)
# The study is only persisted once all trials of this run have finished.
joblib.dump(study, log_file)

### Testing

In [None]:
# Held-out evaluation of the WSE detector on 10 synthetic graphs (seeds 1001-1010).
# NOTE(review): relies on kernel state - `mma` is only imported in a commented-out line and
# `matplotlib_roc` only exists as commented-out code in the first cell.
cut = 4.0
decay = 1

scaler = StandardScaler()
auc = []  # one AUC per seed

test_seed = 1001  # offset keeping test seeds apart from the 0-9 used during tuning
fig, ax = plt.subplots(figsize=(12,5))

for seed in range(10):

    print(f'seed:{seed}')

    # From here on `seed` is the shifted test seed.
    seed = seed+test_seed
    np.random.seed(seed)

    G = mma.synth_graph(seed=seed)
    # Node table with the planar coordinates handed to mma.wse.
    nodes = pd.DataFrame({'easting':G.coords[:,0], 'northing':G.coords[:,1]})

    data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                noise_var=1e-5, eigs=1, signal_power=1e-6)

    data = scaler.fit_transform(data)
    # Column-major flattening matches the snapshot-by-snapshot order of `error`.
    label_vector = label.reshape((-1,), order='F')

    error = []
    
    for snap in range(data.shape[1]):

        # Anomaly score per node for this snapshot.
        wse = mma.wse(nodes, x=data[:,snap].reshape((-1,1)), decay=decay, cut=cut)
        error.extend(wse)
                    
    error = np.array(error).reshape((-1,))
    tpr, fpr, thr = roc_params(error, label_vector, interp=True)
    auc.append(compute_auc(tpr,fpr))
    # Presumably draws this seed's ROC curve from the global fpr/tpr, as the
    # commented-out definition in the first cell does - TODO confirm.
    matplotlib_roc(ax=ax)

# fig = px.scatter(x=fpr, y=tpr, color=thr,
#            labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
#            width=700, height=500)
# fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
# fig.show()
# matplotlib_roc(save=ROOT_DIR+"/models/outputs/figs/ReportESA_synth_roc_wse.png")
plt.grid()
plt.tight_layout()
# plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/synth_roc_wse.png", transparent=True)
# Mean AUC over the 10 test seeds.
print(np.mean(auc))

In [None]:
# Display the flattened score vector left over from the last seed of the most recently run testing cell.
error

In [None]:
# Gather, for the last snapshot of the last test seed, the signal, the labels and the
# scores of the three detectors in one node table.
# NOTE(review): wse, error_snap and filtered are leftovers of three different testing
# cells; they only describe the same graph/snapshot if all three were run with the same seeds.
nodes['data'] = data[:,snap]
nodes['label'] = label[:,snap]
nodes['wse'] = wse
nodes['gunet'] = error_snap
nodes['hf'] = filtered
# nodes = nodes.reset_index(names='pid')


In [None]:
# Map of the synthetic nodes coloured by label and sized by WSE score.
# NOTE(review): zoom and recenter values are hand-tuned for the synthetic coordinates.
mma.visualise_mismatch_map(nodes, color='label', size='wse', zoom=17.8, recenter=[4000000,0000])

In [None]:
# Scatter of the synthetic nodes: colour = label, marker area proportional to the high-pass score.
fig, ax = plt.subplots(figsize=(12,5))
colors = nodes['label'].astype('category').cat.codes  # integer code per distinct label
ax.scatter(nodes['easting'], nodes['northing'], c=colors, s=5000*nodes['hf'], cmap='viridis')
# Strip frame and axes so only the nodes remain.
plt.box(False)
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
plt.tight_layout()
plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/synth_score_hf.png", transparent=True)
plt.show()


In [None]:
# Re-import mma to pick up edits made to the module, then plot and save the last synthetic graph.
# NOTE(review): requires `mma` to be imported already; the import in the first cell is commented out.
mma = reload(mma)
mma.plot_graph(G, name='', figsize=(12,5))
plt.savefig(ROOT_DIR+"/models/outputs/figs/ReportESA/synth_graph.png", transparent=True)

In [None]:
# Scratch arithmetic (evaluates to 25.0); the meaning of the two numbers is not stated in the notebook.
4950/198

In [None]:
# Quick look at the ROC curve currently held in the globals fpr/tpr
# (i.e. the last seed of whichever testing cell ran most recently).
# Create a line plot connecting the points
plt.plot(fpr, tpr, marker='o', linestyle='dotted')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')

plt.show()

In [None]:
def ramp_to_plateou(pos, slope, start=-np.inf, end= np.inf):
    """Profile along the first coordinate: zero, then a linear ramp, then a plateau.

    Parameters
    ----------
    pos : array indexed as ``pos[:, 0]``; only that first column is used.
    slope : gradient of the ramp.
    start, end : the ramp covers ``start <= pos[:, 0] < end``.

    Returns
    -------
    1-D array holding ``slope * (x - start)`` on the ramp, 0 where the ramp
    mask is false, and, for every ``x >= end``, the maximum of that masked ramp.
    """
    x = pos[:,0]
    on_ramp = np.logical_and(x >= start, x < end)
    ramp = slope*(x - start)*on_ramp
    # The plateau level is the largest value present in the masked ramp.
    beyond_end = x >= end
    ramp[beyond_end] = ramp.max()
    return ramp

# Build one synthetic "healthy" signal on graph G:
# filtered random displacement + white noise + a ramp-to-plateau terrain.
np.random.seed(1011)

size = 1          # number of samples (columns) to generate
noise_var = 0.1   # variance of the additive white noise
max_slope = 1     # upper bound of the random terrain slope
pos = G.coords

# Eigen-decomposition of the graph Laplacian (eigh returns ascending eigenvalues).
w, V = np.linalg.eigh(G.L.toarray())
# NOTE(review): the eigenvalues themselves are used as filter gains; after this
# line only w[0] (the smallest eigenvalue) is non-zero. If that eigenvalue is
# exactly 0 the normalisation below divides by zero — confirm this is intended.
w[1:] = 0 # Frequency filter

displacement = np.random.randn(G.N, size)
displacement = V @ np.diag(w) @ V.T @ displacement # filtering
# normalizing for desired power
displacement = np.sqrt(1*G.N)*displacement/(np.linalg.norm(displacement,axis=0))

noise = np.sqrt(noise_var)*np.random.randn(G.N,size)

# terrain corresponds to a ramp to a plateau in the horizontal direction
slope = max_slope*np.random.rand() # makes small difference given proportional anomaly and scaler
start = pos[:,0].max()*np.random.rand()*0.5 # Slope always starts in the first half
# end = start + (pos[:,0].max()-start)*np.random.rand()

min_slope_dist = 100
# The ramp is at least `min_slope_dist` long (same units as pos[:, 0]).
end = start + min_slope_dist + (pos[:,0].max()-start-min_slope_dist)*np.random.rand()

terrain = ramp_to_plateou(pos, slope=slope, start=start, end=end).reshape((-1,1))
terrain = np.tile(terrain, (1,size)) # Matching number of samples (size)

# np.ptp instead of the ndarray.ptp method, which was removed in NumPy 2.0.
ptp = np.ptp(terrain)

signal = displacement + noise + terrain

In [None]:
# Report-wide font settings.
matplotlib.rcParams.update({'font.size': 20, 'font.family': 'Times New Roman'})

# Draw `signal` on graph G with pygsp's own plotting routine.
fig, ax = plt.subplots(figsize=(12,5))
G.plot_signal(signal, ax=ax, plot_name='')
plt.xlabel('West-east')
plt.ylabel('North-south')

# The second axes of the figure is used as the colour bar here.
cbar = fig.axes[1]
cbar.set_xlabel('test')

plt.tight_layout()

plt.show()

In [None]:
# Report-wide font settings.
matplotlib.rcParams.update({'font.size': 20, 'font.family': 'Times New Roman'})

# Two stacked panels: snapshot values along the first coordinate (top) and the
# healthy signal drawn at the node positions (bottom).
fig, ax = plt.subplots(nrows=2, figsize=(16, 8), gridspec_kw={'height_ratios':[1.5,4]})

# --- top panel: snapshot `snap`, coloured by its label ----------------------
scat = ax[0].scatter(
    G.coords[:,0], data[:,snap],
    c=label[:,snap], s=10, cmap= plt.get_cmap('Paired', 2),
)
cbar1 = fig.colorbar(scat, ax=ax[0], ticks=[0,1], aspect=7)
cbar1.set_label('Fault label')

ax[0].set_ylabel('Signal [mm]')
ax[0].set_xticklabels([])

# --- bottom panel: nodes with a non-zero label drawn underneath the signal --
fault_pos = G.coords[np.flatnonzero(label[:,snap]),:]

ax[1].scatter(fault_pos[:,0], fault_pos[:,1], marker='o',linewidth=20, s=1, c='black', linestyle='-')
scatter = ax[1].scatter(G.coords[:, 0], G.coords[:, 1], c=signal, cmap='viridis', marker='o', linewidth=5)

cbar = fig.colorbar(scatter, ax=ax[1])
cbar.set_label('Healthy signal [mm]')

ax[1].set_xlabel('Position West-East')
ax[1].set_ylabel('Position South-North')

plt.tight_layout()
plt.savefig(ROOT_DIR+'/models/outputs/figs/ReportESA/synth_signal.png', transparent=True)
plt.show()

In [None]:
# Display the coordinates of the nodes whose label is non-zero in snapshot `snap`.
fault_pos

In [None]:
# Display `fault_pos` again (this cell repeats the previous one).
fault_pos

In [None]:
# Regenerate a 25-sample synthetic data set on G and plot one snapshot of it.
# NOTE(review): `seed` and `snap` are not set in this cell; they keep whatever
# value an earlier loop left behind — confirm that is the intended snapshot.
data, label = mma.create_data(G, anomaly=0.1, size=25, seed=seed,
                                noise_var=1e-5, eigs=1, signal_power=1e-6)
plt.plot(data[:,snap])

## Old Stuff

### GCN

In [None]:
# Load the KarateClub benchmark and print its basic statistics.
dataset = KarateClub()
print(f"Dataset: {dataset}")
print(f"# Graphs: {len(dataset)}")
print(f"# Features: {dataset.num_features}")
print(f"# Classes: {dataset.num_classes}")

# The data set is indexed at 0 to obtain the graph used below; it is drawn with
# one colour per class.
# NOTE(review): `nx` is not imported in the imports cell (no `import networkx as nx`),
# so the draw call fails on a fresh kernel. This cell also rebinds `data` and `G`,
# which other sections use for the synthetic signal and the pygsp graph.
data = dataset[0]
G = to_networkx(data, to_undirected=True)
nx.draw(G, node_color=data.y, node_size=150)

class GCN(torch.nn.Module):
  """Three GCNConv layers with tanh activations followed by a linear classifier.

  Input and output widths are read from the global `dataset`. The forward pass
  returns both the class scores and the 2-channel embedding produced by the
  last convolution.
  """
  def __init__(self):
    super().__init__()
    torch.manual_seed(42)  # fixed seed: repeatable initial weights
    self.conv1 = GCNConv(dataset.num_features, 4)
    self.conv2 = GCNConv(4, 4)
    self.conv3 = GCNConv(4, 2)
    self.classifier = Linear(2, dataset.num_classes)

  def forward(self, x, edge_index):
    h = x
    for conv in (self.conv1, self.conv2, self.conv3):
      h = conv(h, edge_index).tanh()
    return self.classifier(h), h
model = GCN()

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train(data):
  """Run one optimisation step using only the nodes selected by `data.train_mask`.

  Returns the loss tensor of this step and the node embeddings produced by the
  forward pass.
  """
  optimizer.zero_grad()                                # clear gradients of the previous step
  logits, embedding = model(data.x, data.edge_index)   # forward pass
  train_nodes = data.train_mask
  loss = criterion(logits[train_nodes], data.y[train_nodes])
  loss.backward()                                      # populate parameter gradients
  optimizer.step()                                     # apply the update
  return loss, embedding

epochs = range(1, 301) # training iterations
losses = []
embeddings = []
for epoch in epochs: # training process
  loss, h = train(data)
  # Keep detached tensors only: appending `loss`/`h` directly would keep the
  # autograd graph of every epoch alive for as long as the lists exist.
  losses.append(loss.detach())
  embeddings.append(h.detach())
  if epoch == 1 or epoch % 50 == 0:
    print(f"Epoch: {epoch}\tLoss: {loss:.4f}")

# Final forward pass with the trained weights; predicted class = arg-max score.
out, h = model(data.x, data.edge_index)
class_predictions = torch.argmax(out, dim=1)

------------------------------------

### AE

In [None]:
# Fully connected autoencoder
# 28*28 ==> 9 ==> 28*28
class AE(torch.nn.Module):
    """Dense autoencoder for flattened 28x28 inputs with a 9-unit bottleneck."""

    def __init__(self):
        super().__init__()

        # Layer widths from the input down to the bottleneck.
        widths = [28 * 28, 128, 64, 36, 18, 9]

        # Encoder (784 ==> 9): Linear + ReLU pairs, no activation after the
        # bottleneck layer.
        enc_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            enc_layers += [torch.nn.Linear(n_in, n_out), torch.nn.ReLU()]
        enc_layers.pop()
        self.encoder = torch.nn.Sequential(*enc_layers)

        # Decoder (9 ==> 784): mirrored widths; the final activation is a
        # Sigmoid so that the outputs lie between 0 and 1.
        mirrored = widths[::-1]
        dec_layers = []
        for n_in, n_out in zip(mirrored[:-1], mirrored[1:]):
            dec_layers += [torch.nn.Linear(n_in, n_out), torch.nn.ReLU()]
        dec_layers[-1] = torch.nn.Sigmoid()
        self.decoder = torch.nn.Sequential(*dec_layers)

    def forward(self, x):
        return self.decoder(self.encoder(x))


In [None]:
# Transform applied to every image: conversion to a PyTorch tensor.
tensor_transform = transforms.ToTensor()

# MNIST training split read from ROOT_DIR/data/raw/.
# download=False: nothing is fetched, the files must already be on disk.
dataset = datasets.MNIST(
    root=ROOT_DIR + "/data/raw/",
    train=True,
    download=False,
    transform=tensor_transform,
)

# Shuffled mini-batches of 32 images for training.
loader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)


In [None]:
# Model Initialization
model = AE()

# Reconstruction objective: mean squared error between input and output
loss_function = torch.nn.MSELoss()

# Adam optimizer with lr = 1e-4 and weight decay 1e-8
optimizer = torch.optim.Adam(model.parameters(),
                             lr = 1e-4,
                             weight_decay = 1e-8)

epochs = 50
outputs = []
losses = []
for epoch in range(epochs):
    for (batch, _) in loader:

        # Reshaping the batch to (-1, 784)
        batch = batch.reshape(-1, 28*28)

        # Output of Autoencoder
        reconstructed = model(batch)

        # Calculating the loss function
        loss = loss_function(reconstructed, batch)

        # The gradients are set to zero,
        # the gradient is computed and stored.
        # .step() performs parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store a detached loss: appending `loss` itself would keep the
        # autograd graph (and activations) of every single batch alive.
        losses.append(loss.detach())
    # Keep the last batch of the epoch and its (detached) reconstruction.
    outputs.append((epoch, batch, reconstructed.detach()))

plt.plot([l.item() for l in losses])
plt.show()

# Compare input and reconstruction for the last trained epoch.
ep = epochs - 1
for index in range(1):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(4,8))
    ax[0].imshow(outputs[ep][1][index,:].reshape(28,28))
    ax[1].imshow(outputs[ep][2].detach().numpy()[index,:].reshape(28,28))
    plt.show()



#### Training with a single image

In [None]:
# Model Initialization
model = AE()

# Reconstruction objective: mean squared error between input and output
loss_function = torch.nn.MSELoss()

# Adam optimizer with lr = 5e-3 and weight decay 1e-8
optimizer = torch.optim.Adam(model.parameters(),
                             lr = 5e-3,
                             weight_decay = 1e-8)

epochs = 45
outputs = []
losses = []

# The single training image never changes, so fetch and flatten it once
# (shape (-1, 784)) instead of on every iteration.
image = dataset[1][0].reshape(-1,28*28)

for epoch in range(epochs):

    # Output of Autoencoder
    reconstructed = model(image)

    # Calculating the loss function
    loss = loss_function(reconstructed, image)

    # The gradients are set to zero,
    # the gradient is computed and stored.
    # .step() performs parameter update
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Store detached tensors so that the per-epoch autograd graphs can be freed.
    losses.append(loss.detach())
    outputs.append((epoch, image, reconstructed.detach()))


def generate_matrix(epoch):
    """Return the absolute reconstruction error of `epoch` as a 28x28 array."""
    out = outputs[epoch][2].detach().numpy().reshape(28,28)
    inp = outputs[epoch][1].numpy().reshape(28,28)
    return np.abs(out-inp)

fig, ax = plt.subplots()
def init():
    """Animation init hook: clear the axes and close the pyplot figure."""
    ax.clear()
    plt.close()

def update(frame):
    """Draw the error image of epoch `frame` with all ticks hidden."""
    matrix = generate_matrix(frame)  # Generate the matrix for the current frame
    ax.imshow(matrix, cmap='gray', vmin=0, vmax=1)  # Update the plot with the new matrix
    # Hide all ticks and tick labels
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    plt.close()

fps = 2
ani = FuncAnimation(fig, update, frames=range(epochs), interval=1000/fps, repeat=True, blit=False, init_func=init)
# plt.show()

plt.plot([l.item() for l in losses])
plt.show()

HTML(ani.to_jshtml())


### Graph Unet

#### One timestamp

In [None]:
# Re-import `mma` so that edits made to the module source are picked up.
mma = reload(mma)
# Passed as `size=` to mma.create_data in the cells below.
Nsamples = 10

# Synthetic graph and its edge list: one (row, col) pair per non-zero entry of
# the dense adjacency matrix, stored as a 2 x E tensor of dtype long.
G = mma.synth_graph()
edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)
scaler = StandardScaler()

In [None]:
# Three hand-specified defect entries (presumably keyed by node index — confirm
# against mma.synth_data).
anomalous_nodes_defect = {
    10: dict(start=0, end=5, slope=3, onset=5),
    13: dict(start=0, end=-5, slope=3, onset=5),
    187: dict(start=0, end=10, slope=5, onset=5),
}

# Keys 0..49 all receive the same parameters, derived from Nsamples.
anomalous_nodes_ramping = {
    node: dict(
        start=0,
        end=10,
        slope=int(Nsamples//1.5),
        onset=int(Nsamples//5),
    )
    for node in range(50)
}

# Keys 0..99: 'end' peaks at 10 for key 50, drops by 1/5 per step away from it
# and is clipped at 0.
anomalous_nodes_bulge = {
    node: dict(
        start=0,
        end=np.max((0, 10 - np.abs(node-50)/5)),
        slope=int(Nsamples//1.5),
        onset=int(Nsamples//5),
    )
    for node in range(100)
}

In [None]:
def hfilter(G, cut=2):
    """Return the ideal high-pass graph filter matrix of graph `G`.

    The Laplacian ``G.L`` is eigen-decomposed; spectral components whose
    eigenvalue is below `cut` get gain 0 and all others gain 1.

    Parameters
    ----------
    G : object exposing a sparse Laplacian ``L`` (with ``toarray()``) and the
        node count ``N``.
    cut : eigenvalue threshold.

    Returns
    -------
    (N, N) array ``V @ diag(gains) @ V.T``.
    """
    eigvals, eigvecs = np.linalg.eigh(G.L.toarray())
    gains = np.ones(G.N)
    gains[eigvals < cut] = 0
    return eigvecs @ np.diag(gains) @ eigvecs.T

In [None]:
# Positional arguments: in_channels=1, hidden_channels=3, out_channels=1,
# depth=3, pool_ratios=0.5.
model = GraphUNet(1, 3, 1, 3, 0.5)

loss_function = torch.nn.MSELoss()

# scaler = MinMaxScaler()
scaler = StandardScaler()

epochs = 75  # training iterations per snapshot

# NOTE(review): `auc` shadows sklearn.metrics.auc imported in the imports cell.
auc = []
auc_hf = []
for seed in range(20):
    # Seed both RNGs: numpy drives the synthetic data, torch the weight
    # re-initialisation done by reset_parameters().
    np.random.seed(seed)
    torch.manual_seed(seed)

    G = mma.synth_graph()
    edge_index = torch.tensor(np.array(np.nonzero(G.A.toarray())), dtype=torch.long)

    data, label = mma.create_data(G, anomaly=0.1, size=Nsamples, seed=seed,
                                noise_var=1e-5, eigs=1, signal_power=1e-6)
    # data = scaler.fit_transform(data.reshape(-1,1)).reshape(data.shape)
    data = scaler.fit_transform(data)
    # Column-major flattening matches the snapshot-by-snapshot order in which
    # the error lists are filled below.
    label_vector = label.reshape((-1,), order='F')

    Hh = hfilter(G, cut=2)

    error = []
    error_hf = []
    df_result = []
    for snap in range(data.shape[1]):

        # GRAPH UNET PART
        x = torch.Tensor(data[:,snap]).reshape(-1,1)

        model.reset_parameters()
        # A fresh optimizer per snapshot: reusing one Adam instance would carry
        # its moment estimates and step count over to the re-initialised model.
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr = 1e-1,
                                     weight_decay = 1e-18)

        outputs = []
        losses = []
        for epoch in range(epochs):

            reconstructed = model(x, edge_index)     # Output of Autoencoder
            loss = loss_function(reconstructed, x)    # Calculating the loss function

            optimizer.zero_grad() # The gradients are set to zero,
            loss.backward() # the gradient is computed and stored.
            optimizer.step() # .step() performs parameter update

            # Store detached tensors so that the per-epoch autograd graphs can be freed.
            losses.append(loss.detach())
            outputs.append((epoch, x, reconstructed.detach()))

        # Anomaly score: absolute reconstruction error of the last epoch.
        error_snap = np.abs(reconstructed.detach() - x).numpy().flatten()
        error.extend(error_snap)

        # GSP PART: magnitude of the high-pass filtered snapshot.
        error_hf.extend(np.abs(Hh @ data[:,snap]))

    error = np.array(error).reshape((-1,))
    tpr, fpr, thr = roc_params(error, label_vector, interp=True)
    auc.append(compute_auc(tpr,fpr))

    error_hf = np.array(error_hf).reshape((-1,))
    tpr_hf, fpr_hf, thr_hf = roc_params(error_hf, label_vector, interp=True)
    # Use the high-pass ROC here; the original passed the Graph U-Net `tpr, fpr`,
    # which made auc_hf an exact copy of auc.
    auc_hf.append(compute_auc(tpr_hf,fpr_hf))

# fig = px.scatter(x=fpr, y=tpr, color=thr,
#            labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
#            width=700, height=500)
# fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
# fig.show()


# mma.relevant_neighborhood(df_result, column_name='error', color='data', lower=0, recenter=[4341000,4479550],
#                                     zoom=16.5, figsize=(1000,600), colormap='viridis', transparent=False)

# plt.plot([l.item() for l in losses])
# plt.title(f'{losses[-1]}')
# plt.show()

# HTML(graph_anim(outputs,epochs, G).to_jshtml())

# model.state_dict()


In [None]:
# Display the list of per-seed AUC values collected by the loop above.
auc

In [None]:
# ROC curves of the Graph U-Net score and the high-pass score for the last seed.
# NOTE(review): both curves are drawn against `fpr` and coloured by `thr`, which
# belong to the Graph U-Net ROC; `fpr_hf`/`thr_hf` are unused here — confirm the
# two curves really share the same false-positive grid.
fig = px.scatter(
    x=fpr,
    y=[tpr, tpr_hf],
    color=thr,
    labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
    width=700,
    height=500,
)
fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
fig.show()

In [None]:
# Box plot of the values in `auc`, rendered at 500 x 300.
px.box(auc, width=500, height=300).show()

#### All timestamps

In [None]:
Nsamples = 10

# Three hand-specified defect entries (presumably keyed by node index — confirm
# against mma.synth_data).
anomalous_nodes_defect = {
    10: dict(start=0, end=5, slope=3, onset=5),
    13: dict(start=0, end=-5, slope=3, onset=5),
    187: dict(start=0, end=10, slope=5, onset=5),
}

# Keys 0..49 all receive the same parameters, derived from Nsamples.
anomalous_nodes_ramping = {
    node: dict(
        start=0,
        end=10,
        slope=int(Nsamples//1.5),
        onset=int(Nsamples//5),
    )
    for node in range(50)
}

# Keys 0..99: 'end' peaks at 10 for key 50, drops by 1/5 per step away from it
# and is clipped at 0.
anomalous_nodes_bulge = {
    node: dict(
        start=0,
        end=np.max((0, 10 - np.abs(node-50)/5)),
        slope=int(Nsamples//1.5),
        onset=int(Nsamples//5),
    )
    for node in range(100)
}

# Synthetic graph and its edge list: one (row, col) pair per non-zero entry of
# the dense adjacency matrix, stacked into a 2 x E tensor of dtype long.
G = mma.synth_graph()
edge_index = torch.tensor(np.stack(np.nonzero(G.A.toarray())), dtype=torch.long)

# Synthetic data for the defect scenario, ordered by timestamp and then by pid.
X = mma.synth_data(anomalous_nodes_defect, G.coords, 10, plot=False).sort_values(['timestamp', 'pid'])

In [None]:
# One input/output channel per timestamp and twice as many hidden channels;
# the last two positional arguments are depth=3 and pool_ratios=0.5.
n_timestamps = X.timestamp.nunique()
model = GraphUNet(n_timestamps, n_timestamps*2, n_timestamps, 3, 0.5)

loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),
                            lr = 1e-2,
                            weight_decay = 1e-8)

# Standardise the 'data' column over all rows (adds a column to X in place).
scaler = StandardScaler()
X['data_norm'] = scaler.fit_transform(X[['data']])

# Pivot to a (pid x timestamp) matrix: one row per pid, one column per timestamp.
x = X[['pid','timestamp','data_norm']].pivot(index='pid', columns='timestamp').reset_index(drop=True).values
x = torch.Tensor(x)

model.reset_parameters()

epochs = 50
outputs = []
losses = []

for epoch in range(epochs):

    reconstructed = model(x, edge_index)     # Output of Autoencoder
    loss = loss_function(reconstructed, x)    # Calculating the loss function

    optimizer.zero_grad() # The gradients are set to zero,
    loss.backward() # the gradient is computed and stored.
    optimizer.step() # .step() performs parameter update

    # Store detached tensors so that the per-epoch autograd graphs can be freed.
    losses.append(loss.detach())
    outputs.append((epoch, x, reconstructed.detach()))

out = reconstructed.detach().numpy()
inp = x.detach().numpy()

# Transposing makes every pid one "output", so raw_values yields one MSE per pid.
# The `squared=True` keyword was dropped: it is the default behaviour and the
# keyword itself has been removed from recent scikit-learn releases.
df_result = X.drop_duplicates('pid').copy()
df_result['mse'] = mean_squared_error(out.T, inp.T, multioutput='raw_values')

mma.relevant_neighborhood(df_result, column_name='mse', color='data', lower=0, recenter=[4341000,4479550],
                                    zoom=16.5, figsize=(1000,600), colormap='viridis', transparent=False)


In [None]:
# Display the synthetic data frame `X` as the cell output.
X

### GAE

In [None]:
# Fully connected autoencoder (same architecture as the AE defined in the AE section)
# 28*28 ==> 9 ==> 28*28
class AE(torch.nn.Module):
    """Dense autoencoder for flattened 28x28 inputs with a 9-unit bottleneck."""

    def __init__(self):
        super().__init__()

        # Layer widths from the input down to the bottleneck.
        widths = [28 * 28, 128, 64, 36, 18, 9]

        # Encoder (784 ==> 9): Linear + ReLU pairs, no activation after the
        # bottleneck layer.
        enc_layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            enc_layers += [torch.nn.Linear(n_in, n_out), torch.nn.ReLU()]
        enc_layers.pop()
        self.encoder = torch.nn.Sequential(*enc_layers)

        # Decoder (9 ==> 784): mirrored widths; the final activation is a
        # Sigmoid so that the outputs lie between 0 and 1.
        mirrored = widths[::-1]
        dec_layers = []
        for n_in, n_out in zip(mirrored[:-1], mirrored[1:]):
            dec_layers += [torch.nn.Linear(n_in, n_out), torch.nn.ReLU()]
        dec_layers[-1] = torch.nn.Sigmoid()
        self.decoder = torch.nn.Sequential(*dec_layers)

    def forward(self, x):
        return self.decoder(self.encoder(x))


In [None]:
# Graph-convolutional autoencoder
# 28*28 ==> 9 ==> 28*28 features per node
class AE(torch.nn.Module):
    """Autoencoder built from GCNConv layers (784 ==> 9 ==> 784 node features).

    NOTE(review): this class reuses the name `AE` and therefore replaces the
    dense autoencoder defined in the cells above.

    The original definition could not be instantiated: torch_geometric's
    ``Sequential`` expects an input-signature string followed by a list of
    modules, the layer widths did not chain (36 -> 64 was followed by
    128 -> 784 and the encoder started at 128 instead of 784), and ``forward``
    never supplied ``edge_index`` to the convolutions.
    """

    def __init__(self):
        super().__init__()

        # Signature telling Sequential how to call every graph convolution.
        conv_call = 'x, edge_index -> x'

        # Encoder: GCNConv + ReLU pairs, no activation after the bottleneck.
        # 784 ==> 9
        self.encoder = Sequential('x, edge_index', [
            (GCNConv(28 * 28, 128), conv_call),
            torch.nn.ReLU(),
            (GCNConv(128, 64), conv_call),
            torch.nn.ReLU(),
            (GCNConv(64, 36), conv_call),
            torch.nn.ReLU(),
            (GCNConv(36, 18), conv_call),
            torch.nn.ReLU(),
            (GCNConv(18, 9), conv_call),
        ])

        # Decoder: mirrored widths; the final Sigmoid keeps the outputs
        # between 0 and 1.
        # 9 ==> 784
        self.decoder = Sequential('x, edge_index', [
            (GCNConv(9, 18), conv_call),
            torch.nn.ReLU(),
            (GCNConv(18, 36), conv_call),
            torch.nn.ReLU(),
            (GCNConv(36, 64), conv_call),
            torch.nn.ReLU(),
            (GCNConv(64, 128), conv_call),
            torch.nn.ReLU(),
            (GCNConv(128, 28 * 28), conv_call),
            torch.nn.Sigmoid(),
        ])

    def forward(self, x, edge_index):
        """Encode and decode node features `x` over the graph given by `edge_index`."""
        encoded = self.encoder(x, edge_index)
        decoded = self.decoder(encoded, edge_index)
        return decoded


### GSP

In [None]:
def hfilter(G, cut=2):
    """Return the ideal high-pass graph filter matrix of graph `G`.

    Same definition as the `hfilter` in the Graph Unet section: the Laplacian
    ``G.L`` is eigen-decomposed, spectral components whose eigenvalue is below
    `cut` get gain 0 and all others gain 1.

    Returns the (N, N) array ``V @ diag(gains) @ V.T``.
    """
    spectrum, basis = np.linalg.eigh(G.L.toarray())
    gains = np.ones(G.N)
    gains[spectrum < cut] = 0
    high_pass = basis @ np.diag(gains) @ basis.T
    return high_pass

In [None]:
# High-pass filter matrix of G, built inline (same steps as hfilter(G, cut)).
cut = 2
L = G.L.toarray()
w, V = np.linalg.eigh(L)
# Gain 0 for eigenvalues below the cut-off, 1 otherwise.
wh = np.where(w < cut, 0.0, 1.0)
Hh = V @ np.diag(wh) @ V.T

In [None]:
mma = reload(mma)

# ROC/AUC of the high-pass score |Hh @ x| over 10 synthetic data sets.
auc = []
for seed in range(10):
    data, label = mma.create_data(
        G, anomaly=0.1, size=Nsamples, seed=seed,
        noise_var=1e-5, eigs=1, signal_power=1e-6,
    )
    data = scaler.fit_transform(data)
    # Column-major flattening matches the snapshot-by-snapshot order of `error`.
    label_vector = label.ravel(order='F')

    error = []
    df_result = []
    for snap, x in enumerate(data.T):
        y = Hh @ x
        error.extend(np.abs(y))

    error = np.asarray(error).ravel()
    tpr, fpr, thr = roc_params(error, label_vector, interp=True)
    auc.append(compute_auc(tpr,fpr))

In [None]:
# ROC curve of the last seed, coloured by the decision threshold.
fig = px.scatter(
    x=fpr,
    y=tpr,
    color=thr,
    labels={'x':'false positive rate', 'y':'true positive rate', 'color':'threshold'},
    width=700,
    height=500,
)
fig.update_traces(line={'dash':'dot'}, mode='lines+markers')
fig.show()

In [None]:
# Overlay the high-pass filtered first snapshot (drawn first) and the raw first snapshot.
plt.plot(Hh @ data[:,0])
plt.plot(data[:,0])
plt.show()

In [None]:
# Name of the interim parquet file to load.
# NOTE(review): this rebinds `dataset`, which the AE section uses for the MNIST data set.
dataset = 'df_Trondheim_D1L2B'
df_orig_full = pd.read_parquet(ROOT_DIR+f"/data/interim/{dataset}.parq")

In [None]:
# Re-import `mma` so that edits made to the module source are picked up.
mma = reload(mma)

# NOTE(review): these three values are not passed to compute_metric below;
# presumably it reads them as globals — confirm against its definition.
radius = 15
decay = 2
cut = 2

# `df_test` is an alias of the full frame (no copy is made).
df_test = df_orig_full
df_metrics = compute_metric(df_orig_full)

In [None]:
# Distribution of the 'hf' score in df_metrics: log-scaled density (top) and
# cumulative density restricted to the 0.85-1 range (bottom).
fig, ax = plt.subplots(nrows=2, figsize=(16,12))

# --- density ---------------------------------------------------------------
counts, bins, patches = ax[0].hist(df_metrics.hf, bins=30, log=True, rwidth=0.9, density=True)
ax[0].set(xlim=[0,df_metrics.hf.max()], title='Density')
ax[0].xaxis.set_tick_params(labelsize=12, rotation=-45)
ax[0].set_xticks(bins)  # one tick per bin edge

# --- cumulative ------------------------------------------------------------
counts, bins, patches = ax[1].hist(df_metrics.hf, bins=30, log=False, rwidth=0.9, cumulative=True, density=True)
ax[1].set(
    xlabel='Anomaly score',
    xlim=[0,df_metrics.hf.max()],
    ylim=[0.85,1],
    title='Cumulative',
)
ax[1].yaxis.set_major_formatter(StrMethodFormatter('{x:.3f}'))
ax[1].xaxis.set_tick_params(labelsize=12, rotation=-90)
ax[1].set_xticks(bins)  # one tick per bin edge

fig.tight_layout()
plt.show()

In [None]:
selector = 'group'
th = 80
th_up = np.inf

# First detection pass on the 'hf' score with thresholds [th, th_up].
df_relevant, selected = detection(
    df_metrics,
    column_name='hf',
    threshold_min=th,
    threshold_max=th_up,
    detection_param_threshold=20,
)

# One skewness value per group (one row per pid), merged back onto every row.
# NOTE(review): `skew` is not imported in the imports cell — confirm where it is defined.
df_var = (
    df_relevant
    .drop_duplicates('pid')
    .groupby('group')
    .apply(skew)
    .reset_index(name='skewness')
)
df_relevant = df_relevant.merge(df_var, how='left', on='group')
df_relevant


In [None]:
# Recompute the metric after removing every pid that belongs to a selected group.
flagged_pids = df_relevant.query('group.isin(@selected)').pid.unique()
df_metrics2 = compute_metric(df_orig_full[~df_orig_full.pid.isin(flagged_pids)])

In [None]:
# Distribution of the 'hf' score in df_metrics2: log-scaled density (top) and
# cumulative density restricted to the 0.85-1 range (bottom).
fig, ax = plt.subplots(nrows=2, figsize=(16,12))

# --- density ---------------------------------------------------------------
counts, bins, patches = ax[0].hist(df_metrics2.hf, bins=30, log=True, rwidth=0.9, density=True)
ax[0].set(xlim=[0,df_metrics2.hf.max()], title='Density')
ax[0].xaxis.set_tick_params(labelsize=12, rotation=-45)
ax[0].set_xticks(bins)  # one tick per bin edge

# --- cumulative ------------------------------------------------------------
counts, bins, patches = ax[1].hist(df_metrics2.hf, bins=30, log=False, rwidth=0.9, cumulative=True, density=True)
ax[1].set(
    xlabel='Anomaly score',
    xlim=[0,df_metrics2.hf.max()],
    ylim=[0.85,1],
    title='Cumulative',
)
ax[1].yaxis.set_major_formatter(StrMethodFormatter('{x:.3f}'))
ax[1].xaxis.set_tick_params(labelsize=12, rotation=-90)
ax[1].set_xticks(bins)  # one tick per bin edge

fig.tight_layout()
plt.show()

In [None]:
selector = 'group'
th = 40
th_up = np.inf

# Second detection pass, on the metrics recomputed without the first-pass pids.
df_relevant, selected = detection(
    df_metrics2,
    column_name='hf',
    threshold_min=th,
    threshold_max=th_up,
    detection_param_threshold=20,
)

# One skewness value per group (one row per pid), merged back onto every row.
# NOTE(review): `skew` is not imported in the imports cell — confirm where it is defined.
df_var = (
    df_relevant
    .drop_duplicates('pid')
    .groupby('group')
    .apply(skew)
    .reset_index(name='skewness')
)
df_relevant = df_relevant.merge(df_var, how='left', on='group')
df_relevant

In [None]:
zoom = 11.5
mma=reload(mma)

# Animated map of the relevant points: one frame per timestamp, colour from the
# 'smoothed' column (range -50..50), marker size from the 'hf' column.
mma.visualise_mismatch_map(
    df_relevant,  # [df_relevant.group.isin(selected)],
    size='hf',
    range_color=[-50,50],
    color='smoothed',
    animation_frame='timestamp',
    title='',
    transparent=True,
    hover_data=['group','easting','northing', 'smoothed'],
    figsize=(1200,650),
    zoom=zoom,
)

In [None]:
# Neighbourhood view of df_metrics2 on the 'hf' column with lower=40 (the same
# value as `th` in the second detection pass).
mma.relevant_neighborhood(df_metrics2, column_name='hf', lower=40, zoom=11, only_relevant=False, filter_dates=False)