In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import networkx as nx
import matplotlib
from matplotlib.colors import ListedColormap
import plotly.graph_objects as go

import math
import os
import gc
import argparse
import torch
import optuna
import joblib
import pickle
import tifffile
import nibabel
import scipy.io
import pygsp
import scipy.ndimage
import pyod
import warnings
import hashlib
import sqlite3

from scipy.stats import entropy, kurtosis, skew
from sklearn.mixture import GaussianMixture
from torch_geometric.utils import dense_to_sparse
from sklearn.cluster import KMeans, BisectingKMeans, SpectralClustering
from sklearn.metrics import pairwise_distances

from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.cof import COF
from pyod.models.cblof import CBLOF
from pyod.models.kpca import KPCA

from torch import nn, Tensor
from torch.nn import Linear, Conv1d, LayerNorm, DataParallel, ReLU, Sequential, Parameter
from torch_geometric.nn.dense import mincut_pool, dense_mincut_pool
from torch_geometric.datasets import AttributedGraphDataset
from torch_geometric.utils import to_networkx, subgraph, to_dense_adj

import source.nn.models as models
import source.utils.utils as utils
import source.utils.fault_detection as fd

from source.utils.utils import roc_params, compute_auc, get_auc, best_mcc, best_f1score, otsuThresholding
from source.utils.utils import synthetic_timeseries
from source.utils.utils import plotly_signal


from importlib import reload

# Re-execute the project modules so that edits made on disk are picked up when
# this cell is re-run in a live kernel.
models = reload(models)
utils = reload(utils)

# reload() only refreshes the module object. Names copied into this namespace
# with `from source.utils.utils import ...` before the reload would keep
# pointing at the old objects, so bind them again from the refreshed module.
from source.utils.utils import roc_params, compute_auc, get_auc, best_mcc, best_f1score, otsuThresholding
from source.utils.utils import synthetic_timeseries
from source.utils.utils import plotly_signal

# Project root as a plain string, used to build output paths below.
from pyprojroot import here
root_dir = str(here())

# Input data lives under the user's home directory.
data_dir = os.path.expanduser('~/data/interim/')

# Global matplotlib font settings.
matplotlib.rcParams.update({'font.size': 20})
matplotlib.rcParams.update({'font.family': 'DejaVu Serif'})

# Allow up to 500 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)


In [None]:
def add_anomaly(node_positions, num_timestamps, anomaly_radius, onset, transient, sigma_decay=3):
    """Build a spatially Gaussian anomaly that ramps up linearly after an onset.

    Parameters
    ----------
    node_positions : ndarray, shape (num_nodes, >=2)
        Node coordinates. Only the first two columns enter the Gaussian
        profile; the distance used for ``sigma`` uses all columns.
    num_timestamps : int
        Number of columns of the returned matrices.
    anomaly_radius
        Forwarded unchanged to ``utils.select_radius`` together with
        ``node_positions``; its exact meaning is defined by that helper.
    onset : float
        Fraction of the time axis at which the anomaly starts; the onset
        column is ``int(onset * num_timestamps)``.
    transient : number
        Number of timestamps over which the amplitude grows linearly from 0
        (at the onset column) to 1, after which it stays at 1. ``0`` is
        treated as an immediate step, i.e. the same result as any
        ``0 < transient <= 1``, instead of dividing by zero.
    sigma_decay : number, default 3
        The Gaussian standard deviation is the distance from the anomaly
        centre to the last selected node divided by this value.

    Returns
    -------
    anomaly_matrix : ndarray, shape (num_nodes, num_timestamps)
        Temporal ramp multiplied by each node's Gaussian amplitude; zero for
        unselected nodes and before the onset.
    label : ndarray, shape (num_nodes, num_timestamps)
        1 for selected nodes from the onset column onwards, 0 elsewhere.
    """
    num_nodes = node_positions.shape[0]
    selected_nodes, anomaly_center = utils.select_radius(node_positions, anomaly_radius)

    diff = node_positions - anomaly_center
    distances = np.linalg.norm(diff, axis=1)
    # Presumably selected_nodes is ordered by distance so that its last entry
    # is the farthest selected node -- TODO confirm against utils.select_radius.
    sigma = distances[selected_nodes[-1]]/sigma_decay

    # Per-node amplitude of an isotropic 2-D Gaussian around the anomaly centre.
    anomaly = np.exp(- (diff[:,0]**2)/(2*sigma**2) - (diff[:,1]**2)/(2*sigma**2))

    onset_index = int(onset*num_timestamps)

    label = np.zeros((num_nodes, num_timestamps))
    label[selected_nodes, onset_index:] = 1

    # Number of timestamps elapsed since the onset, shape (num_timestamps,).
    steps_since_onset = np.arange(num_timestamps) - onset_index

    if transient == 0:
        # Limit of the linear ramp for a vanishing transient: 0 at the onset
        # column, full amplitude on every later column.
        transient_matrix = (steps_since_onset > 0).astype(float)*label
    else:
        # Broadcasting against label (num_nodes, num_timestamps) masks out
        # unselected nodes and pre-onset columns.
        transient_matrix = (steps_since_onset/transient)*label
        transient_matrix[transient_matrix>1] = 1

    # Scale every node's ramp by its Gaussian amplitude (column-vector broadcast).
    anomaly_matrix = transient_matrix*anomaly.reshape((-1,1))

    return anomaly_matrix, label

In [None]:
# Read the raw measurements for one dataset.
dataset = 'df_StOlavs_D1L2B'
df_orig = pd.read_parquet(f'{data_dir}{dataset}.parq')

# Keep rows with timestamp before '2022-06', then average each 'pid' over
# 30-day windows.
df_ds = (
    df_orig.loc[df_orig['timestamp'] < '2022-06']
    .groupby('pid')
    .resample('30d', on='timestamp')
    .mean()
    .reset_index()
)

# Project helpers: derive the node table, then tag every node with the
# subgraph it belongs to in a radius-15 neighbourhood graph.
df, nodes = fd.treat_nodes(df_ds)
_graph, subgraph_ids = fd.NNGraph(nodes, radius=15, subgraphs=True)
nodes['subgraph'] = subgraph_ids

# Keep only the most frequent subgraph id and rebuild the graph on those nodes.
main_graph = nodes['subgraph'].value_counts().index[0]
nodes = nodes[nodes['subgraph'] == main_graph].copy()
G = fd.NNGraph(nodes, radius=15)

In [None]:
# np.random.seed(0)
N, T = 200, 20

# Regular 2-D grid graph; the reshape to (N, N, T) below requires the anomaly
# matrix to hold N*N rows.
G = pygsp.graphs.Grid2d(N)
anomaly, label = add_anomaly(G.coords, num_timestamps=T, anomaly_radius=0.2,
                             onset=0, transient=10, sigma_decay=2)

# Animate over the time axis (axis 2) with a colour scale fixed to the global
# min/max so that frames are comparable.
px.imshow(
    anomaly.reshape((N, N, T)),
    animation_frame=2,
    aspect='auto',
    range_color=[anomaly.min(), anomaly.max()],
    width=600,
    height=600,
)

In [None]:
# Display the label matrix returned by the add_anomaly call in the previous cell.
label

In [None]:
Number_of_nodes = 500
coords = G.coords

# Bounding box of the node coordinates: lower corner and extent per axis.
offset = coords.min(axis=0)
ranges = coords.max(axis=0) - offset

# Draw a random start location inside the bounding box, keeping a 5% margin
# on every side (first two axes only).
start_coords = offset + np.random.uniform(0.05*ranges[:2], 0.95*ranges[:2])
print(offset)
print(start_coords)

# The Number_of_nodes nodes closest to the start location, nearest first.
distances = np.linalg.norm(coords - start_coords, axis=1)
selected_nodes = np.argsort(distances)[:Number_of_nodes]

In [None]:
# Gaussian width: one third of the distance from start_coords to the last
# entry of selected_nodes (the farthest selected node when selected_nodes
# comes from the argsort in the previous cell).
sigma = distances[selected_nodes[-1]]/3
diff = coords - start_coords

# Isotropic 2-D Gaussian bump centred on start_coords, peak value 100.
anomaly = 100*np.exp(- (diff[:,0]**2)/(2*sigma**2) - (diff[:,1]**2)/(2*sigma**2))

# Indicator vector over all G.N nodes: 1 on the selected nodes, 0 elsewhere.
signal = np.zeros((G.N,))
signal[selected_nodes] = 1

In [None]:
# Plot the node-selection indicator, then the Gaussian bump masked by it.
# NOTE(review): a notebook only auto-displays the value of the last expression,
# so the first figure appears only if plotly_signal renders it itself -- confirm.
plotly_signal(G, signal, width=600, height=500)
plotly_signal(G, anomaly*signal, width=600, height=500)

In [None]:
# select_radius is only reachable through the imported `utils` module (the
# bare name is never imported in this notebook), so call it via `utils`.
# The first element of its return value holds the selected node indices.
selected_nodes = utils.select_radius(G.coords, 40)[0]
# A bare expression in the middle of a cell is evaluated and discarded, so
# print the count explicitly.
print(len(selected_nodes))

# Indicator vector over all G.N nodes: 1 on the selected nodes, 0 elsewhere.
signal = np.zeros((G.N,))
signal[selected_nodes] = 1
plotly_signal(G, signal, width=600, height=500)


In [None]:
# NOTE(review): `geological_events` is neither defined nor imported in the
# visible part of this notebook -- confirm where it comes from; this cell
# raises NameError on a fresh kernel otherwise.
# Overwrites the `label` produced by add_anomaly above.
data_matrix, label = geological_events(G.coords, 20, 50, 0.25, 10)

In [None]:
# Indices of the rows of `label` whose row-wise maximum is non-zero.
np.where(label.max(axis=1))

In [None]:
# Line plot of row 3312 of data_matrix.
px.line(data_matrix[3312,:])

In [None]:
# View data_matrix as a 100 x 100 x 20 array; this requires exactly 200000 elements.
# NOTE(review): the most recent G above is Grid2d(200); verify that the size
# of data_matrix really matches a 100 x 100 grid here.
data_matrix.reshape((100,100,20))

In [None]:
# Plot column 6 of data_matrix as a signal on graph G.
plotly_signal(G, data_matrix[:,6])

In [None]:
# Shape check: repeat the per-node amplitude column num_timestamps times along axis 1.
# NOTE(review): `num_timestamps` is only assigned in a later cell, so this
# fails on a top-to-bottom run from a fresh kernel.
np.tile(anomaly.reshape((-1,1)), (1, num_timestamps)).shape

In [None]:
# Largest value of the transient matrix after scaling each row by its amplitude.
# NOTE(review): `transient_matrix` is only assigned in a later cell, and the
# reshape hard-codes 10000 nodes -- verify both against the current kernel state.
(transient_matrix*(anomaly.reshape(10000,1))).max()

In [None]:
# Small-scale walk-through of the ramp construction used in add_anomaly.
num_nodes = 6
num_timestamps = 10
onset = 0.5
transient = 3

# Column at which the anomaly starts.
onset_index = int(onset*num_timestamps)

# One row per node, each row holding the timestamp indices 0..num_timestamps-1.
transient_matrix = np.tile(np.arange(num_timestamps), (num_nodes, 1))

# Timestamps elapsed since the onset, in units of the transient length.
transient_matrix = (transient_matrix - onset_index)/transient
# NOTE(review): `label` must broadcast against (num_nodes, num_timestamps) = (6, 10);
# no cell in the visible notebook creates a label of that shape -- confirm.
transient_matrix = transient_matrix*label
# Cap the ramp at 1 once the transient has elapsed.
transient_matrix[transient_matrix>1] = 1

In [None]:
# One uniform random amplitude in [0, 1) per node; overwrites the earlier `anomaly`.
anomaly = np.random.rand(num_nodes)

In [None]:
# Display the current value of `anomaly`.
anomaly

In [None]:
# Scale every row of transient_matrix by that node's amplitude (column-vector broadcast).
transient_matrix*anomaly.reshape((-1,1))

In [None]:
# Six evenly spaced points on [0, 1] with the leading 0 dropped: 0.2, 0.4, ..., 1.0.
np.linspace(0,1,6)[1:]

In [None]:
# NOTE(review): `geological_events` is not defined or imported in the visible
# notebook; here it is called with four positional arguments, whereas the
# earlier call passes five -- confirm the intended signature.
geological_events(G.coords, 10, 50, 0.25)

In [None]:
# Fresh all-zero 5 x 10 label matrix; overwrites any earlier `label`.
label = np.zeros((5,10))

In [None]:
# int() truncates towards zero rather than rounding: this evaluates to 22.
int(22.9999)

In [None]:
# 0.25*90 is 22.5, which int() truncates to 22.
int(0.25*90)

In [None]:
# Self-contained demo of marking a subset of rows from a given column onwards.
# Both inputs are created here so the cell runs on a fresh kernel and gives the
# same result when re-run (in file order, selected_nodes was otherwise only
# defined in a later cell).
selected_nodes = np.array((1, 2))
label = np.zeros((5, 10))

# Rows 1 and 2 are set to 1 from column 3 to the end.
label[selected_nodes, 3:] = 1
label

In [None]:
# Small index array (rows 1 and 2), suitable for indexing the 5 x 10 `label`
# matrix as in `label[selected_nodes, 3:]`; overwrites the earlier selected_nodes.
selected_nodes = np.array((1,2))

_____________________

# Locality results (synthetic data)

----------------------

In [None]:
# Load the two locality experiments; each file carries both a 'VR' and a 'DR'
# column, and only the one matching the experiment is kept.
df_test_VR = pd.read_parquet(root_dir+'/outputs/testing_mincut/df_locality_VR.parq').drop(columns=['DR'])
df_test_DR = pd.read_parquet(root_dir+'/outputs/testing_mincut/df_locality_DR.parq').drop(columns=['VR'])

# Average every column per 'bin' and store the locality measure under the
# common name 'Var', tagged with which measure it is.
df_means_VR = (
    df_test_VR.groupby('bin', as_index=False).mean()
    .rename(columns={'VR': 'Var'})
    .assign(Varname='VR')
)
df_means_DR = (
    df_test_DR.groupby('bin', as_index=False).mean()
    .rename(columns={'DR': 'Var'})
    .assign(Varname='DR')
)

# Long format: one row per (bin mean, score).
df = pd.concat([df_means_VR, df_means_DR])
df_long = df.melt(id_vars=['Var', 'Varname'], value_vars=['AUC', 'F1 score', 'MCC'],
                  var_name='Metric', value_name='Value')

fig = px.scatter(df_long, x='Var', y='Value', color='Metric', facet_col='Varname',
                 trendline='lowess', height=400, width=1000, template='plotly_white')

# Decouple the x axes so that each facet can be edited separately.
fig.update_xaxes(matches=None)

# Same black frame on every axis; per-facet x title and a common y range.
axis_frame = dict(showline=True, mirror='allticks', linewidth=1, linecolor='black')
for facet_col, facet_title in enumerate(("VR", "DR"), start=1):
    fig.update_xaxes(title_text=facet_title, col=facet_col, row=1, **axis_frame)
    fig.update_yaxes(range=[0.2,1], col=facet_col, row=1, **axis_frame)

# Blank out the automatic facet annotations.
fig.for_each_annotation(lambda annotation: annotation.update(text=""))

legend_style = dict(x=0.65, y=0.45, xanchor="right", yanchor="top",
                    bgcolor="rgba(255, 255, 255, 0.8)")
fig.update_layout(
    font=dict(family="Times New Roman", size=22),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    legend=legend_style,
    margin=dict(l=20, r=20, t=1, b=40),
)

fig.write_image(root_dir+'/outputs/figs/TPAMI/locality_synthetic.png')
fig.show()


### Oil Spill

In [None]:
S_partials_list = []
label_list = []
shape_list = []

# One entry per image: {'data', 'labels', 'G', 'metadata'}.
datasets = []

# The same down-sampling factor is used for every image, so set it once.
downsample_factor = 0.1

for im in range(1,19):

    mat = scipy.io.loadmat(data_dir+f'HSIoil/GM{im:02}.mat')

    # Named `label_map` rather than `map` so that the builtin map() is not
    # shadowed for the rest of the notebook.
    # NOTE(review): order=3 (cubic spline) is also applied to the label map; if
    # mat['map'] is a discrete mask, interpolation can produce values outside
    # the original label set -- consider order=0. Left unchanged here so that
    # previously computed results stay reproducible.
    label_map = scipy.ndimage.zoom(mat['map'], zoom=downsample_factor, order=3)
    # Down-sample the two leading axes only; the last axis is kept at full resolution.
    data_orig = scipy.ndimage.zoom(mat['img'], zoom=(downsample_factor, downsample_factor, 1), order=3)
    # Flatten the two leading axes: one row per pixel, one column per entry of the last axis.
    X = data_orig.reshape(data_orig.shape[0]*data_orig.shape[1],data_orig.shape[2])
    # X = torch.tensor(X.astype(np.float32)).float()

    metadata = {'samples':1,
                'id':im,
                'N':X.shape[0],
                'T':X.shape[1],
                'downsample':downsample_factor
                }

    print(im)

    # Grid graph with the same height and width as the down-sampled image.
    G = pygsp.graphs.Grid2d(data_orig.shape[0],data_orig.shape[1])

    dataset = {'data':[X], 'labels':[label_map.reshape(-1,)], 'G':G, 'metadata':metadata}
    datasets.append(dataset)

    # Disabled training experiment, kept for reference.
    # coords = G.coords
    # A = G.W.toarray()
    # idx = np.lexsort((-coords[:, 1], coords[:, 0]))
    # A = torch.tensor(A[np.ix_(idx,idx)]).float()
    # A = A.to(device)

    # n_timestamps = X.shape[1]
    # n_clusters = 5
    # n_extra_feats = 0
    # weight_loss = 1

    # model = models.ClusterTS(n_timestamps, n_clusters, n_extra_feats)
    # model = model.to(device)

    # epochs_list = [1,25,50,100,200,500,1000]
    # S_partials, lmc, lo = train_cluster(epochs_list, model, X, G, device, weight_loss)
    # S_partials_list.append(S_partials)

In [None]:
# Prefer the GPU index this notebook was developed on, but fall back to the
# first GPU, or to the CPU, so the cell also runs on machines with fewer devices.
preferred_gpu = 2
if torch.cuda.is_available() and torch.cuda.device_count() > preferred_gpu:
    device = f'cuda:{preferred_gpu}'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# joblib.load unpickles the file, which can execute arbitrary code: only load
# study files that were produced by this project.
study = joblib.load(root_dir+'/outputs/HP_training/SB_MC.pkl')
best_params = study.best_params

# `metadata` is the dict left over from the last iteration of the loading loop
# above; its 'T' entry is the number of columns of that image's data matrix.
model = models.ClusterTS(metadata['T'], n_clusters=best_params['n_clusters'])
model = model.to(device)

In [None]:
# Evaluate the model on every loaded dataset using the tuned hyper-parameters.
# NOTE(review): `test_locality` is neither defined nor imported in the visible
# part of this notebook -- confirm where it comes from; the cell raises
# NameError on a fresh kernel otherwise.
df_test = test_locality(model, datasets,
                        epochs=best_params['N_epochs'],
                        weight_loss=best_params['weight_loss'],
                        lr=best_params['lr'],
                        skewth=best_params['skewth'],
                        device=device
                    )

In [None]:
# Persist the oil-spill locality results; a later cell reads this file back.
df_test.to_parquet(root_dir+'/outputs/testing_mincut/df_locality_OIL.parq')

In [None]:
# Quick look: AUC against VR with a LOWESS trend line.
px.scatter(df_test, x='VR', y='AUC', trendline='lowess').show()

In [None]:
# Preview the first rows of the results table.
df_test.head()

In [None]:
# Stack the 'VR' and 'DR' columns into a 'Varname'/'Var' pair, keeping the
# sample id and the three scores on every row.
# NOTE(review): the next cell starts with this exact statement, so this cell is redundant.
df = df_test.melt(id_vars=['sample_id','AUC','F1 score', 'MCC'],
                  value_vars=['VR','DR'], var_name='Varname', value_name='Var')

In [None]:
# Reshape the oil-spill results twice: first stack the two locality measures
# (VR, DR) into 'Varname'/'Var', then stack the three scores into
# 'Metric'/'Value'.
score_columns = ['AUC', 'F1 score', 'MCC']
df = df_test.melt(id_vars=['sample_id', *score_columns], value_vars=['VR', 'DR'],
                  var_name='Varname', value_name='Var')
df_long = df.melt(id_vars=['Var', 'Varname'], value_vars=score_columns,
                  var_name='Metric', value_name='Value')

fig = px.scatter(df_long, x='Var', y='Value', color='Metric', facet_col='Varname',
                 trendline='lowess', height=400, width=1000, template='plotly_white')

# Decouple the x axes so that each facet can be edited separately.
fig.update_xaxes(matches=None)

# Same black frame on every axis; per-facet x title and a common y range.
axis_frame = dict(showline=True, mirror='allticks', linewidth=1, linecolor='black')
for facet_col, facet_title in enumerate(("VR", "DR"), start=1):
    fig.update_xaxes(title_text=facet_title, col=facet_col, row=1, **axis_frame)
    fig.update_yaxes(range=[0.2,1], col=facet_col, row=1, **axis_frame)

# Blank out the automatic facet annotations.
fig.for_each_annotation(lambda annotation: annotation.update(text=""))

legend_style = dict(x=0.65, y=0.45, xanchor="right", yanchor="top",
                    bgcolor="rgba(255, 255, 255, 0.8)")
fig.update_layout(
    font=dict(family="Times New Roman", size=22),
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0)',
    legend=legend_style,
    margin=dict(l=20, r=20, t=1, b=40),
)

fig.write_image(root_dir+'/outputs/figs/TPAMI/locality_Oil.png')
fig.show()

In [None]:
# Reload the saved oil-spill results so the cells below can run without re-evaluating the model.
df_test = pd.read_parquet(root_dir+'/outputs/testing_mincut/df_locality_OIL.parq')

In [None]:
# Display the full results table.
df_test

In [None]:
# 3 x 6 grid, flattened so that panel k-1 shows image k.
fig, axes = plt.subplots(3, 6, figsize=(18, 9))
axes = axes.ravel()

for idx, ax in enumerate(axes, start=1):
    # Ground-truth map of image `idx`.
    mat = scipy.io.loadmat(data_dir + f'HSIoil/GM{idx:02}.mat')
    image = mat['map']

    # First results row whose id matches this image.
    row = df_test.loc[df_test['id'] == idx].iloc[0]

    ax.imshow(image, cmap='gray')
    ax.axis('off')

    # Scores are written just to the right of the image, inside a translucent box.
    metrics_text = (f"VR: {row['VR']:.2f}\n"
                    f"DR: {row['DR']:.2f}\n"
                    f"AUC: {row['AUC']:.2f}\n"
                    f"F1 score: {row['F1 score']:.2f}\n"
                    f"MCC: {row['MCC']:.2f}")
    ax.text(1.05, 0.5, metrics_text,
            transform=ax.transAxes,
            verticalalignment='center',
            fontsize=10,
            bbox=dict(facecolor='white', alpha=0.6))

plt.tight_layout()
plt.show()

In [None]:
from PIL import Image

# Step 1: collect the shape of every ground-truth map and average them.
map_shapes = [
    scipy.io.loadmat(data_dir + f'HSIoil/GM{im:02}.mat')['map'].shape
    for im in range(1, 19)
]
heights = [shape[0] for shape in map_shapes]
widths = [shape[1] for shape in map_shapes]

avg_height = int(np.mean(heights))
avg_width = int(np.mean(widths))

# Step 2: 3 x 6 grid, flattened so that panel k-1 shows image k.
fig, axes = plt.subplots(3, 6, figsize=(18, 9))
axes = axes.ravel()

# Step 3: show every map resized to the common average size, with its scores.
for idx, ax in enumerate(axes, start=1):
    mat = scipy.io.loadmat(data_dir + f'HSIoil/GM{idx:02}.mat')
    image = mat['map']
    # PIL's resize takes the target size as (width, height).
    image_resized = np.array(Image.fromarray(image).resize((avg_width, avg_height)))

    # First results row whose id matches this image.
    row = df_test.loc[df_test['id'] == idx].iloc[0]

    ax.imshow(image_resized, cmap='gray')
    ax.axis('off')

    # Scores are written just to the right of the image.
    metrics_text = (f"VR: {row['VR']:.2f}\n"
                    f"DR: {row['DR']:.2f}\n\n"
                    f"AUC: {row['AUC']:.2f}\n"
                    f"F1 score: {row['F1 score']:.2f}\n"
                    f"MCC: {row['MCC']:.2f}")
    ax.text(1.05, 0.5, metrics_text,
            transform=ax.transAxes,
            verticalalignment='center',
            fontsize=10)

plt.tight_layout()
plt.show()


In [None]:
# matplotlib.pyplot, scipy.io, pandas and numpy are already imported in the
# first cell of the notebook; only PIL is needed in addition.
from PIL import Image

# df_test is expected to provide the columns 'id', 'VR', 'DR', 'AUC',
# 'F1 score' and 'MCC' (those are the ones read below).

# Step 1: order the results by increasing VR; the new 0..n-1 index doubles as
# the panel position in the figure.
df_test_sorted = df_test.sort_values(by='VR').reset_index(drop=True)

# Step 2: read every ground-truth map once and reuse it both for the average
# size and for plotting (instead of loading each file twice).
maps_by_id = {
    im: scipy.io.loadmat(data_dir + f'HSIoil/GM{im:02g}.mat')['map']
    for im in range(1, 19)
}
heights = [image.shape[0] for image in maps_by_id.values()]
widths = [image.shape[1] for image in maps_by_id.values()]

avg_height = int(np.mean(heights))
avg_width = int(np.mean(widths))

# Step 3: 3 x 6 grid, flattened for linear indexing.
fig, axes = plt.subplots(3, 6, figsize=(18, 9))
axes = axes.ravel()

# Step 4: plot the maps in order of increasing VR, resized to a common size.
for idx, row in df_test_sorted.iterrows():
    # Values in `row` may have been upcast to float by iterrows, hence the int().
    image = maps_by_id[int(row['id'])]
    # PIL's resize takes the target size as (width, height).
    image_resized = np.array(Image.fromarray(image).resize((avg_width, avg_height)))

    ax = axes[idx]
    ax.imshow(image_resized, cmap='viridis')
    ax.axis('off')

    # Scores are written just to the right of the image.
    metrics_text = (f"VR: {row['VR']:.2f}\n"
                    f"DR: {row['DR']:.2f}\n\n"
                    f"AUC: {row['AUC'].round(2)}\n"
                    f"F1: {row['F1 score'].round(2)}\n"
                    f"MCC: {row['MCC'].round(2)}")

    ax.text(1.05, 0.5, metrics_text,
            transform=ax.transAxes,
            verticalalignment='center',
            fontsize=18)

plt.tight_layout()
# plt.savefig(root_dir+'/outputs/figs/TPAMI/locality_Oil_images.png')
plt.show()


In [None]:
# Display the full results table.
df_test