In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df

# Root of the project tree; the three directories below are all derived from it.
# RESULTPATH is where this notebook reads the score file and writes the ROC
# output; PLOTPATH and DATAPATH are not referenced by the cells shown here.
PATH = '/storage/home/adz6/group/project'
RESULTPATH, PLOTPATH, DATAPATH = (
    os.path.join(PATH, subdir)
    for subdir in (
        'results/damselfly',
        'plots/damselfly',
        'damselfly/data/datasets',
    )
)

"""
Date: 7/23/2021
Description: plot matched-filter score distributions and ROC curves (per track length and track-length-weighted mean)
"""

In [None]:
# List the result directory to see which files are available to load.
os.listdir(RESULTPATH)

In [None]:
# Name of the .npz file (inside RESULTPATH) holding the scores analysed below.
resultfile = '210731_matched_filter_scores_variable_track_length_10K.npz'

In [None]:
# Open the selected archive; its arrays are read by key in the cells that follow.
result_filepath = os.path.join(RESULTPATH, resultfile)
numpyfile = np.load(result_filepath)

In [None]:
# Keep only index 0 along the second axis of each stored score array, leaving
# one 2-D score matrix per entry along the first axis.
noise_scores = numpyfile['noise'][:, 0, :, :]
signal_scores = numpyfile['signal'][:, 0, :, :]

# The histogram and ROC plotting cells label their curves with the track
# lengths, so load them here together with the scores instead of only further
# down the notebook (otherwise those cells fail on a fresh kernel).
# presumably one length per leading index of the score arrays -- TODO confirm
test_track_lengths = numpyfile['track_lengths']

In [None]:
# Quick sanity check of the dimensions of the sliced noise score array.
print(noise_scores.shape)

In [None]:
# Show the first noise score matrix as an image (no interpolation, axes
# stretched to fill the figure).
plt.imshow(noise_scores[0], interpolation='none', aspect='auto')

In [None]:
# Show the first signal score matrix as an image, with the same display
# settings as the noise image.
plt.imshow(signal_scores[0], interpolation='none', aspect='auto')

In [None]:
sns.set_theme(style='whitegrid', context='talk')

fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(1,1,1)

# Diagonal signal scores for a few selected entries, each labelled with the
# corresponding track length.
for idx in [0, 3, 31]:
    signal_diag = np.diag(signal_scores[idx])
    ax.hist(signal_diag, 64, histtype='step', label=f'length = {test_track_lengths[idx]}')

# Diagonal noise scores of the first entry, drawn on the same axes for comparison.
noise_diag = np.diag(noise_scores[0])
hist = ax.hist(noise_diag, 64, histtype='step', label='noise')

ax.legend(loc=1)

In [None]:
# 1-D copies of the complete score arrays (every entry, not just the diagonals).
noise_flat, signal_flat = noise_scores.flatten(), signal_scores.flatten()

In [None]:
# Build one ROC curve per entry along the first axis of the score arrays by
# sweeping a detection threshold over the diagonal scores.
n_threshold = 101
threshold = np.linspace(0, 25, n_threshold)

# ROC_curves[n, 0, :] holds the false-alarm rate and ROC_curves[n, 1, :] the
# true-positive rate at each threshold. The last axis is sized from
# n_threshold so the two cannot get out of step.
ROC_curves = np.zeros((noise_scores.shape[0], 2, n_threshold))

for n in range(noise_scores.shape[0]):
    # The diagonals do not depend on the threshold, so extract them once per entry.
    noise_diag = np.diag(noise_scores[n])
    signal_diag = np.diag(signal_scores[n])

    # Compare every score against every threshold at once (scores x thresholds);
    # the column means are the fractions of scores at or above each threshold.
    ROC_curves[n, 0, :] = np.mean(noise_diag[:, None] >= threshold[None, :], axis=0)
    ROC_curves[n, 1, :] = np.mean(signal_diag[:, None] >= threshold[None, :], axis=0)
    

    

In [None]:
sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8, 5))

# One ROC curve (false-alarm rate on x, true-positive rate on y) per selected
# entry. Labels are read from test_track_lengths, as in the histogram cell,
# instead of being typed in by hand, so they cannot drift from the data.
for n in [0, 3, 15, 31]:
    plt.plot(ROC_curves[n, 0, :], ROC_curves[n, 1, :], label=f'length = {test_track_lengths[n]}')

print(test_track_lengths[[0, 3, 15, 23, 31]])

plt.xlabel('False alarm rate')
plt.ylabel('True positive rate')
plt.legend(loc=4)

In [None]:
# Unweighted average over all curves: mean false-alarm rate on x, mean
# true-positive rate on y.
mean_far = ROC_curves[:, 0, :].mean(axis=0)
mean_tpr = ROC_curves[:, 1, :].mean(axis=0)
plt.plot(mean_far, mean_tpr)

In [None]:
import scipy.integrate as integrate

In [None]:
# Area under the unweighted mean ROC curve. The thresholds ascend, so the
# false-alarm rate runs from high to low and the trapezoid integral comes out
# negative; the leading minus flips the sign.
mean_far = ROC_curves[:, 0, :].mean(axis=0)
mean_tpr = ROC_curves[:, 1, :].mean(axis=0)
-integrate.trapezoid(mean_tpr, x=mean_far)

In [None]:

sns.set_theme(style='whitegrid', context='talk')

# Fixed seed so the sampled distribution -- and every quantity derived from it
# in later cells, including the saved weighted ROC curve -- is the same on
# each run.
SEED = 0
rng = np.random.default_rng(SEED)

# 100000 draws from an exponential distribution with scale 3.
# presumably in units of 8192 samples, since a later cell multiplies the bin
# edges by 8192 before comparing them with track lengths -- TODO confirm
y = rng.exponential(3, 100000)

fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

# 101-bin histogram; `hist` is (counts, bin edges, patches) and is reused by
# later cells.
hist = ax.hist(y, 101)

ax.set_yscale('log')

In [None]:
# Number of samples below 0.5 and at-or-above 0.5. Counting the boolean mask
# directly also works when exactly one sample matches, where len() of a
# squeezed argwhere result would fail on a 0-d array.
print(np.count_nonzero(y < .5), np.count_nonzero(y >= .5))

In [None]:
# Inspect the raw return value of ax.hist: (bin counts, bin edges, patches).
hist

In [None]:
# ax.hist returned (counts, bin edges, patches).
counts, bin_edges = hist[0], hist[1]

# Fraction of samples per bin. Normalising by the histogram's own total keeps
# this correct if the sample size in the sampling cell is changed.
prob = counts / counts.sum()

# Left edge of every bin (all edges except the final right edge), so that
# track_len and prob have the same length whatever the bin count.
track_len = bin_edges[:-1]

In [None]:
# Per-bin probability against the bin's left edge, on a logarithmic y axis.
ax_prob = plt.gca()
ax_prob.plot(track_len, prob)
ax_prob.set_yscale('log')

In [None]:
# Track lengths stored alongside the scores in the result archive.
# NOTE(review): this name is also referenced by plotting cells earlier in the notebook.
test_track_lengths = numpyfile['track_lengths']

In [None]:
# For every test track length, take the probability of the histogram bin whose
# left edge (scaled by 8192) lies closest to that length.
scaled_edges = track_len * 8192
test_track_length_prob = np.array([
    prob[np.argmin(np.abs(scaled_edges - length))]
    for length in test_track_lengths
])


In [None]:
# Rescale the looked-up probabilities so that they sum to one.
total_length_prob = test_track_length_prob.sum()
relative_length_prob = test_track_length_prob / total_length_prob

In [None]:
# Average of the ROC curves, weighting each curve by its relative
# track-length probability.
n_curves = ROC_curves.shape[0]

# There must be exactly one weight per curve; fail loudly instead of silently
# ignoring surplus weights or hitting an index error part-way through.
if relative_length_prob.shape != (n_curves,):
    raise ValueError(
        f'expected {n_curves} weights, one per ROC curve, '
        f'got shape {relative_length_prob.shape}'
    )

# Broadcast the weights over the (rate, threshold) axes and sum over curves;
# the result has shape (ROC_curves.shape[1], ROC_curves.shape[2]).
weighted_mean_ROC = np.sum(relative_length_prob[:, None, None] * ROC_curves, axis=0)

    
    


In [None]:
# Weighted mean ROC curve: row 0 is the false-alarm rate, row 1 the
# true-positive rate.
far_weighted, tpr_weighted = weighted_mean_ROC
plt.plot(far_weighted, tpr_weighted)

In [None]:
# Area under the weighted mean ROC curve. The false-alarm rate runs from high
# to low along the threshold axis, so the trapezoid integral is negative and
# is negated here.
far_weighted, tpr_weighted = weighted_mean_ROC
-integrate.trapezoid(tpr_weighted, x=far_weighted)

In [None]:
# Write the weighted mean ROC curve into the 'roc' subdirectory of the result
# directory, creating that subdirectory first so the save cannot fail on a
# missing folder.
roc_dir = os.path.join(RESULTPATH, 'roc')
os.makedirs(roc_dir, exist_ok=True)

# NOTE(review): np.save appends '.npy' when the given name does not already end
# with it, so the file on disk is '210802_mf_roc_df84_mt6.np.npy'. The name is
# left unchanged here so any existing reader of that file keeps working.
np.save(os.path.join(roc_dir, '210802_mf_roc_df84_mt6.np'), weighted_mean_ROC)