In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import scipy.signal
import scipy.stats

# Root of the project tree; the result/plot/data locations below are derived from it.
PATH = '/storage/home/adz6/group/project'
# Output directory for saved results (the ROC cells write .npz files under RESULTPATH/roc).
RESULTPATH = os.path.join(PATH, 'results/damselfly')
# Output directory for figures (only referenced by a commented-out savefig in this notebook).
PLOTPATH = os.path.join(PATH, 'plots/damselfly')
# NOTE(review): DATAPATH and SIMDATAPATH are not referenced by any visible cell;
# the dataset below is opened from PATH/'sim_data/datasets' instead — confirm
# which location is the intended one.
DATAPATH = os.path.join(PATH, 'damselfly/data/datasets')
SIMDATAPATH = os.path.join(PATH, 'damselfly/data/sim_data')

"""
Date: 6/25/2021
Description: template
"""


# NOTE(review): the folder (210729_...) and file (210726_...) carry different
# date prefixes — confirm this is the intended file.
dataset_file = '210729_df_84/210726_df_84_1.h5'
# Opened read-only; the handle is never closed in the visible cells.
h5datafile = h5py.File(os.path.join(PATH, 'sim_data/datasets', dataset_file), 'r')

# Noise scale used when normalising templates (also the default of MakeTemplates).
# NOTE(review): presumably k_B [J/K] * temperature [K] * impedance [ohm] * bandwidth [Hz]
# — confirm the meaning of each factor.
var = 1.38e-23 * 10 * 50 * 200e6

# NOTE(review): later cells use `signals`, but its only visible assignment is
# commented out here, so those cells raise NameError on a fresh kernel unless
# `signals` is defined elsewhere.
#signals = h5datafile['data'][:, 0:8192]

#metadata = h5datafile['meta']
#Nsignals = signals.shape[0]

def MakeTemplates(signals, var =  1.38e-23 * 10 * 50 * 200e6):
    """Scale each signal into a normalised matched-filter template.

    Row ``i`` of the result equals
    ``signals[i] / sqrt(var * sum_k |signals[i, k]|**2)``.

    Parameters
    ----------
    signals : array_like, shape (n_signals, n_samples)
        One signal per row; real or complex.
    var : float, optional
        Noise scale factor applied inside the square root.

    Returns
    -------
    numpy.ndarray, shape (n_signals, n_samples)
        Row-normalised copies of ``signals``. A row with zero energy yields
        non-finite values (division by zero).
    """
    signals = np.asarray(signals)

    # Only the diagonal of signals @ signals^H is needed, i.e. each row's
    # energy. Summing |s|^2 per row avoids building the full
    # (n_signals, n_signals) Gram matrix just to read its diagonal.
    row_energy = np.sum(np.abs(signals) ** 2, axis=-1, keepdims=True)

    norm = 1 / np.sqrt(var * row_energy)

    # (n_signals, 1) * (n_signals, n_samples): broadcasting replaces the
    # explicit repeat of the normalisation along the sample axis.
    return norm * signals

def RandomSplitIndex(signals, N_test_signals=100, rng=None):
    """Randomly split the rows of ``signals`` into template and test indices.

    Parameters
    ----------
    signals : numpy.ndarray
        Array whose first axis enumerates the signals; only ``shape[0]`` is used.
    N_test_signals : int, optional
        Number of distinct rows to hold out as test signals.
    rng : None, int or numpy.random.Generator, optional
        Seed or generator passed to ``numpy.random.default_rng``. ``None``
        (the default) draws fresh entropy, so the split is not reproducible.

    Returns
    -------
    template_indx : numpy.ndarray
        Sorted indices of the rows that were not held out.
    test_signal_indx : numpy.ndarray
        The ``N_test_signals`` distinct held-out row indices.

    Raises
    ------
    ValueError
        If ``N_test_signals`` is larger than the number of signals.
    """
    n_signals = signals.shape[0]

    rng = np.random.default_rng(rng)

    # Draw WITHOUT replacement. Sampling with replacement can repeat an index,
    # which scores the same test signal more than once and leaves more than
    # n_signals - N_test_signals rows in the template bank.
    test_signal_indx = rng.choice(n_signals, size=N_test_signals, replace=False)

    template_indx = np.delete(np.arange(n_signals), test_signal_indx, axis=0)

    return template_indx, test_signal_indx

def CalcScores(signals, template_indx, signal_indx):
    """Score the selected test signals against the selected templates.

    Templates are built with ``MakeTemplates`` from the rows ``template_indx``
    of ``signals``. The returned array has one row per template and one
    column per test signal and holds ``|template . conj(signal)|``.
    """
    bank = MakeTemplates(signals[template_indx, :])
    probes = signals[signal_indx, :]

    return np.abs(bank @ probes.conjugate().T)

def CalcScoresConvolution(signals, template_indx, signal_indx):
    """Score test signals against templates with an FFT convolution.

    For every test signal, the signal is convolved with each template along
    the sample axis (``mode='valid'``) and the largest magnitude over the
    resulting lags is kept as the score.

    Parameters
    ----------
    signals : numpy.ndarray, shape (n_signals, n_samples)
        All signals, one per row.
    template_indx : numpy.ndarray
        Row indices used to build the template bank via ``MakeTemplates``.
    signal_indx : numpy.ndarray
        Row indices of the test signals.

    Returns
    -------
    numpy.ndarray, shape (template_indx.size, signal_indx.size)
        ``score_matrix[t, s]`` is the score of template ``t`` on test signal ``s``.
    """
    templates = MakeTemplates(signals[template_indx, :])

    score_matrix = np.zeros((template_indx.size, signal_indx.size))

    for n, index in enumerate(signal_indx):
        # Tile the test signal so it pairs row-by-row with the template bank.
        tiled_signal = signals[index, :].reshape((1, signals[index, :].size)).repeat(template_indx.size, axis=0)

        # NOTE(review): fftconvolve computes a convolution (the template is
        # time-reversed and not conjugated), whereas CalcScores uses
        # template . conj(signal). With equal-length inputs, mode='valid'
        # yields a single lag. Confirm this is the intended score.
        convolved = scipy.signal.fftconvolve(tiled_signal, templates, mode='valid', axes=-1)

        score_matrix[:, n] = np.max(abs(convolved), axis=-1)

    # Return the matrix that was filled above; the previous `return scores`
    # referenced a name that is never assigned and raised NameError.
    return score_matrix

def CalcMismatch(scores, signals, template_indx, signal_indx):
    """Ratio of each test signal's best template score to its self-score.

    ``scores`` has one column per test signal. The reference ("ideal") score
    of a test signal is obtained by scoring it against a template built from
    itself, i.e. the diagonal of ``CalcScores(signals, signal_indx, signal_indx)``.
    ``template_indx`` is accepted but not used.
    """
    self_scores = CalcScores(signals, signal_indx, signal_indx)
    ideal_per_signal = np.diag(self_scores)

    best_per_signal = np.max(scores, axis=0)

    return best_per_signal / ideal_per_signal


In [None]:
# Print every HDF5 attribute stored on signal '0' as a (name, value) pair.
# The enumerate() counter was never used, so iterate over the items directly.
for item in h5datafile['signal']['0'].attrs.items():
    print(item)

In [None]:
# Quick look at exp(-x) sampled at 100 points on [0, 1].
x = np.linspace(start=0, stop=1, num=100)
y = np.exp(-x)
plt.plot(x, y)

In [None]:
# 100 geometrically spaced values from 18500 to 18600, drawn as dots against their index.
y = np.geomspace(start=18500, stop=18600, num=100)

plt.plot(y, '.')

In [None]:
# Show the rows of `metapd` whose 'energy' entry exceeds 18590.
# NOTE(review): `metapd` is not defined in any visible cell — presumably a
# pandas DataFrame of the dataset metadata; confirm and define it before this
# cell so the notebook runs on a fresh kernel.
metapd[metapd['energy']>18590]

# Mismatch with convolution-based scoring

In [None]:
# Number of independent random template/test splits run per test-set size.
N_experiment = 1
#N_test_signals = 10
# Maps test-set size -> array of mismatch ratios with shape (N_experiment, Ntest).
mismatch_results = {}
# NOTE(review): `signals` is not assigned in any visible cell (its assignment
# near the top of the notebook is commented out) — confirm where it comes from.
for Ntest in [1000]:
    mismatch_results[Ntest] = np.zeros((N_experiment, Ntest))
    for Nexp in range(N_experiment):
        # Progress output: current test-set size and repetition index.
        print(Ntest, Nexp)
        # Hold out Ntest random rows; the remaining rows form the template bank.
        template_inds, signal_inds = RandomSplitIndex(signals, N_test_signals=Ntest)

        # Score of every template against every held-out signal.
        test_signal_matrix = CalcScoresConvolution(signals, template_inds, signal_inds)

        # Best template score divided by each held-out signal's self-template score.
        mismatch = CalcMismatch(test_signal_matrix, signals, template_inds, signal_inds)
        mismatch_results[Ntest][Nexp, :] = mismatch

In [None]:
# Sanity check: the array was allocated as (N_experiment, 1000) in the experiment cell.
print(mismatch_results[1000].shape)

In [None]:
sns.set_theme(style='whitegrid', context='talk')

# Histogram (64 bins) of every mismatch ratio from the Ntest=1000 experiment.
fig, ax = plt.subplots(figsize=(8,5))
hist = ax.hist(mismatch_results[1000].ravel(), bins=64)

ax.set_title('Distribution of Mismatch Ratio for Random Template Bank')
ax.set_xlabel(r'Mismatch = $\frac{T_{max}}{T_{ideal}}$')
ax.set_ylabel('N')

plt.tight_layout()
#plt.savefig(os.path.join(PLOTPATH, '210805_mismatch_dist_of_random_template_bank.png'))

# Summary statistics of the mismatch ratio: RMS first, then median.
print(np.sqrt(np.mean(mismatch_results[1000]**2)))
print(np.median(mismatch_results[1000]))

In [None]:
# Per-signal mismatch from the last experiment, with its RMS drawn as a flat
# reference line; the RMS value itself is used as the legend label.
plt.plot(mismatch, '.')
plt.plot(
    np.arange(mismatch.size),
    np.full(mismatch.size, np.sqrt(np.mean(mismatch**2))),
    label=np.sqrt(np.mean(mismatch**2)),
)
plt.legend()

# Assume template length matches signal length (score scales as sqrt(track_len / signal_len))

In [None]:
# NOTE(review): `signals` is not assigned in any visible cell — confirm where
# it is loaded before running this cell on a fresh kernel.

# Per-signal normalisation 1 / sqrt(var * sum_k |s_k|^2) (same formula as
# MakeTemplates); the row energy is summed directly instead of taking the
# diagonal of the full signals @ signals^H matrix.
norm_mat = 1 / np.sqrt(var * np.sum(np.abs(signals)**2, axis=-1))

# (n_signals, 1) * (n_signals, n_samples) broadcasts; no explicit repeat needed.
templates = norm_mat[:, np.newaxis] * signals

# Only the diagonal (each signal scored by its own template) is used below.
mean_score_matrix = abs(np.matmul(templates, signals.conjugate().T))
print('scores done')
length_fractions = np.arange(64, 16384 + 64, 64) / 16384 # track_len / signal_len

# get scaled MF scores with sqrt(track_len/signal_len) * score_matrix
# Result shape: (n_signals, length_fractions.size).
scaled_b = np.diag(mean_score_matrix)[:, np.newaxis] * np.sqrt(length_fractions)[np.newaxis, :]

print('b scaling done')

# Thresholds at which the Rician CDF is evaluated.
# NOTE(review): `x_rice` was not defined in any visible cell, so this cell
# raised NameError on a fresh kernel. It is defined here on the same grid the
# ROC cell uses for `x_rayleigh` (np.linspace(0, 20, 301)) so that TPR and
# FPR share thresholds and array length — confirm this is the intended grid.
x_rice = np.linspace(0, 20, 301)

# Same (n_signals, n_lengths, n_thresholds) shape as before, but as a
# zero-copy read-only view instead of a materialised repeat.
b_scaled_test = np.broadcast_to(scaled_b[:, :, np.newaxis], (*scaled_b.shape, x_rice.size))

# TPR is the Rician CDF of the scaled mf score
# x_rice broadcasts against the trailing axis of b_scaled_test.
test_cdf = scipy.stats.rice.cdf(x_rice, b_scaled_test)

print('cdf done')

In [None]:
import scipy.integrate

# Scale (mean) of the exponential distribution used to weight the length fractions.
mean = 3

# Same grid of length fractions that the CDF cell used for axis 1 of test_cdf.
x_expon = np.arange(64, 16384 + 64, 64) / 16384

# weighted average of the CDF functions based on exponential distribution.
# Normalize probabilities to the max slice considered.
y_expon = scipy.stats.expon.pdf(x_expon, loc=0, scale=mean)
area = scipy.stats.expon.cdf(x_expon, loc=0, scale=mean)[-1]
exponential_weights = y_expon/area

# Integral of the normalised weights over the length-fraction grid.
print(scipy.integrate.trapezoid(exponential_weights, x_expon))

# Average over length fractions (axis 1) with the exponential weights, then
# over signals (axis 0), leaving one CDF value per threshold.
weighted_cdf = np.average(test_cdf, weights=exponential_weights, axis=1)
mean_cdf = np.mean(weighted_cdf, axis=0)

# noise distribution
x_rayleigh = np.linspace(0, 20, 301)
y_rayleigh = scipy.stats.rice.cdf(x_rayleigh, 0)

# Persist the ROC curve: FPR from the noise-only CDF, TPR from the averaged signal CDF.
np.savez(
    os.path.join(RESULTPATH, 'roc', f'210803_analytical_mf_roc_df84_mt{mean}'),
    fpr=1-y_rayleigh,
    tpr=1-mean_cdf,
)

sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

plt.plot(1-y_rayleigh, 1-mean_cdf)

plt.title(r'ROC Curve for Ideal MF, $\mu_t=$' + f'{mean}')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.tight_layout()

# Area under the ROC curve; the sign flip compensates for FPR decreasing
# along the threshold grid.
print(-scipy.integrate.trapezoid(1-mean_cdf, 1-y_rayleigh))

# Use fixed template lengths (score scales as track_len / signal_len)

In [None]:
# Number of leading samples of each signal kept in its template.
template_len = 16384

# NOTE(review): `signals` is not assigned in any visible cell — confirm where
# it is loaded before running this cell on a fresh kernel.
#signals = signals[:, 0:signal_len]

# Normalise by the energy of the truncated signal only; the row energy is
# summed directly instead of taking the diagonal of a full Gram matrix.
norm_mat = 1 / np.sqrt(var * np.sum(np.abs(signals[:, 0:template_len])**2, axis=-1))

# Truncated signal, zero-padded back to the full signal length, then scaled
# per row by broadcasting (n_signals, 1) against (n_signals, n_samples).
templates = norm_mat[:, np.newaxis] * np.pad(signals[:, 0:template_len], ((0,0), (0, signals.shape[-1] - template_len)), mode='constant')

# Only the diagonal (each signal scored by its own template) is used below.
mean_score_matrix = abs(np.matmul(templates, signals.conjugate().T))
print('scores done')
length_fractions = np.arange(64, 16384 + 64, 64) / 16384 # track_len / signal_len

# Self-scores scaled linearly by the length fraction;
# result shape: (n_signals, length_fractions.size).
scaled_b = np.diag(mean_score_matrix)[:, np.newaxis] * length_fractions[np.newaxis, :]


# Thresholds at which the Rician CDF is evaluated.
# NOTE(review): `x_rice` was not defined in any visible cell, so this cell
# raised NameError on a fresh kernel. It is defined here on the same grid the
# ROC cell uses for `x_rayleigh` (np.linspace(0, 20, 301)) so that TPR and
# FPR share thresholds and array length — confirm this is the intended grid.
x_rice = np.linspace(0, 20, 301)

# Same (n_signals, n_lengths, n_thresholds) shape as before, but as a
# zero-copy read-only view instead of a materialised repeat.
b_scaled_test = np.broadcast_to(scaled_b[:, :, np.newaxis], (*scaled_b.shape, x_rice.size))

# TPR is the Rician CDF of the scaled mf score
# x_rice broadcasts against the trailing axis of b_scaled_test.
test_cdf = scipy.stats.rice.cdf(x_rice, b_scaled_test)

print('cdf done')

In [None]:
import scipy.integrate

# Scale (mean) of the exponential distribution used to weight the length fractions.
mean = 6

# Same grid of length fractions that the CDF cell used for axis 1 of test_cdf.
x_expon = np.arange(64, 16384 + 64, 64) / 16384

# weighted average of the CDF functions based on exponential distribution.
# Normalize probabilities to the max slice considered.
y_expon = scipy.stats.expon.pdf(x_expon, loc=0, scale=mean)
area = scipy.stats.expon.cdf(x_expon, loc=0, scale=mean)[-1]
exponential_weights = y_expon/area

# Integral of the normalised weights over the length-fraction grid.
print(scipy.integrate.trapezoid(exponential_weights, x_expon))

# Average over length fractions (axis 1) with the exponential weights, then
# over signals (axis 0), leaving one CDF value per threshold.
weighted_cdf = np.average(test_cdf, weights=exponential_weights, axis=1)
mean_cdf = np.mean(weighted_cdf, axis=0)

# noise distribution
x_rayleigh = np.linspace(0, 20, 301)
y_rayleigh = scipy.stats.rice.cdf(x_rayleigh, 0)

# Persist the ROC curve: FPR from the noise-only CDF, TPR from the averaged signal CDF.
np.savez(
    os.path.join(RESULTPATH, 'roc', f'210803_analytical_mf_roc_df84_mt{mean}_templatesize16384'),
    fpr=1-y_rayleigh,
    tpr=1-mean_cdf,
)

sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

plt.plot(1-y_rayleigh, 1-mean_cdf)

plt.title(r'ROC Curve for Ideal MF, $\mu_t=$' + f'{mean}')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.tight_layout()

# Area under the ROC curve; the sign flip compensates for FPR decreasing
# along the threshold grid.
print(-scipy.integrate.trapezoid(1-mean_cdf, 1-y_rayleigh))