In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import scipy.signal
import scipy.stats

# Root of the project tree; the result, plot and data directories below are derived from it.
# NOTE(review): absolute, user-specific path — adjust before running on another machine.
PATH = '/storage/home/adz6/group/project'
RESULTPATH = os.path.join(PATH, 'results/damselfly')
PLOTPATH = os.path.join(PATH, 'plots/damselfly')
DATAPATH = os.path.join(PATH, 'damselfly/data/datasets')
SIMDATAPATH = os.path.join(PATH, 'damselfly/data/sim_data')

"""
Date: 6/25/2021
Description: template
"""


# Simulated-signal dataset. The file handle is left open on purpose: a later
# cell reads h5datafile['meta'] from it.
dataset_file = '211015_84_1d2sl_eval.h5'
h5datafile = h5py.File(os.path.join(SIMDATAPATH, dataset_file), 'r')

# Noise variance used to normalise the matched-filter templates and passed to noise(...) below.
# NOTE(review): looks like k_B * T * R * bandwidth (1.38e-23 J/K, 10 K, 50 ohm, 200 MHz) — confirm.
var = 1.38e-23 * 10 * 50 * 200e6

# Read the whole 'data' dataset into memory as a numpy array.
signals = h5datafile['data'][:]
print(signals.shape)


In [None]:
# Keep only the first 8192 samples of every signal for the analysis below.
signals = signals[:, :8192]

# Compute analytical ROC curves for different mismatch values

In [None]:
# Threshold grid on which the Rician / Rayleigh CDFs are evaluated.
x_rice = np.linspace(0, 20, 301)

# Per-signal normalisation 1 / sqrt(var * <s, s>): the diagonal of the Gram
# matrix holds every signal's inner product with itself.
norm_mat = 1 / np.sqrt(var * np.diag(signals @ signals.conjugate().T))

# Scale each signal row by its own normalisation factor to obtain its template.
templates = norm_mat[:, np.newaxis] * signals

# Magnitude of every template-vs-signal inner product.
mean_score_matrix = np.abs(templates @ signals.conjugate().T)
print('scores done')
#length_fractions = np.arange(64, 16384 + 64, 64) / 16384 # track_len / signal_len
length_fractions = np.arange(64, 8192 + 64, 64) / 8192 # track_len / signal_len

# get scaled MF scores with sqrt(track_len/signal_len) * score_matrix
# rows: signals (matched score on the diagonal), columns: length fractions
scaled_b = np.diag(mean_score_matrix)[:, np.newaxis] * np.sqrt(length_fractions)[np.newaxis, :]

print('b scaling done')


In [None]:
# Scratch check: display the mismatch grid (same expression as mismatch_array in the next cell).
np.arange(10, 4, -1) / 10

In [None]:
import scipy.integrate
#mismatch_array = np.arange(5, 11, 1) / 10
# Mean mismatch factors to scan: 1.0, 0.9, ..., 0.5. Each one scales the
# matched-filter scores in scaled_b before the Rician CDF is evaluated.
mismatch_array = np.arange(10, 4, -1) / 10
# Scale parameter of the exponential distribution used to weight the length fractions.
mean = 4
#x_expon = np.arange(64, 16384 + 64, 64) / 16384
x_expon = np.arange(64, 8192 + 64, 64) / 8192

# --- quantities that do not depend on the mismatch: computed once, outside the loop ---

# weighted average of the CDF functions based on exponential distribution.
# Normalize probabilities to the max slice considered.
y_expon = scipy.stats.expon.pdf(x_expon, loc=0, scale=mean)

area = scipy.stats.expon.cdf(x_expon, loc=0, scale=mean)[-1]

exponential_weights = y_expon/area

# noise distribution (Rician with zero non-centrality)
x_rayleigh = np.linspace(0, 20, 301)
y_rayleigh = scipy.stats.rice.cdf(x_rayleigh, 0)

for mean_mismatch in mismatch_array:
    # Non-centrality for every (signal, length fraction); the trailing unit axis
    # broadcasts against the threshold grid instead of materialising a full copy.
    b_scaled_test = mean_mismatch * scaled_b[:, :, np.newaxis]

    # TPR is the Rician CDF of the scaled mf score
    # result shape: (signal, length fraction, threshold)
    test_cdf = scipy.stats.rice.cdf(x_rice.reshape(1, 1, x_rice.size), b_scaled_test)

    print('cdf done')

    # Average over length fractions with the exponential weights, then over signals.
    weighted_cdf = np.average(test_cdf, axis=1, weights=exponential_weights)
    mean_cdf = weighted_cdf.mean(axis=0)

    # One ROC file per mismatch value (np.savez appends the .npz extension).
    np.savez(os.path.join(RESULTPATH, 'roc', f'211018_analytical_mf_roc_df84_mt{mean}_mismatch{mean_mismatch}'), fpr=1-y_rayleigh, tpr=1-mean_cdf)


    
    

# Assume Template Length Will Match Signal Length (score decreases by 1/sqrt(N))

In [None]:
# Mismatch factor applied to the matched-filter scores in the following cell.
mean_assumed_mismatch = 0.5
# Threshold grid on which the Rician CDF is evaluated.
x_rice = np.linspace(0, 20, 301)

# Per-signal normalisation 1 / sqrt(var * <s, s>), taken from the Gram-matrix diagonal.
norm_mat = 1 / np.sqrt(var * np.diag(signals @ signals.conjugate().T))

# Each template is its signal scaled by that signal's normalisation factor.
templates = norm_mat[:, np.newaxis] * signals

# Magnitude of every template-vs-signal inner product.
mean_score_matrix = np.abs(templates @ signals.conjugate().T)
print('scores done')
# NOTE(review): this grid is built on 16384 samples, whereas an earlier cell
# slices signals to 8192 columns — confirm which signal length is intended.
length_fractions = np.arange(64, 16384 + 64, 64) / 16384 # track_len / signal_len

# get scaled MF scores with sqrt(track_len/signal_len) * score_matrix
scaled_b = np.diag(mean_score_matrix)[:, np.newaxis] * np.sqrt(length_fractions)[np.newaxis, :]

print('b scaling done')



In [None]:
# Mismatch-scaled scores, replicated along a new trailing threshold axis.
b_scaled_test = mean_assumed_mismatch * np.repeat(scaled_b[..., np.newaxis], x_rice.size, axis=2)

# TPR is the Rician CDF of the scaled mf score
# The threshold grid is tiled to the same (signal, length fraction, threshold) shape.
test_cdf = scipy.stats.rice.cdf(np.tile(x_rice, (*scaled_b.shape, 1)), b_scaled_test)

print('cdf done')

In [None]:
# Keep the CDF from the previous cell under a dedicated name so the plotting cell
# does not depend on test_cdf being overwritten later. This is an alias, not a copy.
#test_cdf_with_mismatch = test_cdf
#test_cdf_without_mismatch = test_cdf
test_cdf_bigger_mismatch = test_cdf

In [None]:
import scipy.integrate
# Length-fraction grid on which the exponential weights are sampled.
x_expon = np.arange(64, 16384 + 64, 64) / 16384

# Scale parameter of the exponential weighting distribution.
mean = 4

# weighted average of the CDF functions based on exponential distribution. 
# Normalize probabilities to the max slice considered.
y_expon = scipy.stats.expon.pdf(x_expon, loc=0, scale=mean)

#plt.plot(x_expon, y_expon)
#plt.yscale('log')

# CDF at the largest grid point: the probability mass covered by the grid.
area = scipy.stats.expon.cdf(x_expon, loc=0, scale=mean)[-1]

#plt.plot(x_expon, y_expon/area)
#plt.yscale('log')


exponential_weights = y_expon/area

#print(scipy.integrate.trapezoid(y_expon/area, x_expon))


#weighted_cdf_no_mismatch = np.average(test_cdf_without_mismatch, axis=1, weights=exponential_weights)
#mean_cdf_no_mismatch = weighted_cdf_no_mismatch.mean(axis=0)

#weighted_cdf_mismatch = np.average(test_cdf_with_mismatch, axis=1, weights=exponential_weights)
#mean_cdf_mismatch = weighted_cdf_mismatch.mean(axis=0)


# Average over length fractions (axis 1) with the exponential weights, then over signals (axis 0).
weighted_cdf_big_mismatch = np.average(test_cdf_bigger_mismatch, axis=1, weights=exponential_weights)
mean_cdf_big_mismatch = weighted_cdf_big_mismatch.mean(axis=0)

# noise distribution
x_rayleigh = np.linspace(0, 20, 301)
y_rayleigh = scipy.stats.rice.cdf(x_rayleigh, 0)

#np.savez(os.path.join(RESULTPATH, 'roc', f'210803_analytical_mf_roc_df84_mt{mean}'), fpr=1-y_rayleigh, tpr=1-mean_cdf)


sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

#plt.plot(1-y_rayleigh, 1-mean_cdf_no_mismatch, label='mismatch=1.0')
#plt.plot(1-y_rayleigh, 1-mean_cdf_mismatch, label='mismatch=0.9')
# The legend is taken from the mismatch factor that produced the curve, so the
# label cannot drift away from the value used in the computation.
plt.plot(1-y_rayleigh, 1-mean_cdf_big_mismatch, label=f'mismatch={mean_assumed_mismatch}')

plt.title(r'ROC Curve for MF with Mismatch, $\mu_t=$' + f'{mean}')
plt.xlabel('FPR')
plt.ylabel('TPR')
#plt.xscale('log')
plt.tight_layout()
plt.legend(loc=4)

#plt.savefig(os.path.join(PLOTPATH, '210805_analytic_ROC_with_mismatch_zoom.png'))
#print(-1 * scipy.integrate.trapezoid(1-mean_cdf_mismatch, 1-y_rayleigh))
#print(-1 * scipy.integrate.trapezoid(1-mean_cdf_no_mismatch, 1-y_rayleigh))
# Area under the ROC curve; FPR decreases along the threshold grid, hence the sign flip.
print(-1 * scipy.integrate.trapezoid(1-mean_cdf_big_mismatch, 1-y_rayleigh))

# use fixed template lengths (score decreases by 1/N)

In [None]:
# Fixed-length template analysis: templates are built from the first template_len
# samples of each signal and zero-padded back to the signal length.
template_len = 16384

#signals = signals[:, 0:signal_len]

# Normalisation 1 / sqrt(var * <s, s>) computed on the template_len-sample prefix only.
norm_mat = 1 / np.sqrt(var * np.diag(np.matmul(signals[:, 0:template_len], signals[:, 0:template_len].conjugate().T)))

# NOTE(review): the pad width is signals.shape[-1] - template_len, which is negative
# when the signals hold fewer than template_len samples (an earlier cell slices
# signals to 8192 columns) — confirm the intended signal length before running.
templates = norm_mat.reshape((*norm_mat.shape, 1)).repeat(signals.shape[-1], axis=-1) * np.pad(signals[:, 0:template_len], ((0,0), (0, signals.shape[-1] - template_len)), mode='constant')

# Magnitude of every template-vs-signal inner product.
mean_score_matrix = abs(np.matmul(templates, signals.conjugate().T))
print('scores done')
length_fractions = np.arange(64, 16384 + 64, 64) / 16384 # track_len / signal_len

# Matched scores (diagonal) scaled linearly by the length fraction here,
# not by its square root as in the earlier cells.
scaled_b = np.diag(mean_score_matrix).reshape((mean_score_matrix.shape[0], 1)).repeat(length_fractions.size, axis=-1) * length_fractions.reshape((1, length_fractions.size)).repeat(mean_score_matrix.shape[0], axis=0)


# Replicate the scores along a third axis of size x_rice.size (x_rice comes from an earlier cell).
b_scaled_test = scaled_b.reshape((*scaled_b.shape, 1)).repeat(x_rice.size, axis=2)

# TPR is the Rician CDF of the scaled mf score
test_cdf = scipy.stats.rice.cdf(x_rice.reshape(1,1,x_rice.size).repeat(scaled_b.shape[0],axis=0).repeat(scaled_b.shape[1],axis=1), b_scaled_test)

print('cdf done')

In [None]:
import scipy.integrate

# Scale parameter of the exponential weighting distribution.
mean = 6

# Length-fraction grid on which the exponential weights are sampled.
x_expon = np.arange(64, 16384 + 64, 64) / 16384

# weighted average of the CDF functions based on exponential distribution.
# Normalize probabilities to the max slice considered.
track_length_dist = scipy.stats.expon(loc=0, scale=mean)
y_expon = track_length_dist.pdf(x_expon)

#plt.plot(x_expon, y_expon)
#plt.yscale('log')

# CDF at the last grid point: the probability mass covered by the grid.
area = track_length_dist.cdf(x_expon)[-1]

#plt.plot(x_expon, y_expon/area)
#plt.yscale('log')

exponential_weights = y_expon / area

# Sanity check: integral of the normalised weights over the grid.
print(scipy.integrate.trapezoid(exponential_weights, x_expon))

# Average over length fractions (axis 1) with the weights, then over signals (axis 0).
weighted_cdf = np.average(test_cdf, weights=exponential_weights, axis=1)
mean_cdf = np.mean(weighted_cdf, axis=0)

# noise distribution
x_rayleigh = np.linspace(0, 20, 301)
y_rayleigh = scipy.stats.rice.cdf(x_rayleigh, 0)

# ROC coordinates: survival functions of the noise and signal score distributions.
roc_fpr = 1 - y_rayleigh
roc_tpr = 1 - mean_cdf

np.savez(
    os.path.join(RESULTPATH, 'roc', f'210803_analytical_mf_roc_df84_mt{mean}_templatesize16384'),
    fpr=roc_fpr,
    tpr=roc_tpr,
)

sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

plt.plot(roc_fpr, roc_tpr)

plt.title(r'ROC Curve for Ideal MF, $\mu_t=$' + f'{mean}')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.tight_layout()

# Area under the ROC curve; FPR decreases along the grid, hence the sign flip.
print(-scipy.integrate.trapezoid(roc_tpr, roc_fpr))

In [None]:

# Scratch check: score of template 0 against a 4096-sample prefix of signal 0
# (zero-padded by 8192 + 4096 samples) versus against its first 16384 samples.
# NOTE(review): np.vdot needs both operands to have the same number of elements,
# so this only runs when templates[0, :] holds 16384 samples — confirm the kernel state.
print(abs(np.vdot(templates[0, :], np.pad(signals[0, 0:4096], (0, 8192 + 4096)))))
print(abs(np.vdot(templates[0, :], np.pad(signals[0, 0:16384], (0, 0)))))
plt.plot(templates[0, :].real)
#plt.plot(signals[0, :].real)

In [None]:
# Scratch: pull out the first signal and its template for interactive inspection.
test_signal = signals[0, :]
test_template = templates[0, :]

In [None]:
sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

# Non-centrality parameter: scaled score of signal 0 at length-fraction index 64.
b = scaled_b[0, 64]
rice_dist = scipy.stats.rice(b)   # signal-plus-noise score distribution
ray_dist = scipy.stats.rice(0)    # noise-only score distribution (zero non-centrality)

# Evaluate both densities on the same 0..20 grid through the frozen distributions.
x_rice = np.linspace(0, 20, num=301)
y_rice = rice_dist.pdf(x_rice)

x_rayleigh = np.linspace(0, 20, num=301)
y_rayleigh = ray_dist.pdf(x_rayleigh)

plt.plot(x_rice, y_rice, label='Rician')
plt.plot(x_rayleigh, y_rayleigh, label='Rayleigh')
plt.xlim(-1, 13)
plt.legend(loc='best')
plt.xlabel('Matched Filter Score')
plt.ylabel('Probability Density')
plt.tight_layout()
#plt.savefig(os.path.join(PLOTPATH, '210803_mf_rician_short_track.png'))

In [None]:
# Non-centrality parameter: scaled score of signal 0 at length-fraction index 128.
b = scaled_b[0, 128]
rice_dist = scipy.stats.rice(b)   # signal-plus-noise score distribution
ray_dist = scipy.stats.rice(0)    # noise-only score distribution

# CDFs of both distributions on the same 0..20 threshold grid.
x_rice = np.linspace(0, 20, num=301)
y_rice = rice_dist.cdf(x_rice)

x_rayleigh = np.linspace(0, 20, num=301)
y_rayleigh = ray_dist.cdf(x_rayleigh)

# Plot 1 - CDF for each: the probability that the score exceeds the threshold.
plt.plot(x_rice, 1 - y_rice)

plt.plot(x_rayleigh, 1 - y_rayleigh)






In [None]:
# ROC curve for the single (signal, length fraction) pair chosen in the previous cell:
# noise exceedance probability on x, signal exceedance probability on y.
plt.plot(1-y_rayleigh, 1-y_rice)

In [None]:
# Replicate the scaled scores along a new trailing axis of size x_rice.size (no mismatch factor here).
b_scaled_test = scaled_b.reshape((*scaled_b.shape, 1)).repeat(x_rice.size, axis=2)

In [None]:
# Rician CDF on the x_rice grid for every entry of scaled_b; the grid is repeated to match b_scaled_test.
test_cdf = scipy.stats.rice.cdf(x_rice.reshape(1,1,x_rice.size).repeat(scaled_b.shape[0],axis=0).repeat(scaled_b.shape[1],axis=1), b_scaled_test)

In [None]:
# Scratch check: display the shape of the CDF array.
test_cdf.shape

In [None]:
# Scratch check: noise CDF against the CDF slice for signal 0 at length-fraction index 128.
plt.plot(x_rayleigh, y_rayleigh)
plt.plot(x_rice, test_cdf[0, 128, :])

In [None]:
# Weighted average over axis 1 of test_cdf.
# NOTE(review): exponential_weights comes from an earlier cell; its length must match test_cdf.shape[1].
weighted_cdf = np.average(test_cdf, axis=1, weights=exponential_weights)

In [None]:
sns.set_theme(style='whitegrid', context='talk')
plt.figure(figsize=(8,5))

# Derive the signal-averaged CDF from the current weighted_cdf so the curve cannot
# silently come from a stale mean_cdf computed for a different configuration.
mean_cdf = weighted_cdf.mean(axis=0)

# ROC curve: noise exceedance probability (FPR) against signal exceedance probability (TPR).
plt.plot(1-y_rayleigh, 1-mean_cdf)

# Report the exponential scale that was actually used for the weights instead of a fixed value.
plt.title(r'ROC Curve for Ideal MF, $\mu_t=$' + f'{mean}')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210803_ideal_mf_roc_curve.png'))

In [None]:
# Persist the ROC curve (fpr/tpr arrays) under RESULTPATH/roc; np.savez appends the .npz extension.
# NOTE(review): relies on y_rayleigh and mean_cdf left in the kernel by earlier cells.
np.savez(os.path.join(RESULTPATH, 'roc', '210803_analytical_mf_roc_df84'), fpr=1-y_rayleigh, tpr=1-mean_cdf)

In [None]:

# Monte Carlo check of the matched-filter score distribution for a single signal.
# N: number of samples taken from the signal.
N = 1024

# Number of independent noise realisations.
Ntrial = 1000
# NOTE(review): hard-coded signal index 3489 — confirm the dataset has at least 3490 signals.
signal = signals[3489, 0:N]
print(signal.shape)
# Template: the signal normalised by 1 / sqrt(var * <s, s>).
template = signal * 1 / np.sqrt(var * np.vdot(signal, signal))

# Score magnitude of (signal + noise) against the template, one entry per trial.
list1 = []
for m in range(Ntrial):

    # NOTE(review): neither `noise` nor `rng` is defined before this cell in the
    # visible notebook (rng is created in a later cell) — confirm where they come from.
    n = noise(rng, var, N, N)
    list1.append(abs(np.vdot(signal[0:N] + n, template[0:N])))
    

# Noise-free score, used as the parameter of the frozen Rician distribution.
b = abs(np.vdot(signal[0:], template[0:]))
rice_dist = scipy.stats.rice(b)

print(abs(np.vdot(signal[0:N], template[0:N])))

#print(abs(np.vdot(signal[0:], template[0:])))

#print(abs(np.vdot(signal[0:], template[0:])))



In [None]:
# Rician pdf for the noise-free score b from the previous cell.
# NOTE(review): `x` is not defined in any cell before this one in the visible notebook — confirm the intended grid.
plt.plot(x, scipy.stats.rice.pdf(x, b))


In [None]:
# Histogram of the Monte Carlo scores collected in list1 (matplotlib's default binning).
hist = plt.hist(np.array(list1))

In [None]:
# Read every dataset of the HDF5 'meta' group into memory and assemble the results as
# columns of one DataFrame, keyed by dataset name.
# NOTE(review): assumes all 'meta' datasets are 1-D and of equal length — confirm.
metadata = pd.DataFrame({name: h5datafile['meta'][name][:] for name in h5datafile['meta']})



In [None]:
# Number of leading samples of each signal that go into its template.
template_size = 8192

# Per-signal normalisation 1 / sqrt(var * <s, s>) over the template_size-sample prefix.
norm = 1 / np.sqrt(var * np.diag(signals[:, :template_size] @ signals[:, :template_size].conjugate().T))

# Templates: the signal prefix scaled row-wise by its normalisation factor.
templates = signals[:, :template_size] * np.repeat(norm[:, np.newaxis], template_size, axis=-1)

In [None]:
# Direct inner-product score of template 0 with the matching prefix of signal 0 ...
print(np.abs(np.vdot(templates[0], signals[0, :template_size])))
# ... compared with the peak magnitude of the 'valid' convolution against the full signal.
print(np.abs(scipy.signal.fftconvolve(templates[0], signals[0], mode='valid')).max())

# Same convolution check for a signal that is non-zero only in its first track_len samples.
track_len = 4096
aug_signal = np.zeros(signals[0].shape, dtype=np.complex64)

aug_signal[:track_len] = signals[0, :track_len]

print(np.abs(scipy.signal.fftconvolve(templates[0], aug_signal, mode='valid')).max())

In [None]:
# Random generator shared by the sampling cells below.
# NOTE(review): unseeded, so results differ between runs — consider passing a seed.
rng = np.random.default_rng()
slicesize = 8192
ntrack = 1000
mean_length = 3
# One sampled track length per trial, stored as int32.
track_lengths = np.zeros(ntrack, dtype=np.int32)

for n in range(ntrack):
    # NOTE(review): GetTrackLength is not defined anywhere in the visible notebook — confirm its source and semantics.
    track_lengths[n] = GetTrackLength(rng, slicesize, mean=mean_length, signal_length=signals.shape[-1])

In [None]:
# Distribution of the sampled track lengths in 32 bins.
plt.hist(track_lengths, 32)

In [None]:
# Number of independent noise realisations drawn per track length.
noise_samples = 5
#shape = signals.shape
#size = signals.size

# Track lengths to scan: 512, 1024, ..., 16384 samples.
track_lengths = np.arange(512, 2*8192 + 512, 512)
ntrack = track_lengths.size

nsignals = signals.shape[0]

# Fail fast with a clear message instead of a shape error part-way through the scan:
# a slice longer than the stored signals would silently come back shorter than requested.
if track_lengths.max() > signals.shape[-1]:
    raise ValueError(
        f'track lengths up to {track_lengths.max()} samples requested, '
        f'but signals only hold {signals.shape[-1]} samples'
    )

# Row i * noise_samples + n holds the (signal x template) score matrix for
# track_lengths[i] and noise realisation n.
noise_scores = np.zeros((ntrack * noise_samples, nsignals, nsignals))
signal_scores = np.zeros((ntrack * noise_samples, nsignals, nsignals))

for i, track_len in enumerate(track_lengths):
    print(track_len)

    # Signals truncated to the current track length. The templates depend only on
    # the track length, so they are built once per length, not once per realisation.
    aug_signals = signals[:, 0:track_len]

    norm = 1 / np.sqrt(var * np.diag(np.matmul(aug_signals, aug_signals.conjugate().T)))

    templates = aug_signals * norm.reshape((norm.size, 1))

    for n in range(noise_samples):
        # Each realisation gets its own row; indexing by i alone would make the
        # realisations overwrite each other and leave most rows at zero.
        row = i * noise_samples + n

        # Signal-plus-noise scores against every template.
        noisy_data = aug_signals + noise(rng, var, aug_signals.size, aug_signals.shape)
        signal_scores[row] = abs(np.matmul(noisy_data, templates.conjugate().T))

        # Noise-only scores against every template.
        #x = abs(np.matmul(noise(rng, var, size, shape)[:,0:template_size], templates.conjugate().T))
        noise_scores[row] = abs(np.matmul(noise(rng, var, templates.size, templates.shape), templates.conjugate().T))

    if i % 10 == 9:
        print(f'{i + 1} / {ntrack}')

result_name = '210731_matched_filter_scores_mt3_10K.npz'

# track_len keeps one entry per scanned length; use the row layout above to map score rows to it.
np.savez(os.path.join(RESULTPATH, result_name), signal=signal_scores, noise=noise_scores, track_len=track_lengths)
        

In [None]:
# Scratch check: show the track-length grid currently held in the kernel.
print(track_lengths)