In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import scipy.signal
import scipy.stats

# Root of the project tree; every location below is derived from it.
PATH = '/storage/home/adz6/group/project'
RESULTPATH = os.path.join(PATH, 'results/damselfly')
PLOTPATH = os.path.join(PATH, 'plots/damselfly')
DATAPATH = os.path.join(PATH, 'damselfly/data/datasets')
SIMDATAPATH = os.path.join(PATH, 'damselfly/data/sim_data')

"""
Date: 6/25/2021
Description: template
"""

# Simulation file, opened read-only. The handle is left open because a later
# cell reads the individual signals from it.
data = os.path.join(SIMDATAPATH, '210729_df_84_1slice.h5')
h5datafile = h5py.File(data, mode='r')

# Number of entries under 'signal', and the length of the last axis of entry '0'.
signal_group = h5datafile['signal']
Nsignal = len(list(signal_group.keys()))
Nsample = signal_group['0'].shape[-1]

#pca_abs = np.load(os.path.join(PATH, 'damselfly', 'data/210910_frequency_spectra_abs_principle_components.npz'))
#pca_real = np.load(os.path.join(PATH, 'damselfly', 'data/210910_frequency_spectra_real_principle_components.npz'))
#pca_imag = np.load(os.path.join(PATH, 'damselfly', 'data/210910_frequency_spectra_imag_principle_components.npz'))

# Archive holding the 'evecs' / 'evals' arrays used by the cells below.
pc_archive_path = os.path.join(PATH, 'damselfly/data', '210914_frequency_spectra_imag_principle_components.npz')
pc = np.load(pc_archive_path)


In [None]:
# Pull the eigenvectors and eigenvalues out of the loaded archive.
evecs, evals = pc['evecs'], pc['evals']

In [None]:
# Sanity check on the eigenvector matrix dimensions.
print(np.shape(evecs))

In [None]:
#plt.plot(evecs[:, 0])

# use principal components to project noise

In [None]:
# Dense complex array with one row per entry of the HDF5 'signal' group.
dataset = np.zeros((Nsignal, Nsample), dtype=np.complex64)

signal_group = h5datafile['signal']
for i in range(Nsignal):
    # Entries are keyed by their index as a string; only row 0 of each is copied.
    dataset[i, :] = signal_group[str(i)][0, :]


In [None]:
# Noise variance used for every noise draw in this notebook.
# NOTE(review): the factors look like bandwidth (Hz), impedance (ohm),
# temperature (K) and the Boltzmann constant -- confirm.
var = 1 * 200e6 * 50 * 10 * 1.38e-23

# Fixed seed so that Restart Kernel -> Run All reproduces the same noise draws
# (and therefore the same fitted distribution parameters) every time.
SEED = 20210915
rng = np.random.default_rng(SEED)

# 8192 independent pairs, each component with variance var / 2; the two
# columns become the real and imaginary parts of one complex noise trace.
iq = rng.multivariate_normal([0, 0], np.eye(2) * var / 2, 8192)
noise = iq[:, 0] + 1j * iq[:, 1]



In [None]:

n_pc = 256
# Flat buffer: 30 independent noise draws x n_pc components, filled draw by draw.
noise_projection = np.zeros(n_pc * 30)

for m in range(30):
    # Fresh complex noise trace for this draw.
    iq = rng.multivariate_normal([0, 0], np.eye(2) * var / 2, 8192)
    noise = iq[:, 0] + 1j * iq[:, 1]

    offset = m * n_pc
    for k in range(n_pc):
        # Peak magnitude of the cross-correlation between component k and the noise.
        xcorr = scipy.signal.correlate(evecs[:, k], noise, mode='same')
        noise_projection[offset + k] = abs(xcorr).max()
    

In [None]:
sns.set_theme(context='talk')
fig, ax = plt.subplots(figsize=(8, 5))

# Peak correlation magnitudes in the order they were stored.
ax.plot(noise_projection)

In [None]:
# Fit four candidate distributions to the noise-only peak correlations.
rice_fit = scipy.stats.rice.fit(noise_projection)
ray_fit = scipy.stats.rayleigh.fit(noise_projection)
wei_fit = scipy.stats.weibull_min.fit(noise_projection)
gum_fit = scipy.stats.gumbel_r.fit(noise_projection)

# Print the fitted parameter tuples in the same order as above.
for fitted in (rice_fit, ray_fit, wei_fit, gum_fit):
    print(fitted)

In [None]:
# Evaluate the fitted Gumbel pdf on a fixed grid of 200 points.
_x = np.linspace(2.8e-6, 5e-6, 200)
_pdf = scipy.stats.gumbel_r.pdf(_x, loc=gum_fit[0], scale=gum_fit[1])

sns.set_theme(context='talk')
fig, ax = plt.subplots(figsize=(8, 5))

# Measured noise projections.
hist = ax.hist(noise_projection, 32, histtype='step')

# Fitted pdf, rescaled by a hand-picked factor of 1/1700 for the overlay.
ax.plot(_x, (1 / 1700) * _pdf)

# Random draws from the fitted Gumbel, same count as the measured sample (256 * 30).
gumbel_draws = scipy.stats.gumbel_r.rvs(loc=gum_fit[0], scale=gum_fit[1], size=256 * 30)
hist = ax.hist(gumbel_draws, 32, histtype='step')

# add noise to the projections of signals

In [None]:
data_dir = os.path.join(PATH, 'damselfly/data')

# The listing is evaluated but neither stored nor displayed (it is not the
# last expression of the cell).
os.listdir(data_dir)

# Pre-computed signal/component match matrix.
sig_projection = np.load(os.path.join(data_dir, '210915_pc_match_matrix_normalized.npy'))

In [None]:
# Display the dimensions of the loaded match matrix.
np.shape(sig_projection)

In [None]:
# Per-row scale factor: sum over the last axis of |x ** 2| for each row of `dataset`.
signal_energy = abs(dataset ** 2).sum(axis=-1)

# Turn it into a column and repeat it 256 times along the last axis.
# reshape((-1, 1)) takes the row count from `dataset` itself instead of the
# previously hard-coded 5492, so the cell keeps working for other dataset sizes.
projection_scale = signal_energy.reshape((-1, 1)).repeat(256, axis=-1)
print(projection_scale[4, 0])

In [None]:
# Multiply the match matrix element-wise by the scale factors, writing the
# result back into the same array.
# NOTE: this mutates sig_projection, so re-running the cell applies the scale again.
sig_projection = np.multiply(sig_projection, projection_scale, out=sig_projection)

In [None]:
sns.set_theme(context='talk')
fig, ax = plt.subplots(figsize=(8, 5))

# First row of the match matrix.
ax.plot(sig_projection[0, :])

In [None]:
# Monte Carlo study of one (signal, component) pair: distribution of the peak
# correlation magnitude with and without the signal present.
n = 0  # column of evecs (component index)
m = 8  # row of dataset (signal index)

print(sig_projection[m, n])

# NOTE: rebinding `pc` replaces the npz archive loaded at the top of the
# notebook; re-running the `evecs = pc['evecs']` cell after this one will fail.
pc = evecs[:, n]
signal = dataset[m, :]

N_trial = 2048
proj = np.zeros(N_trial)
proj_noise = np.zeros(N_trial)

# Loop-invariant covariance of the two noise components.
noise_cov = np.eye(2) * var / 2

for k in range(N_trial):
    iq = rng.multivariate_normal([0, 0], noise_cov, 8192)

    # FFT of the complex noise trace, divided by the number of samples.
    noise = np.fft.fft(iq[:, 0] + 1j * iq[:, 1]) / 8192

    # Peak correlation magnitude with the component: signal + noise, and noise only.
    proj[k] = abs(scipy.signal.correlate(noise + signal, pc, mode='same')).max()
    proj_noise[k] = abs(scipy.signal.correlate(noise, pc, mode='same')).max()

gum_fit_sig = scipy.stats.gumbel_r.fit(proj)
rice_fit_sig = scipy.stats.rice.fit(proj)
# Starting guesses for loc / scale are passed to the optimizer.
genex_fit_sig = scipy.stats.genextreme.fit(proj, loc=3e-8, scale=9e-9)

gum_fit_noise = scipy.stats.gumbel_r.fit(proj_noise)

#print(gum_fit_sig)
print(gum_fit_noise)
print(gum_fit_noise[0]**2)
print(np.mean(abs(noise)**2))
# Blank separator line (the original bare `print` was a no-op expression).
print()
#print(rice_fit_sig)
print(genex_fit_sig)

sns.set_theme(context='talk')
fig = plt.figure(figsize=(8,5))
ax = fig.add_subplot(1,1,1)

hist = ax.hist(proj, 64, histtype='step')

# Surrogate sample from the fitted distribution, same size as the measured one.
genex_draws = scipy.stats.genextreme.rvs(genex_fit_sig[0], loc=genex_fit_sig[1], scale=genex_fit_sig[2], size=N_trial)
hist = ax.hist(genex_draws, 64, histtype='step')

hist = ax.hist(proj_noise, 64, histtype='step')

In [None]:
sns.set_theme(context='talk')
fig, ax = plt.subplots(figsize=(8, 5))

# Histogram of the signal + noise peak correlations.
hist = ax.hist(proj, 64, histtype='step')

# Bin counts normalised to unit sum, and an x grid spanning the histogram range.
counts, edges = hist[0], hist[1]
norm_n = counts / counts.sum()
x_pdf = np.linspace(edges[0], edges[-1])

#print(hist)
#hist = ax.hist(proj_noise, 64, histtype='step')

#hist = ax.hist(scipy.stats.gumbel_r.rvs(loc=gum_fit_noise[0], scale = gum_fit_noise[1], size=2048), 64, histtype='step')
#hist = ax.hist(scipy.stats.gumbel_r.rvs(loc=gum_fit_sig[0], scale = gum_fit_sig[1], size=2048), 64, histtype='step')

# Overlay random draws from the fitted generalized extreme value distribution.
genex_draws = scipy.stats.genextreme.rvs(genex_fit_sig[0], loc=genex_fit_sig[1], scale=genex_fit_sig[2], size=2048)
hist = ax.hist(genex_draws, 64, histtype='step')
#plt.plot(abs(noise))
#plt.plot(abs(signal))

# fit a generalized extreme value distribution for each pair of signal and component

In [None]:


n_signal = 5492
n_pc = 256
n_trial = 3000

# One row of three fitted genextreme parameters per (signal, component) pair:
# axis 0 is the signal index, axis 1 the component index.
genex_fit_params = np.zeros((n_signal, n_pc, 3))

for n in range(n_signal):
    # n runs over signals, so it selects a ROW of `dataset` (it previously
    # indexed the columns of `evecs`, contradicting genex_fit_params[n, m]).
    _signal = dataset[n, :]
    for m in range(n_pc):
        # m runs over components, so it selects a COLUMN of `evecs`.
        _pc = evecs[:, m]
        _proj = np.zeros(n_trial)
        for k in range(n_trial):

            noise = rng.multivariate_normal([0, 0], np.eye(2) * var / 2, 8192)
            # FFT of the complex noise trace, divided by the number of samples.
            noise = np.fft.fft(noise[:, 0] + 1j * noise[:, 1]) / 8192

            # Peak correlation magnitude of signal + noise with the component.
            _proj[k] = abs(scipy.signal.correlate(noise + _signal, _pc, mode='same')).max()

        # NOTE: this rebinds the notebook-level `genex_fit_sig` from the
        # single-pair cell above.
        genex_fit_sig = scipy.stats.genextreme.fit(_proj, loc=3e-8, scale=9e-9)
        print(genex_fit_sig)
        genex_fit_params[n, m, :] = genex_fit_sig

    # Progress marker every 500 signals.
    if n % 500 == 499:
        print(n+1)
            
            
        

In [None]:
sns.set_theme(context='talk', style='ticks')
#cmap = sns.color_palette('mako', as_cmap=True)

# Frequency axis for 8192 bins with sample spacing 1/200e6, shifted so that
# zero frequency sits in the middle.
freqs = np.fft.fftshift(np.fft.fftfreq(8192, 1/200e6))


fig, ax = plt.subplots(figsize=(8, 5))

#ax.plot(dataset_real[:, 0])
# NOTE(review): `pca_real` is only assigned by a commented-out np.load in the
# first cell, so this cell presumably fails on a fresh kernel -- confirm.
pca_real_evecs = pca_real['evecs']
for i in range(10):
    # Real part of the i-th column.
    ax.plot(pca_real_evecs[:, i].real)

#img = ax.imshow(np.log(covariance), interpolation='none', cmap=cmap, extent=(freqs[0], freqs[-1], freqs[-1], freqs[0]))

#ax.set_ylabel('Frequency (Hz)')
#ax.set_xlabel('Frequency (Hz)')

#ax.set_title('Log-Covariance Matrix for Sample Dataset', pad=20)

#plt.tight_layout()
#plt.savefig(os.path.join(PLOTPATH, '210910_log_covariance_matrix_dataset_imag_only'))

In [None]:
# Project column 1 of dataset_real onto the real parts of the first 8192 eigenvector columns.
pca_basis = pca_real['evecs'][:, :8192].real
projection = dataset_real[:, 1] @ pca_basis
print(projection.shape)
sns.set_theme(context='talk', style='ticks')
#cmap = sns.color_palette('mako', as_cmap=True)

fig, ax = plt.subplots(figsize=(8, 5))

# Projection coefficients, then the original column for comparison.
ax.plot(projection)

ax.plot(dataset_real[:, 1])


In [None]:
fig, ax = plt.subplots(figsize=(8, 5))

idata = 0
# Stack 256 identical copies of the selected column as rows, one per kernel.
n_samples = dataset_imag[:, 0].shape[0]
signal = dataset_imag[:, idata].reshape((1, n_samples)).repeat(256, axis=0)
print(signal.shape)
# NOTE(review): the shape printed here comes from pca_real while the
# convolution below uses pca_imag -- confirm which archive is intended.
print(pca_real['evecs'][:, 0:256].shape)

# Real parts of the first 256 eigenvector columns, transposed so each is a row.
kernels = pca_imag['evecs'][:, 0:256].real.T
convolved = scipy.signal.fftconvolve(signal, kernels, axes=-1, mode='same')
# Peak magnitude along the last axis, one value per kernel.
projection = np.max(abs(convolved), axis=-1)
print(projection.shape)
ax.plot(projection)
ax.plot(abs(dataset_imag[:, idata]))
#for i in range(2048):
    
    #projection.append(np.max(abs(scipy.signal.convolve(dataset_real[:, 10], pca_real['evecs'][:, i].real, mode='same'))))
    #ax.plot(abs(scipy.signal.convolve(dataset_real[:, 10], pca_real['evecs'][:, i].real, mode='same')))
    #ax.plot(dataset_real[:, 10])

In [None]:
var = 1.38e-23 * 10 * 200e6 * 50
#print(var / (np.sqrt(8192)))

# 8192 pairs with per-component variance var / 2, then divided by sqrt(8192).
iq = np.random.multivariate_normal([0,0], np.eye(2) * var / 2, 8192)
noise = iq / np.sqrt(8192)

# Keep only the first column.
noise_real = noise[:, 0]

# Mean squared magnitude of the retained column.
print(np.mean(abs(noise_real) ** 2))

In [None]:
fig, ax = plt.subplots(figsize=(8, 5))

# Magnitude of the noise column against the magnitude of column 1 of dataset_real.
ax.plot(abs(noise_real))
ax.plot(abs(dataset_real[:, 1]))

In [None]:
fig, ax = plt.subplots(figsize=(8, 5))

idata = 3
row_shape = (1, dataset_real[:, 0].shape[0])

# 1024 identical rows of (signal + noise) and of noise alone, one row per kernel.
signal = (dataset_real[:, idata] + noise_real).reshape(row_shape).repeat(1024, axis=0)
noise_signal = (noise_real).reshape(row_shape).repeat(1024, axis=0)

# Real parts of the first 1024 eigenvector columns, transposed so each is a row.
kernels = pca_real['evecs'][:, 0:1024].real.T

# Peak convolution magnitude along the last axis for each kernel.
projection = np.max(abs(scipy.signal.fftconvolve(signal, kernels, axes=-1, mode='same')), axis=-1)
projection_noise = np.max(abs(scipy.signal.fftconvolve(noise_signal, kernels, axes=-1, mode='same')), axis=-1)
ax.plot(projection)
ax.plot(projection_noise)




In [None]:
# Eigen-decomposition of `covariance`; this rebinds the notebook-level
# `evals` / `evecs` loaded from the archive earlier.
evals, evecs = np.linalg.eig(covariance)

# Persist both arrays under the keys 'evals' and 'evecs'.
pc_save_path = os.path.join(PATH, 'damselfly/data/210910_frequency_spectra_imag_principle_components')
np.savez(pc_save_path, evals=evals, evecs=evecs)

In [None]:
sns.set_theme(context='talk', style='whitegrid')
fig, ax = plt.subplots(figsize=(8, 5))

# Eigenvalue magnitudes on a logarithmic y-axis.
ax.plot(np.abs(evals))
#ax.set_xlim(0,100)
ax.set_yscale('log')
ax.set_xlabel('N')
ax.set_ylabel(r'$\log{\lambda}$')
ax.set_title('Sorted Eigenvalues, Log-scale')

plt.tight_layout()
eigenvalue_plot_path = os.path.join(PLOTPATH, '210910_log_eigenvalues_imag')
plt.savefig(eigenvalue_plot_path)

In [None]:
fig, ax = plt.subplots(figsize=(8, 5))

# Single-iteration loop: plots the real part of column 0 against the frequency axis.
for i in range(1):
    ax.plot(freqs, evecs[:, i].real)

ax.set_title('First Principle Component')
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Eigenvector Value (AU)')

plt.tight_layout()
first_pc_plot_path = os.path.join(PLOTPATH, '210910_first_principle_component_imag')
plt.savefig(first_pc_plot_path)

#ax.plot(evecs[:, 0].imag)

In [None]:
# Gram matrices (X^T X) of the two normalised signal arrays.
covariance_real = signal_real_norm.T @ signal_real_norm
covariance_imag = signal_imag_norm.T @ signal_imag_norm

# One figure per matrix, drawn without interpolation.
plt.figure()
plt.imshow(covariance_real, interpolation='none')
#plt.xlim(0, 20)
#plt.ylim(0, 20)

plt.figure()
plt.imshow(covariance_imag, interpolation='none')
#plt.xlim(0, 20)
#plt.ylim(0, 20)

In [None]:
# Eigen-decompose both matrices, printing a label before each (potentially slow) call.
print('real')
real_eig = np.linalg.eig(covariance_real)
real_evals, real_evecs = real_eig

print('imag')
imag_eig = np.linalg.eig(covariance_imag)
imag_evals, imag_evecs = imag_eig

In [None]:
# Dimensions of the eigenvector matrix of covariance_real.
print(np.shape(real_evecs))

In [None]:
# Overlay both eigenvalue arrays and zoom in on the first ten indices.
for spectrum in (real_evals, imag_evals):
    plt.plot(spectrum)
plt.xlim(0, 10)

In [None]:
# NOTE(review): this plots ROW 1 of real_evecs; np.linalg.eig returns the
# eigenvectors as columns and other cells index evecs[:, k] -- confirm that a
# row is what is wanted here.
plt.plot(real_evecs[1])
#plt.plot(real_evecs[1, :])
#plt.plot(real_evecs[2, :])
#plt.plot(real_evecs[3, :])
#plt.plot(real_evecs[4, :])
#plt.plot(real_evecs[5, :])
#plt.plot(real_evecs[6, :])
# Zoom in on indices 8000 to 8192.
plt.xlim(8000, 8192)

# try 100 signals