In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import scipy.signal
import scipy.stats
import scipy.integrate 
import json

# Root of the shared project area; every other location is derived from it.
PATH = '/storage/home/adz6/group/project'

# Output locations for numerical results and figures.
RESULTPATH, PLOTPATH = (
    os.path.join(PATH, f'{kind}/damselfly') for kind in ('results', 'plots')
)

# Input locations inside the damselfly checkout: datasets, simulation data
# and training artefacts (checkpoints are read from TRAINPATH below).
DATAPATH, SIMDATAPATH, TRAINPATH = (
    os.path.join(PATH, f'damselfly/{leaf}')
    for leaf in ('data/datasets', 'data/sim_data', 'training')
)

"""
Date: 6/25/2021
Description: template
"""



In [None]:
# Directory holding the checkpoints of the CNN regression training run.
checkpoint_dir = os.path.join(
    TRAINPATH,
    'checkpoints',
    '210920_dset_name_84_1d2sl4mt_partial_spectra_regress_temp10.0_model_cnn_regression',
)

# The listing is evaluated but not shown (it is not the last expression of the
# cell); it still raises if the directory does not exist.
os.listdir(checkpoint_dir)

# Weights used for inference in the cells below.
modelpath = os.path.join(checkpoint_dir, 'epoch138.pth')

In [None]:
# Read the JSON description of the network (uses the 'nch' and 'conv' entries).
config_file = os.path.join(
    PATH, 'damselfly/model_config', '210920_dfcnn_regress_2sl_config1.json'
)
with open(config_file, 'r') as infile:
    model_config = json.load(infile)

# Input width of the first linear layer, computed by the damselfly helper from
# the conv configuration, the channel list and 2253 (presumably the length of
# the input spectrum - TODO confirm).
flattened_size = df.models.CalcConvMaxpoolOutputSize(
    model_config['conv'], model_config['nch'], 2253
)

# Three parallel lists handed to DFCNNRegression. The last one looks like
# per-layer dropout probabilities - TODO confirm against df.models.
linear_list = [
    [flattened_size, 416],
    [416, 213],
    [0.5, 0.5],
]

model = df.models.DFCNNRegression(model_config['nch'], model_config['conv'], linear_list)

# Restore the trained weights and switch to inference mode.
model.load_state_dict(torch.load(modelpath))
model.eval()

In [None]:
# HDF5 file with the partial-spectra regression dataset, located in DATAPATH.
dset_filename = '210920_84_1d2sl4mt_partial_spectra_regress.h5'
dset = os.path.join(DATAPATH, dset_filename)

In [None]:
# Load the 'test' split with float labels. `dset` is already built from
# DATAPATH, so the join leaves it unchanged while DATAPATH is absolute.
test_data = df.data.DFDataset(os.path.join(DATAPATH, dset), 'test', label_type='float')

# Serve the split in fixed-order batches for evaluation.
batchsize = 5000
test_tensors = torch.utils.data.TensorDataset(test_data.data, test_data.label)
dataloader = torch.utils.data.DataLoader(
    test_tensors,
    batch_size=batchsize,
    shuffle=False,
)

In [None]:
# Run the trained model over the test split and keep the predictions.
#
# The original loop called `input()` after every batch to pause for a key
# press; that blocks "Restart & Run All" and any non-interactive execution, so
# the pause is removed and the per-batch results are collected instead.
device = 'cuda:0'
model.to(device)

test_labels, test_outputs = [], []

with torch.no_grad():
    for batch, labels in dataloader:
        # Move the inputs to the GPU, bring the predictions back to the CPU.
        output = model(batch.to(device)).to('cpu')

        print(labels, output)

        # Retain the per-batch results so later cells can compare them.
        test_labels.append(labels)
        test_outputs.append(output)

In [None]:
# Load the 'val' split of the same HDF5 file with float labels. `dset` is
# already built from DATAPATH, so the extra join leaves it unchanged as long as
# DATAPATH is an absolute path.
val_data = df.data.DFDataset(os.path.join(DATAPATH, dset), 'val', label_type='float')

In [None]:
# Display the validation labels (last expression of the cell, so it is echoed).
val_data.label

# use meta data to select a range of pitch angles

In [None]:
# Build a DataFrame from every one-dimensional dataset in the 'meta' group.
#
# NOTE(review): `h5datafile` is not defined in any visible cell of this
# notebook; it has to be an open HDF5 file object created beforehand (e.g. with
# h5py.File) - confirm which file it should point to.
meta_columns = {}

for key, node in h5datafile['meta'].items():
    # Read each dataset from disk once (the original read it twice).
    values = node[:]
    if values.ndim == 1:
        meta_columns[key] = values

# Keep the dict and the frame under separate names so re-running the cell
# never sees a half-converted `meta`.
meta = pd.DataFrame.from_dict(meta_columns)

In [None]:
# Inclusive range of 'theta_min' values to keep.
angle_max = 90
angle_min = 84

# Row labels of the metadata entries whose theta_min lies in [angle_min, angle_max].
theta_min = meta['theta_min']
in_range = (theta_min <= angle_max) & (theta_min >= angle_min)
pitch_angle_inds = np.array(meta[in_range].index.array)

# NOTE(review): `dataset` is not defined in any visible cell - it must already
# exist in the kernel, indexed consistently with `meta`.
data_subset = dataset[pitch_angle_inds, :]

# Plain transpose product (no complex conjugate is applied).
covariance_subset = np.matmul(data_subset.T, data_subset)
print(covariance_subset.shape)

In [None]:
sns.set_theme(context='talk', style='ticks')
cmap = sns.color_palette('mako', as_cmap=True)

# Centred FFT bin frequencies for 8192 bins at a sample spacing of 1/200e6
# (presumably a 200 MHz sampling rate - TODO confirm). Reused by later cells.
freqs = np.fft.fftshift(np.fft.fftfreq(8192, 1/200e6))

fig, ax = plt.subplots(figsize=(8, 8))

# Log-magnitude of the covariance with both axes labelled in frequency; the
# vertical extent is flipped so the first row sits at freqs[0].
log_magnitude = np.log(abs(covariance_subset))
frequency_extent = (freqs[0], freqs[-1], freqs[-1], freqs[0])
img = ax.imshow(log_magnitude, interpolation='none', cmap=cmap, extent=frequency_extent)

# eigenvalue decomposition for each 1deg range of pitch angles

In [None]:
# One covariance matrix and eigen-decomposition per 1-degree theta_min bin.
# Slot k holds the bin whose upper edge is pitch_angle_tops[k].
pitch_angle_tops = [90, 89, 88, 87, 86, 85]

covariance_matrices = np.zeros((6, 8192,  8192), dtype = np.complex64)
evals_matrix = np.zeros((6, 8192), dtype = np.complex64)
evecs_matrix = np.zeros((6, 8192,  8192), dtype = np.complex64)

for k, angle in enumerate(pitch_angle_tops):

    angle_max, angle_min = angle, angle - 1

    # Both edges are inclusive, so an entry sitting exactly on an integer
    # angle is selected by two neighbouring bins.
    theta_min = meta['theta_min']
    in_bin = (theta_min <= angle_max) & (theta_min >= angle_min)
    pitch_angle_inds = np.array(meta[in_bin].index.array)

    data_subset = dataset[pitch_angle_inds, :]

    # Plain transpose product (no complex conjugate is applied).
    covariance_subset = np.matmul(data_subset.T, data_subset)
    covariance_matrices[k] = covariance_subset

    # General eigen-solver; results are stored in the order it returns them.
    evals, evecs = np.linalg.eig(covariance_subset)
    evals_matrix[k] = evals
    evecs_matrix[k] = evecs

    print((angle_max, angle_min))
    
    
    

In [None]:
# Cached results of the per-degree eigen-decomposition. They were written once
# with the calls below, which are kept disabled:
#   np.save(os.path.join(PATH, 'damselfly/data/210916_principle_components_1deg_pitch_angle_range_covariance'), covariance_matrices)
#   np.save(os.path.join(PATH, 'damselfly/data/210916_principle_components_1deg_pitch_angle_range_evals'), evals_matrix)
#   np.save(os.path.join(PATH, 'damselfly/data/210916_principle_components_1deg_pitch_angle_range_evecs'), evecs_matrix)
pc_cache_stem = 'damselfly/data/210916_principle_components_1deg_pitch_angle_range'


def _load_pc_cache(suffix):
    """Load the cached array whose file name ends in ``_<suffix>.npy``."""
    return np.load(os.path.join(PATH, f'{pc_cache_stem}_{suffix}.npy'))


covariance_matrices = _load_pc_cache('covariance')
evals_matrix = _load_pc_cache('evals')
evecs_matrix = _load_pc_cache('evecs')

In [None]:
# Eigenvalue magnitudes of each 1-degree pitch-angle bin on a log axis.
sns.set_theme(context='talk', style='whitegrid')
fig, ax = plt.subplots(figsize=(10, 5))

# Slot i of evals_matrix is the bin from (90 - i - 1) to (90 - i) degrees.
for i in range(6):
    ax.plot(abs(evals_matrix[i]), label = f'{90 - i}-{90 - i - 1}')

ax.set_yscale('log')
ax.set_ylabel(r'$\log{\lambda}$')
ax.set_xlabel('N')
# Fixed the 'Pitach' typo in the original title.
ax.set_title('Sorted Eigenvalues by Pitch Angle, Log-scale')
ax.legend(title='Pitch Angle [deg]', loc=(1.01,0.))

# Zoom onto the first 1024 eigenvalues and the range where they lie.
ax.set_xlim(0, 1024)
ax.set_ylim(1e-16, 1e-13)
plt.tight_layout()
#plt.savefig(os.path.join(PLOTPATH, '210916_log_eigenvalues_pitch_angle'))

In [None]:

# Plot eigenvector column `n` (real and imaginary parts) for each of the six
# 1-degree pitch-angle bins, one figure per bin.
#
# Fixes relative to the original cell:
#   * axis labels and title were set after the loop, so only the last of the
#     six figures was labelled; they are now applied to every figure;
#   * the inner `for i in range(1)` loop did nothing and is removed;
#   * the hard-coded title said "Third" while n = 1; it is now derived from n
#     and from the pitch-angle bin being shown.
n = 1
m = 2  # not used in this cell; kept so the kernel state matches the original

for j in range(6):
    fig, ax = plt.subplots(figsize=(8, 5))

    component = evecs_matrix[j, :, n]
    ax.plot(freqs, component.real, label='real')
    ax.plot(freqs, component.imag, label='imag')

    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('Eigenvector Value (AU)')
    # Slot j is the bin from (90 - j - 1) to (90 - j) degrees.
    ax.set_title(f'Principal Component {n}, Pitch Angle {90 - j}-{90 - j - 1} deg')
    ax.legend(loc=2)

#plt.tight_layout()
#plt.savefig(os.path.join(PLOTPATH, '210914_third_principle_component'))

In [None]:
# Compare one principal component with one unit-energy signal, first directly
# and then through their convolution.
n = 3
# The original cell contained a bare `m = `, a SyntaxError that stopped the
# whole cell from running. 2 matches the value used in the previous cell -
# TODO confirm the intended signal index.
m = 2

# `evecs` is the eigenvector matrix left over from the decomposition loop.
pc = evecs[:, n]

# Scale the chosen signal to unit energy so it is comparable with `pc`.
signal = dataset[m, :] / np.sqrt((abs(dataset[m, :]) ** 2 ).sum())
print((abs(signal) ** 2).sum())
print((abs(pc) ** 2).sum())

# Figure 1: real and imaginary parts of component and signal.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(freqs, pc.real)
ax.plot(freqs, signal.real, color='tab:red')
ax.plot(freqs, pc.imag)
ax.plot(freqs, signal.imag, color='tab:green')

# Figure 2: magnitude of the component/signal convolution next to the
# signal's convolution with itself. The original wrapped these two lines in
# `for n in range(1)`, which ran once and overwrote `n`; the loop is removed.
fig, ax = plt.subplots(figsize=(8, 5))
phase_rotation = np.exp(-1j * (8) * np.pi / 16)
ax.plot(abs(scipy.signal.convolve(phase_rotation * pc, signal, mode='full')))
ax.plot(abs(scipy.signal.convolve(signal, signal, mode='full')));

In [None]:
n = 2
m = 3

# Noise variance; reused by the later projection cells.
# NOTE(review): looks like k_B * T * bandwidth * R with T = 10, B = 200e6 and
# R = 50 - confirm the intended units.
var = 1.38e-23 * 10 * 200e6 * 50

# Draw 8192 complex samples (independent real/imaginary parts, var / 2 each)
# and take their FFT scaled by 1 / 8192.
iq_samples = np.random.multivariate_normal([0,0], np.eye(2) * var / 2, 8192)
noise = np.fft.fft(iq_samples[:, 0] + 1j * iq_samples[:, 1]) / 8192

pc = evecs[:, n]
signal = dataset[m, :]

# Figure 1: noisy signal convolved with the clean signal.
fig, ax = plt.subplots(figsize=(8, 5))
noisy_signal = noise + signal
ax.plot(abs(scipy.signal.convolve(noisy_signal, signal, mode='full')))

# Figure 2: the signal is replaced by the principal component scaled to the
# signal's norm before adding noise.
fig, ax = plt.subplots(figsize=(8, 5))

signal_norm = np.sqrt((abs(dataset[m, :]) ** 2 ).sum())
norm_noise = noise / np.sqrt((abs(noise) ** 2).sum())
norm_signal = dataset[m, :] / signal_norm

noisy_component = noise + pc * signal_norm
ax.plot(abs(scipy.signal.convolve(noisy_component, signal, mode='full')))
#ax.plot(abs(scipy.signal.convolve(signal, signal, mode='full')))

In [None]:
# For each of the first N_comp principal components, draw fresh noise and
# record the peak cross-correlation magnitude for three pairings.
# Relies on `var` from the noise cell above.
N_comp = 256
m = 2
projection_n, projection_noise, projection_signal = [], [], []

for n in range(N_comp):

    iq_samples = np.random.multivariate_normal([0,0], np.eye(2) * var / 2, 8192)
    noise = np.fft.fft(iq_samples[:, 0] + 1j * iq_samples[:, 1]) / 8192

    pc = evecs[:, n]

    noisy_signal = noise + dataset[m, :]
    # Principal component scaled to the norm of the signal.
    scaled_pc = pc * np.sqrt((abs(dataset[m, :]) ** 2 ).sum())

    # noisy signal vs. the true signal
    projection_signal.append(abs(scipy.signal.correlate(noisy_signal, dataset[m, :], mode='full')).max())
    # scaled component vs. noisy signal
    projection_n.append(abs(scipy.signal.correlate(scaled_pc, noisy_signal, mode='full')).max())
    # pure noise vs. scaled component
    projection_noise.append(abs(scipy.signal.correlate(noise, scaled_pc, mode='full')).max())

projection_n = np.array(projection_n)
projection_noise = np.array(projection_noise)
projection_signal = np.array(projection_signal)

print(projection_n.sum(), projection_signal.sum(), projection_noise.sum())

# Figure 1: peak correlation per principal component.
fig, ax = plt.subplots(figsize=(8, 5))

ax.plot(projection_n, label='signal+noise')
ax.plot(projection_noise, label='noise')
ax.legend(loc=1)
ax.set_xlabel('Principle Component')
ax.set_ylabel('Correlation Max')
ax.set_title('Correlation Using Principle Components',pad=20)

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_correlation_using_pc_hard'))

# Figure 2: peak correlation against the true signal, one point per draw.
fig, ax = plt.subplots(figsize=(8, 5))

ax.plot(projection_signal, label='signal+noise')
ax.plot(projection_noise,label='noise')
ax.legend(loc=1)

ax.set_xlabel('Trial')
ax.set_ylabel('Correlation Max')
ax.set_title('Correlation Using Signal, N-Trials',pad=20)

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_correlation_using_signal_hard'))

In [None]:
# Repeat the principal-component correlation experiment N_trial times and keep
# the mean peak correlation of every trial.
# Relies on `var` from the noise cell above.
#
# Fix: the original called `scipy.signal.correlatet` (typo), which raises
# AttributeError on the first iteration. The signal and its norm do not
# change inside the loops, so they are also computed once up front.
N_comp = 256
N_trial = 50
m = 1

pc_max, signal_max, noise_max = [], [], []

true_signal = dataset[m, :]
signal_norm = np.sqrt((abs(true_signal) ** 2 ).sum())

for k in range(N_trial):

    projection_n, projection_noise, projection_signal = [], [], []

    for n in range(N_comp):

        # Fresh complex noise for every component of every trial.
        noise = np.random.multivariate_normal([0,0], np.eye(2) * var / 2, 8192)
        noise = np.fft.fft(noise[:, 0] + 1j * noise[:, 1]) / 8192

        pc = evecs[:, n]
        noisy_signal = noise + true_signal
        scaled_pc = pc * signal_norm

        # Peak correlation magnitude for the three pairings.
        projection_signal.append(abs(scipy.signal.correlate(noisy_signal, true_signal, mode='full')).max())
        projection_n.append(abs(scipy.signal.correlate(scaled_pc, noisy_signal, mode='full')).max())
        projection_noise.append(abs(scipy.signal.correlate(noise, scaled_pc, mode='full')).max())

    projection_n = np.array(projection_n)
    projection_noise = np.array(projection_noise)
    projection_signal = np.array(projection_signal)

    # Average over components to get one number per trial.
    pc_max.append(projection_n.mean())
    signal_max.append(projection_signal.mean())
    noise_max.append(projection_noise.mean())
    
    

    
    



In [None]:
# Convert the per-trial means collected above into arrays.
pc_max, signal_max, noise_max = (
    np.array(pc_max),
    np.array(signal_max),
    np.array(noise_max),
)

# Overlay the three distributions as step histograms on one axis.
fig, ax = plt.subplots(figsize=(8, 5))

hist1 = ax.hist(pc_max, histtype='step')
hist2 = ax.hist(noise_max, histtype='step')
hist3 = ax.hist(signal_max, histtype='step')


In [None]:
n = 5
m = 6

pc = evecs[:, n]
signal = dataset[m, :]

# Signal scaled to unit energy, used as the template in both correlations.
unit_signal = signal / np.sqrt((abs(dataset[m, :]) ** 2 ).sum())

# Peak correlation magnitude of the component with the template, and of the
# template with itself as the reference value.
match_pc = abs(scipy.signal.correlate(pc, unit_signal, mode='same')).max()
match_sig = abs(scipy.signal.correlate(unit_signal, unit_signal, mode='same')).max()

print(match_pc, match_sig, match_pc / match_sig)

In [None]:
sns.set_theme(context='talk', style='whitegrid')

# Magnitudes of the current component (`pc`) and of the unit-energy signal
# taken from row `m` of the dataset, against frequency.
fig, ax = plt.subplots(figsize=(8, 5))

unit_signal = signal / np.sqrt((abs(dataset[m, :]) ** 2 ).sum())

ax.plot(freqs, abs(pc), label='Principle Component')
ax.plot(freqs, abs(unit_signal), label='Signal')

ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('Amplitude')
ax.legend(loc=2)

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_normalized_signal_and_pc'))

#print(np.vdot(pc, pc))
#print(np.vdot(signal / np.sqrt((abs(dataset[m, :]) ** 2 ).sum()), signal / np.sqrt((abs(dataset[m, :]) ** 2 ).sum())))

In [None]:
# Convolve the unit-energy signal with itself, print the energy of the result
# and plot its magnitude.
unit_signal = signal / np.sqrt((abs(signal) ** 2 ).sum())
x = scipy.signal.convolve(unit_signal, unit_signal, mode='same')
print(np.vdot(x, x))
plt.plot(abs(x))

In [None]:
# Same as the previous cell, with the principal component as the first operand.
unit_signal = signal / np.sqrt((abs(signal) ** 2 ).sum())
x = scipy.signal.convolve(pc, unit_signal, mode='same')
print(np.vdot(x,x))
plt.plot(abs(x))

In [None]:
# Peak correlation magnitude between every unit-energy signal (rows) and each
# of the first 256 principal components (columns).
#
# Fix: the original built `norm_dataset` and never used it, renormalizing each
# row again on every pass of the inner loop. The normalized rows are now
# computed once (by broadcasting) and reused.
n_components = 256
pc_match_matrix = np.zeros((dataset.shape[0], n_components))

# Per-row norm with a trailing axis of length 1 so the division broadcasts.
row_norms = np.sqrt((abs(dataset) ** 2 ).sum(axis=-1, keepdims=True))
norm_dataset = dataset / row_norms

for k in range(dataset.shape[0]):
    # Progress report every 100 signals.
    if k % 100 == 99:
        print(k+1)

    unit_signal = norm_dataset[k, :]

    for i in range(n_components):
        pc = evecs[:, i]

        x = abs(scipy.signal.correlate(pc, unit_signal, mode='same'))

        pc_match_matrix[k, i] = x.max()

        #if i % 10 == 9:
        #    print(i + 1)



In [None]:
# Persist the match matrix; np.save appends the '.npy' extension.
pc_match_file = os.path.join(PATH, 'damselfly/data', '210915_pc_match_matrix_normalized')
np.save(pc_match_file, pc_match_matrix)

In [None]:
# Energy of every signal, copied across 256 columns so the result has the same
# shape as pc_match_matrix.
signal_energy = (abs(dataset ) ** 2).sum(axis=-1)
energy_column = signal_energy.reshape((dataset.shape[0], 1))
sig_match_matrix = energy_column.repeat(256, axis=-1)

In [None]:
sns.set_theme(style='ticks', context='talk')
cmap = sns.color_palette('mako', as_cmap=True)

# Heat map of the match matrix, transposed so components run along the
# vertical axis and signals along the horizontal axis.
fig, ax = plt.subplots(figsize=(8, 6))

match_by_component = (pc_match_matrix).T
img = ax.imshow(match_by_component, aspect='auto', cmap=cmap, interpolation='none')
fig.colorbar(img, label='Match Ratio')

ax.set_title('Match Ratio')
ax.set_xlabel('Signal Index')
ax.set_ylabel('Principle Component')

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_principle_components_match_ratio_matrix'))



In [None]:
sns.set_theme(style='whitegrid', context='talk')
cmap = sns.color_palette('mako', as_cmap=True)

# Match of each principal component averaged over all signals.
fig, ax = plt.subplots(figsize=(8, 5))

mean_match = (pc_match_matrix).mean(axis=0)
ax.plot(mean_match)

ax.set_title('Mean Match for Principle Component')
ax.set_xlabel('Principle Component')
ax.set_ylabel('Mean Match')

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_mean_match_for_principle_component'))

In [None]:
sns.set_theme(style='whitegrid', context='talk')
cmap = sns.color_palette('mako', as_cmap=True)

# Same per-component mean match, ordered from largest to smallest.
fig, ax = plt.subplots(figsize=(8, 5))

mean_match_descending = np.flip(np.sort((pc_match_matrix).mean(axis=0)))
ax.plot(mean_match_descending)

ax.set_title('Mean Match for Principle Component, Sorted')
ax.set_xlabel('Principle Component')
ax.set_ylabel('Mean Match')

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_mean_match_for_principle_component_sorted'))

In [None]:
sns.set_theme(style='whitegrid', context='talk')
cmap = sns.color_palette('mako', as_cmap=True)

# Distribution (32 bins) of the best match each signal reaches over all
# principal components.
fig, ax = plt.subplots(figsize=(8, 5))

best_match_per_signal = pc_match_matrix.max(axis=-1)
hist = ax.hist(best_match_per_signal, 32)

ax.set_title('Best Principle Component Match Ratio per Signal')
ax.set_xlabel('Match Ratio')
ax.set_ylabel('N')

plt.tight_layout()
plt.savefig(os.path.join(PLOTPATH, '210914_best_match_for_principle_component'))
#print(np.mean(pc_match_matrix.max(axis=-1)))

In [None]:
# Transpose products of the normalized real and imaginary parts.
# NOTE(review): `signal_real_norm` and `signal_imag_norm` are not defined in
# any visible cell; they must already exist in the kernel.
covariance_real, covariance_imag = (
    np.matmul(part.T, part) for part in (signal_real_norm, signal_imag_norm)
)

# One image per matrix, each in its own figure.
plt.figure()
plt.imshow(covariance_real, interpolation = 'none')

plt.figure()
plt.imshow(covariance_imag, interpolation = 'none')

In [None]:
# Eigen-decompose the two matrices one after the other. Each print runs
# before its decomposition, so it marks which one is currently in progress.
print('real')
real_evals, real_evecs = np.linalg.eig(covariance_real)
print('imag')
imag_evals, imag_evecs = np.linalg.eig(covariance_imag)

In [None]:
# Sanity check: shape of the eigenvector matrix of covariance_real.
print(real_evecs.shape)

In [None]:
# Overlay both eigenvalue sequences and zoom onto the first ten entries.
for eigenvalues in (real_evals, imag_evals):
    plt.plot(eigenvalues)
plt.xlim(0, 10)

In [None]:
# Plot row 1 of the eigenvector matrix, zoomed onto indices 8000-8192.
# NOTE(review): np.linalg.eig returns eigenvectors as COLUMNS (v[:, i] belongs
# to eigenvalue i), so `real_evecs[1, :]` is element 1 of every eigenvector
# rather than a single eigenvector - confirm whether `real_evecs[:, 1]` was
# intended.
plt.plot(real_evecs[1, :])
# Other rows that were inspected the same way (disabled):
#plt.plot(real_evecs[1, :])
#plt.plot(real_evecs[2, :])
#plt.plot(real_evecs[3, :])
#plt.plot(real_evecs[4, :])
#plt.plot(real_evecs[5, :])
#plt.plot(real_evecs[6, :])
plt.xlim(8000, 8192)

# try 100 signals