In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import scipy.signal
import scipy.stats
import subprocess

# Root directory of the project; every other location below is derived from it.
PATH = '/storage/home/adz6/group/project'

# Derived locations, unpacked in the same order as the relative paths listed.
RESULTPATH, PLOTPATH, DATAPATH, SIMDATAPATH, SCRIPTPATH = (
    os.path.join(PATH, relative_path)
    for relative_path in (
        'results/damselfly',
        'plots/damselfly',
        'damselfly/data/datasets',
        'damselfly/data/sim_data',
        'scripting/output/damselfly',
    )
)

"""
Date: 6/25/2021
Description: template
"""


def MakeTemplates(signals, var =  1.38e-23 * 10 * 50 * 200e6):
    """Scale each signal by ``1 / sqrt(var * sum(|signal|**2))``.

    Parameters
    ----------
    signals : array_like
        Real or complex signals. The last axis is the sample axis; any
        leading axes index independent signals (a 2-D ``(n_signals,
        n_samples)`` array behaves exactly as before).
    var : float, optional
        Scale factor multiplied into each signal's energy before the square
        root. NOTE(review): the default ``1.38e-23 * 10 * 50 * 200e6`` looks
        like k_B * temperature * resistance * bandwidth -- confirm.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``signals`` holding the scaled signals.
    """
    signals = np.asarray(signals)

    # Per-signal energy, sum(|s|^2) over the sample axis. This equals the
    # diagonal of signals @ signals^H, but never materialises the full
    # (n_signals x n_signals) matrix.
    energy = np.sum(np.abs(signals) ** 2, axis=-1, keepdims=True)

    # keepdims leaves a trailing length-1 axis, so the factor broadcasts
    # across the samples of each signal without an explicit repeat.
    norm = 1 / np.sqrt(var * energy)

    return norm * signals


In [None]:
# List the entries found directly under SCRIPTPATH.
os.listdir(SCRIPTPATH)

In [None]:
#file_list = os.listdir(SCRIPTPATH)
#dest_dir = os.path.join(SCRIPTPATH, '211011_pca_angle_subset_job_array_proj')


#for file in file_list:
#    for file2 in os.listdir(os.path.join(SCRIPTPATH, file)):
#        try:
#            fname = os.listdir(os.path.join(SCRIPTPATH, file, file2))[0]

#            subprocess.run(['cp', os.path.join(os.path.join(SCRIPTPATH, file, file2, fname)), os.path.join(dest_dir, fname)])
#        except:
#            continue
    #print(os.listdir(os.path.join(SCRIPTPATH, file)))

In [None]:
#os.listdir(dest_dir)

In [None]:
#file = np.load(os.path.join(dest_dir, '0.npz'))

In [None]:
#file['test']

In [None]:
# Directory whose numbered .npz files are loaded in the following cell.
dest_dir = os.path.join(SCRIPTPATH, '211013_pca_energy_subset_job_array_proj')

# Number of entries in that directory.
print(len(os.listdir(dest_dir)))

In [None]:
# Stack the numbered files '0.npz', '1.npz', ... from dest_dir into three
# complex64 arrays of shape (rows, columns, n_files): slot i along the last
# axis holds the contents of '{i}.npz'.
# Assumes the directory contains exactly the files 0.npz .. (n_files - 1).npz,
# which is what the index-based file names rely on -- TODO confirm.
n_files = len(os.listdir(dest_dir))  # list the directory once
splits = ('train', 'val', 'test')
stacked = {}       # split name -> preallocated output array
failed_files = []  # files that could not be loaded; their slots stay zero

for i in range(n_files):
    fname = f'{i}.npz'
    try:
        # The context manager closes the archive after reading; without it
        # every iteration leaves a file handle open.
        with np.load(os.path.join(dest_dir, fname)) as file_array:
            loaded = {split: file_array[split] for split in splits}

        if not stacked:
            # Allocate from the first file that loads, so an unreadable
            # 0.npz no longer aborts the whole cell.
            stacked = {
                split: np.zeros(
                    (loaded[split].shape[0], loaded[split].shape[1], n_files),
                    np.complex64,
                )
                for split in splits
            }

        for split in splits:
            stacked[split][:, :, i] = loaded[split]
    except Exception as err:
        # Best effort, as before: skip the file and keep going, but report
        # the file that actually failed and why. KeyboardInterrupt is not
        # swallowed because it does not derive from Exception.
        print(f'{fname}: {type(err).__name__}: {err}')
        failed_files.append(fname)

if not stacked:
    raise RuntimeError(f'no projection files could be loaded from {dest_dir}')

train_data = stacked['train']
val_data = stacked['val']
test_data = stacked['test']

In [None]:
# List the datasets directory; DATAPATH points at the same location.
os.listdir(DATAPATH)

In [None]:
# Magnitude of train_data[0, 0, :], i.e. the first row and first column
# plotted along the last axis.
plt.plot(abs(train_data[0, 0, :]))

In [None]:
# Open the HDF5 file the labels are read from (read-only).
# NOTE(review): this file name says "angle_range" while dest_dir refers to an
# "energy_subset" run -- confirm these labels belong to the loaded projections.
label_file_path = os.path.join(DATAPATH, '211009_84_1d2sl4mt_pca_angle_range.h5')
h5dataset = h5py.File(label_file_path, 'r')

In [None]:
# Read the 'label' dataset of each split fully into memory ([:] copies it
# out of the HDF5 file into an in-memory array).
train_label, val_label, test_label = (
    h5dataset[split]['label'][:] for split in ('train', 'val', 'test')
)



In [None]:
# NOTE: numpy has no module-level `flatten`, so calling `np.flatten()` raises
# AttributeError and stops a Restart & Run All. Flattening is available as
# the method `ndarray.flatten()` or the function `np.ravel(a)`. This cell is
# intentionally left as a no-op.

In [None]:
# Row indices of the label == 1 and label == 0 examples. np.flatnonzero always
# returns a 1-D index array, so a single match does not collapse to a scalar
# index the way argwhere(...).squeeze() does.
# Assumes one label per row of train_data -- TODO confirm.
signal_rows = np.flatnonzero(train_label == 1)
noise_rows = np.flatnonzero(train_label == 0)

# Merge the last two axes into a single feature axis. The sizes come from the
# arrays themselves rather than hard-coded row counts and widths.
n_features = train_data.shape[1] * train_data.shape[2]
flat_train_data = train_data[signal_rows, :, :].reshape(len(signal_rows), n_features)
flat_noise_data = train_data[noise_rows, :, :].reshape(len(noise_rows), n_features)

# Number of label == 1 rows whose mean magnitude exceeds 4e-8.
print(np.argwhere(abs(flat_train_data).mean(axis=1) > 4e-8).shape)

In [None]:
# Overlay step histograms (32 bins) of the per-row mean magnitude, first for
# the label == 1 rows and then for the label == 0 rows.
for flat_data in (flat_train_data, flat_noise_data):
    hist = plt.hist(abs(flat_data).mean(axis=1), 32, histtype='step')

In [None]:
# Row selections by label, computed once and reused below.
signal_rows = np.argwhere(train_label==1).squeeze()
noise_rows = np.argwhere(train_label==0).squeeze()

# Per-row mean magnitude along the last axis, for individual columns.
signal_col0_mean = abs(train_data[signal_rows, 0, :]).mean(axis=1)
signal_col1_mean = abs(train_data[signal_rows, 1, :]).mean(axis=1)
noise_col0_mean = abs(train_data[noise_rows, 0, :]).mean(axis=1)

hist = plt.hist(signal_col0_mean, 32, histtype='step')
hist = plt.hist(signal_col1_mean, 32, histtype='step')
hist = plt.hist(noise_col0_mean, histtype='step')

# Shape of the index array of label == 1 rows whose column-1 mean exceeds 4e-8.
np.argwhere(signal_col1_mean > 4e-8).shape

In [None]:
# Column 1 only: per-row mean magnitude for label == 1 rows (32 bins) and for
# label == 0 rows (default binning), drawn as filled histograms.
signal_rows = np.argwhere(train_label==1).squeeze()
noise_rows = np.argwhere(train_label==0).squeeze()

hist = plt.hist(abs(train_data[signal_rows, 1, :]).mean(axis=1), 32)
hist = plt.hist(abs(train_data[noise_rows, 1, :]).mean(axis=1))

# save pca data as a dataset for DL

In [None]:
# Write train/test/val to a new HDF5 file. Each complex column of the input
# is expanded into three consecutive real-valued channels (real, imag, abs),
# so an input of shape (rows, columns, n) becomes (rows, 3 * columns, n).
newh5dataset = h5py.File(os.path.join(PATH, 'damselfly/data/datasets/', '211014_84_1d2sl4mt_pca_energy_range_projected_real_imag_abs_val_cnn.h5'), 'w')
groups = ['train', 'test', 'val']
labels = [train_label, test_label, val_label]
datasets = [train_data, test_data, val_data]

try:
    for group_name, label, dataset in zip(groups, labels, datasets):
        n_rows, n_columns, n_points = dataset.shape

        # np.zeros defaults to float64, matching the dtype written before.
        new_dataset = np.zeros((n_rows, n_columns * 3, n_points))

        # Channel 3*c + k holds part k of input column c (k: 0 real, 1 imag,
        # 2 abs). Strided assignment fills all rows and columns at once
        # instead of looping over every row in Python.
        new_dataset[:, 0::3, :] = dataset.real
        new_dataset[:, 1::3, :] = dataset.imag
        new_dataset[:, 2::3, :] = np.abs(dataset)

        newgroup = newh5dataset.create_group(group_name)
        newgroup.create_dataset('data', data=new_dataset)
        newgroup.create_dataset('label', data=label)

        print(group_name)
except BaseException:
    # Do not leave a half-written file open if anything fails or the cell is
    # interrupted. On success the file stays open, as before, and is closed
    # by the explicit close() call that follows this cell.
    newh5dataset.close()
    raise

In [None]:
# Close the output HDF5 file that was opened for writing in the previous cell.
newh5dataset.close()

In [None]:
# List the datasets directory again; DATAPATH is this same location.
os.listdir(DATAPATH)