In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import mayfly as mf
import scipy.signal
import scipy.stats
import scipy.interpolate

# Root of the shared project area; every other location is resolved relative to it.
PATH = '/storage/home/adz6/group/project'

# Results, plots and dataset directories used throughout this notebook.
RESULTPATH, PLOTPATH, DATAPATH = (
    os.path.join(PATH, subdir)
    for subdir in ('results/damselfly', 'plots/damselfly', 'datasets/data')
)
#SIMDATAPATH = os.path.join(PATH, 'damselfly/data/sim_data')

"""
Date: 6/25/2021
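Description: keep only the samples of the DL classification dataset that sit at the largest-magnitude PCA eigenvector components, and save the reduced dataset (notebook started from a template)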
"""

def ProjectDataset2D(h5file, evecs, n_evecs, n_project, name):
    """Write a reduced copy of a dataset that keeps only selected sample indices.

    For each of the first ``n_evecs`` rows of ``evecs``, the ``n_project``
    column indices with the largest absolute value are collected. The third
    axis of every split's ``data`` array is indexed with those
    ``n_evecs * n_project`` indices and the result is reshaped to
    ``(data.shape[0], data.shape[1], n_evecs, n_project)``. Labels and the
    listed metadata fields are copied unchanged.

    Parameters
    ----------
    h5file : h5py.File
        Open source file. Must contain the groups ``'train'``, ``'test'`` and
        ``'val'``, each holding ``'label'``, ``'data'`` and a ``'meta'`` group
        with ``'energy'``, ``'theta_min'`` and ``'x_min'``.
    evecs : numpy.ndarray
        2-D array; rows are ranked independently along the last axis.
        NOTE(review): presumably one eigenvector per row -- confirm against
        how the PCA result was saved.
    n_evecs : int
        Number of leading rows of ``evecs`` to use.
    n_project : int
        Number of largest-magnitude indices kept per row.
    name : str
        Path of the HDF5 file to create. Opened in ``'w'`` mode, so an
        existing file at that path is overwritten.

    Returns
    -------
    None
        The result is written to ``name``; each split name is printed once it
        has been written.
    """
    # Compute the indices before touching the output path, so a failure here
    # does not truncate an existing file.
    # argsort is ascending; flipping gives descending |component| order.
    ranked_inds = np.flip(np.argsort(abs(evecs), axis=-1), axis=-1)
    projection_inds = ranked_inds[0:n_evecs, 0:n_project].flatten()

    # The context manager closes the output file even when a key is missing
    # or a write fails part-way through.
    with h5py.File(name, 'w') as new_h5file:
        for key in ['train', 'test', 'val']:
            group = new_h5file.create_group(key)
            group.create_dataset('label', data=h5file[key]['label'][:])

            meta_group = group.create_group('meta')
            for metakey in ['energy', 'theta_min', 'x_min']:
                meta_group.create_dataset(metakey, data=h5file[key]['meta'][metakey])

            # Load the whole split into memory so that the unsorted index
            # list can be applied with NumPy fancy indexing.
            data = h5file[key]['data'][:]
            projected = data[:, :, projection_inds].reshape(
                (data.shape[0], data.shape[1], n_evecs, n_project)
            )
            group.create_dataset('data', data=projected)
            print(key)
        
    

In [None]:
#os.mkdir(os.path.join(DATAPATH, 'dl', 'pca'))

In [None]:
# List the files currently present in the 'dl' dataset directory.
os.listdir(os.path.join(DATAPATH, 'dl'))

In [None]:
# List the files currently present in the 'bf' dataset directory.
os.listdir(os.path.join(DATAPATH, 'bf'))

In [None]:
# List the files currently present in the 'datasets/kass' directory of the project.
os.listdir(os.path.join(PATH, 'datasets', 'kass'))

# load data

In [None]:
# signal data
# Open the DL classification dataset read-only; this handle is the source file
# handed to ProjectDataset2D further down.
# NOTE(review): the handle is never closed, and `data` is rebound to a
# different file later in the notebook -- consider a distinct name or a
# `with` block.
data = h5py.File(os.path.join(DATAPATH, 'dl', '211203_dl_classification_84_25_2cm_slice1_sample2x8192.h5'), 'r')



In [None]:
# Show which metadata fields are stored for the training split.
data['train']['meta'].keys()

# load eigenvectors

In [None]:
# List the files in the PCA results directory.
os.listdir(os.path.join(PATH, 'results', 'damselfly', 'pca'))

In [None]:
# Load the saved PCA decomposition and pull its two arrays into memory.
pca_result = np.load(
    os.path.join(PATH, 'results', 'damselfly', 'pca', '211208_84_25_2cm_pca.npz')
)

evecs, evals = (pca_result[field] for field in ('evecs', 'evals'))

In [None]:
# Number of leading eigenvector rows to use, and how many largest-magnitude
# indices to keep for each of them.
n_evecs = 256
n_project = 128

# Write the projected dataset into the 'pca' sub-directory of the DL datasets.
# The following cells list and read the file from exactly this location; a
# bare filename would instead land in the kernel's working directory and make
# a fresh Restart & Run All fail on the read.
name = os.path.join(
    DATAPATH, 'dl', 'pca',
    '211209_dl_classification_84_25_2cm_slice1_sample2x8192_proj256x128.h5',
)
# Make sure the target directory exists (no-op when it is already there).
os.makedirs(os.path.dirname(name), exist_ok=True)

ProjectDataset2D(data, evecs, n_evecs, n_project, name)



In [None]:
# List the files in the 'pca' sub-directory of the 'dl' dataset directory.
os.listdir(os.path.join(DATAPATH, 'dl', 'pca'))

In [None]:
# signal data
# Re-bind `data` to the projected dataset stored in the 'pca' sub-directory (read-only).
# NOTE(review): this shadows the file handle opened earlier in the notebook
# without closing it; re-running earlier cells afterwards needs `data` to be
# reloaded first.
data = h5py.File(os.path.join(DATAPATH, 'dl','pca', '211209_dl_classification_84_25_2cm_slice1_sample2x8192_proj256x128.h5'), 'r')

In [None]:
# Take entry 6, second-axis index 1 of the training split and plot all of its
# values as a single flattened trace.
# NOTE(review): index 1 is presumably the imaginary channel -- confirm against
# how the dataset was written.
example = data['train']['data'][6, 1, :, :]
plt.plot(example.flatten())
#proj_data = data[:, :, projection_inds].reshape((12221, 2, 256, 128))

In [None]:

#plt.colorbar()

In [None]:
# Magnitudes of row 3000 of `evecs`, plotted from largest to smallest.
plt.plot(np.sort(abs(evecs[3000, :]))[::-1])

# define output dataset parameters

In [None]:
# Collect the parameters describing an output DL dataset into `config` and
# hand them to CreateDLDataset.
# NOTE(review): earlier cells bind `data` to an h5py.File, which has no
# `.shape`; this cell looks like it expects an array -- confirm which object
# `data` is meant to be here.
nsignal = data.shape[0]
nsample = data.shape[1]

# same signals in train, test, val sets. Different noise samples added to signals at run time
#ncopies_train = 10
#ncopies_test = 4

nslice = 1
ninput_ch = 2 # real, imag
slicesize = 2 * 8192

# Train and test shapes are identical: (signals, slices * input channels, samples per slice).
train_shape = (nsignal, nslice * ninput_ch, slicesize)
test_shape = (nsignal, nslice * ninput_ch, slicesize)

# Noise model inputs.
# NOTE(review): units are not stated; presumably kelvin, hertz and ohms -- confirm.
noise_temp = 10
fsample = 200e6
system_z = 50
nch = 60
kB = 1.38e-23  # Boltzmann constant (J/K), rounded

# Total noise variance as the product kB * nch * T * Z * fsample, then divided
# evenly over the `slicesize` samples of one slice.
noise_var = kB * nch * noise_temp * system_z * fsample
noise_var_per_bin = noise_var / slicesize

# NOTE(review): this is the same path that is opened read-only as the signal
# data earlier in the notebook -- confirm it is really meant to be the output name.
name = os.path.join(DATAPATH, 'dl', '211203_dl_classification_84_25_2cm_slice1_sample2x8192.h5')
label = 'class'

config = {
    'train_shape': train_shape,
    'test_shape': test_shape,
    'nsignal': nsignal,
    'nsample': nsample,
    'nslice': nslice,
    'ninput_ch': ninput_ch,
    'slicesize': slicesize,
    'noise_temp': noise_temp,
    'fsample': fsample,
    'system_z': system_z,
    'nch': nch,
    'noise_var': noise_var,
    'noise_var_per_bin': noise_var_per_bin,
    'name': name,
    'label': label
}


# NOTE(review): CreateDLDataset, metadata, kass_data and kass_metadata are not
# defined anywhere in the visible notebook, so on a fresh kernel this call
# raises NameError unless they are supplied elsewhere -- confirm or remove.
CreateDLDataset(config, data, metadata, kass_data, kass_metadata)


