In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import torch
import h5py
import os
import sys
import scipy
import damselfly as df
import mayfly as mf
import scipy.signal
import scipy.stats
import scipy.interpolate
import json
import time

# Root of the shared project area; the directories below are derived from it.
PATH = '/storage/home/adz6/group/project'
RESULTPATH, PLOTPATH, DATAPATH = (
    os.path.join(PATH, subdir)
    for subdir in ('results/damselfly', 'plots/damselfly', 'datasets/data')
)
#SIMDATAPATH = os.path.join(PATH, 'damselfly/data/sim_data')

# Same string as the previous literal, now spelled relative to PATH.
damselpath = PATH + '/damselfly'
"""
Date: 6/25/2021
Description: template
"""

def CalculateAccuracy(output, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    The scores are passed through a softmax over dim 1 and the arg-max is then
    taken over that same dimension.

    Args:
        output: score tensor with the classes indexed along dim 1.
        labels: class indices compared element-wise with the predictions.

    Returns:
        A 0-dim float tensor holding the mean of the 0/1 match indicator.
    """
    predicted = torch.nn.functional.softmax(output, dim=1).argmax(dim=1)

    # 1.0 where the prediction agrees with the label, 0.0 elsewhere.
    hits = torch.as_tensor(predicted == labels, dtype=torch.float)

    return hits.mean()

def AddNoiseToBatch2D(batch, var, rng=None):
    """Add zero-mean Gaussian noise to channels 0 and 1 of a 4-D batch, in place.

    One two-component normal sample is drawn per (signal, row, column)
    position. The first component is added to channel 0 and the second to
    channel 1; each component has variance ``var / 2`` and the two are
    uncorrelated.

    Args:
        batch: tensor indexed as (signal, channel, dim2, dim3); only channels
            0 and 1 are touched. It is modified in place.
        var: total noise variance shared equally between the two channels.
        rng: optional ``numpy.random.Generator``. Pass a seeded generator to
            make the noise reproducible; when omitted a fresh, unseeded
            generator is created (the previous behaviour).

    Returns:
        The same ``batch`` object, after the noise has been added.
    """
    if rng is None:
        rng = np.random.default_rng()

    n_signals, n_rows, n_cols = batch.shape[0], batch.shape[2], batch.shape[3]

    # Shape (n_signals * n_rows * n_cols, 2): column 0 feeds channel 0,
    # column 1 feeds channel 1.
    samples = rng.multivariate_normal([0, 0], np.eye(2) * var / 2, n_signals * n_rows * n_cols)

    for channel in (0, 1):
        component = samples[:, channel].reshape(n_signals, n_rows, n_cols)
        # Build the noise on the batch's own device so the in-place add also
        # works when the batch does not live on the CPU.
        batch[:, channel, :] += torch.tensor(component, dtype=torch.float, device=batch.device)

    return batch
    
def NormBatch(batch):
    """Scale channels 0 and 1 of a 4-D batch, in place, by each row's peak magnitude.

    For every (signal, channel, row) the values along the last axis are
    multiplied by ``1 / max(|values|)``, so the largest magnitude in that row
    becomes 1. Rows that are entirely zero are left as zeros instead of being
    turned into NaN by a division by zero.

    Args:
        batch: tensor indexed as (signal, channel, row, column); only channels
            0 and 1 are rescaled. It is modified in place.

    Returns:
        The same ``batch`` object, after rescaling.
    """
    for channel in (0, 1):
        # Basic indexing yields a view, so the in-place multiply below writes
        # straight into `batch`.
        plane = batch[:, channel, :, :]
        peak = torch.max(abs(plane), -1, keepdim=True)[0]

        # A zero peak would give 1 / 0 = inf and 0 * inf = NaN; use a unit
        # scale there so an all-zero row stays all-zero.
        peak = torch.where(peak > 0, peak, torch.ones_like(peak))

        plane *= 1 / peak

    return batch
    

def _PadWithZeroSignals(data, label, growth):
    """Append all-zero signals labelled 0 until there are int(len(data) * growth) entries."""
    n_extra = int(data.shape[0] * growth) - data.shape[0]

    padded_data = np.concatenate(
        (data, np.zeros((n_extra, *data.shape[1:]), dtype=np.float32)),
        axis=0, dtype=np.float32)

    padded_label = np.int32(np.concatenate((label, np.zeros(n_extra)), axis=0))

    return padded_data, padded_label


def LoadDataArrays2D(datafilepath, train_noise_frac, val_noise_frac, noise_var):
    """Load the train/val splits from an HDF5 file and pad each with zero signals.

    Reads ``file['train']['data']``, ``file['train']['label']``,
    ``file['val']['data']`` and ``file['val']['label']``. Each split is then
    extended with all-zero signals carrying label 0: the training split grows
    to ``int(N_train * 1.25)`` entries and the validation split to
    ``int(N_val * 2)``.

    Args:
        datafilepath: path of the HDF5 file to read.
        train_noise_frac: currently unused (see the review note in the body).
        val_noise_frac: currently unused (see the review note in the body).
        noise_var: currently unused by this function.

    Returns:
        ``((train_data, train_label), (val_data, val_label))`` where the data
        entries are float tensors and the label entries are long tensors.
    """
    # The context manager releases the HDF5 handle even if one of the reads fails.
    with h5py.File(datafilepath, 'r') as file:
        train_data_no_noise = file['train']['data'][:]
        train_label = file['train']['label'][:]

        val_data_no_noise = file['val']['data'][:]
        val_label = file['val']['label'][:]

    # NOTE(review): the growth factors are still hard-coded ("need to fix").
    # They equal 1 / (1 - frac) for fracs of 0.2 and 0.5, so they are
    # presumably meant to come from train_noise_frac / val_noise_frac --
    # confirm before wiring the parameters in.
    train_data, train_label = _PadWithZeroSignals(train_data_no_noise, train_label, 1 + 0.25)

    # The validation set is built from the validation signals; it was
    # previously assembled from the training signals by mistake.
    val_data, val_label = _PadWithZeroSignals(val_data_no_noise, val_label, 2)

    return (
        (torch.tensor(train_data, dtype=torch.float), torch.tensor(train_label, dtype=torch.long)),
        (torch.tensor(val_data, dtype=torch.float), torch.tensor(val_label, dtype=torch.long)),
    )

def TrainModel2D(class_weights, datafilepath, savepath, train_noise_frac, val_noise_frac, noise_var, device, batchsize, learning_rate, model, epochs, ncopies_train, ncopies_val):
    """Train `model` with class-weighted cross-entropy and checkpoint the best validation accuracy.

    Every batch has fresh noise added by ``AddNoiseToBatch2D`` and is rescaled
    by ``NormBatch`` before it is fed to the model. After each epoch the
    validation set is scored; whenever the mean validation accuracy exceeds
    the best seen so far, the model's ``state_dict`` is written to
    ``<savepath>/model.pth``. The training loss/accuracy history and the
    validation accuracy history are written to ``<savepath>/loss.npz`` after
    every validation pass.

    Args:
        class_weights: per-class weights handed to ``torch.nn.CrossEntropyLoss``.
        datafilepath: HDF5 file passed to ``LoadDataArrays2D``.
        savepath: output directory; created (with parents) if missing.
        train_noise_frac: forwarded to ``LoadDataArrays2D``.
        val_noise_frac: forwarded to ``LoadDataArrays2D``.
        noise_var: noise variance used for every batch; also forwarded to
            ``LoadDataArrays2D``.
        device: ``torch.device`` or device string to train on.
        batchsize: batch size of both data loaders.
        learning_rate: learning rate of the Adam optimizer.
        model: ``torch.nn.Module`` to train; it is updated in place.
        epochs: number of epochs.
        ncopies_train: passes over the training loader per epoch.
        ncopies_val: passes over the validation loader per epoch.

    Returns:
        None. Results are only written to ``savepath``.
    """
    # Accept a device string as well as a torch.device.
    device = torch.device(device)

    class_weight_tensor = torch.tensor(class_weights, device=device, dtype=torch.float)

    # makedirs also creates missing parent directories, which os.mkdir does not.
    os.makedirs(savepath, exist_ok=True)

    if device.type == 'cuda':
        print('Model moved to GPU')
    # Always place the model on the requested device so it matches the class
    # weights and the batches (a no-op when it is already there).
    model.to(device)

    print('Loading data')
    train_data, val_data = LoadDataArrays2D(datafilepath, train_noise_frac, val_noise_frac, noise_var)

    train_dataloader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(train_data[0], train_data[1]),
        batchsize,
        shuffle=True,
    )
    val_dataloader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(val_data[0], val_data[1]),
        batchsize,
        shuffle=True,
    )

    # define loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss(weight=class_weight_tensor, reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    sns.set_theme(context='paper', style='whitegrid')

    train_acc = []
    train_loss = []
    val_acc = []

    best_val_acc = 0
    print('Training Starting')
    for ep in range(epochs):
        # Training mode: mode-dependent layers (e.g. dropout or batch norm,
        # if the model has any) use their training behaviour.
        model.train()
        for icopy in range(ncopies_train):
            for batch, labels in train_dataloader:

                batch = AddNoiseToBatch2D(batch, noise_var)
                batch = NormBatch(batch)

                batch = batch.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                output = model(batch)

                loss = criterion(output, labels)  # weighted cross-entropy against the class labels
                loss.backward()

                optimizer.step()

                acc = CalculateAccuracy(output, labels)
                print(f'|  {ep + 1}  |  {np.round(loss.item(), 5)}  |  {np.round(acc.item(), 5)}  ')

                train_acc.append(acc.item())
                train_loss.append(loss.item())

        # Evaluation mode: validation must not be scored with training-time
        # behaviour of mode-dependent layers.
        model.eval()
        with torch.no_grad():
            for icopy in range(ncopies_val):
                val_acc_list = []
                for batch, labels in val_dataloader:

                    batch = AddNoiseToBatch2D(batch, noise_var)
                    batch = NormBatch(batch)

                    batch = batch.to(device)
                    labels = labels.to(device)

                    val_out = model(batch)

                    val_acc_list.append(CalculateAccuracy(val_out, labels).item())

                mean_val_acc = np.mean(val_acc_list)
                val_acc.append(mean_val_acc)

                if mean_val_acc > best_val_acc:
                    best_val_acc = mean_val_acc
                    torch.save(model.state_dict(), os.path.join(savepath, 'model.pth'))

                np.savez(os.path.join(savepath, 'loss'), train_loss=train_loss, train_acc=train_acc, val_acc=val_acc)

                print(f'Validation Accuracy = {np.round(mean_val_acc, 5)}')
        


In [None]:
# Show what is stored in the 'dl/pca' directory under DATAPATH.
os.listdir(
    os.path.join(DATAPATH, 'dl', 'pca')
)

In [None]:
# Dataset file used for this run.
filepath = os.path.join(DATAPATH, 'dl', 'pca', '211209_dl_classification_84_25_2cm_slice1_sample2x8192_proj256x128.h5')

# Open and immediately close the file; nothing is read from it here.
with h5py.File(filepath, 'r') as file:
    #print(file['train'].keys())
    pass

noise_var = 60 * 1.38e-23 * 200e6 * 10 * 50 / (2 * 8192) # summed noise in frequency space

# Optimisation settings.
batchsize, epochs, ep_per_check, lr = 500, 200, 1, 1e-4

# Noise fractions handed to the training routine.
noise_frac_train, noise_frac_test = 0.2, 0.5

# Use the first GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
n_pc = 256

# Three stages with the same layout; only the leading numbers change from
# stage to stage: (2, 20) -> (20, 40) -> (40, 80).
# NOTE(review): the four entries of each stage are presumably input channels,
# output channels, kernel sizes and pooling size -- confirm against
# df.models.DFCNN2D.
conv_list = [
    [
        [n_in, n_out, n_out],
        [n_out, n_out, n_out],
        [(n_pc, 4)] * 3,
        (1, 4),
    ]
    for n_in, n_out in ((2, 20), (20, 40), (40, 80))
]

model_config_2d_cnn = dict(nclass=2, nch=2, conv=conv_list)

input_shape = (256, 128)

# The first entry of the first row is computed from the convolutional
# configuration, the channel count and the input shape.
linear_list = [
    [df.models.GetConv2DOutputSize(conv_list, model_config_2d_cnn['nch'], input_shape), 512],
    [512, 256],
    [0.5, 0.5],
]

model = df.models.DFCNN2D(
    model_config_2d_cnn['nclass'],
    model_config_2d_cnn['nch'],
    conv_list,
    linear_list,
)

In [None]:
# Identify this run; results are written to a directory named after it.
train_date = 211209
train_name = '84_25_2cm_slice1_sample2x8192_10K_pca_proj256x128'

savepath = os.path.join(RESULTPATH, 'dl', 'train', f'{train_date}_{train_name}')

# Keyword arguments make the long argument list self-describing.
TrainModel2D(
    class_weights=[5., 1.],
    datafilepath=filepath,
    savepath=savepath,
    train_noise_frac=noise_frac_train,
    val_noise_frac=noise_frac_test,
    noise_var=noise_var,
    device=device,
    batchsize=batchsize,
    learning_rate=lr,
    model=model,
    epochs=epochs,
    ncopies_train=1,
    ncopies_val=1,
)