In [None]:
import torch
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import damselfly as df
import math
from scipy import integrate

home = Path.home()
results = home/'group'/'project'/'results'
snr_sweep = np.linspace(1, 10, 19)

def LoadModel(model, path):
    """Load saved weights into ``model`` and put it in evaluation mode.

    Args:
        model: a ``torch.nn.Module`` whose layout matches the saved state dict.
        path: location of a state dict file readable by ``torch.load``.

    Returns:
        The same ``model`` object, with the weights loaded and ``eval()`` applied.
    """
    # Deserialize onto the CPU so a checkpoint written on a GPU machine still
    # loads where no GPU is present; the caller moves the model to its target
    # device afterwards.
    state_dict = torch.load(path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    return model

def LoadSignal(config):
    """Read the array stored at ``config['data_path']`` and return it as a tensor."""
    return torch.tensor(np.load(config['data_path']))

def FFT(signal):
    """Fourier-transform ``signal`` along its last axis.

    ``norm='forward'`` puts the 1/n scaling on this forward transform.
    """
    spectrum = torch.fft.fft(signal, norm='forward', dim=-1)
    return spectrum

def GenerateDataTensor(signal, config):
    """Build the float data tensor: signal rows first, then all-zero rows.

    The result has ``signal.shape[0] + config['number_noise']`` rows,
    ``config['number_channel']`` channels and ``config['samples']`` samples.
    Channel 0 of the leading rows receives the real part of ``signal`` and
    channel 1 the imaginary part; the remaining rows stay zero.
    """
    n_signal = signal.shape[0]
    n_total = n_signal + config['number_noise']

    data = torch.zeros(
        (n_total, config['number_channel'], config['samples']),
        dtype=torch.float,
    )

    # Split the complex input into separate real / imaginary channels.
    data[:n_signal, 0, :] = signal.real
    data[:n_signal, 1, :] = signal.imag

    return data

def GenerateTargetTensor(config, signal):
    """Return integer class labels: 1 for each signal row, 0 for each noise row.

    The label vector has ``signal.shape[0]`` ones followed by
    ``config['number_noise']`` zeros.
    """
    signal_labels = torch.ones(signal.shape[0], dtype=torch.long)
    noise_labels = torch.zeros(config['number_noise'], dtype=torch.long)

    return torch.cat([signal_labels, noise_labels])

def AddNoise(data, config):
    """Return ``data`` plus zero-mean Gaussian noise of the same shape.

    The standard deviation of each drawn value is ``sqrt(config['var'] / 2)``.
    """
    sigma = math.sqrt(config['var'] / 2)
    noise = torch.normal(mean=0, std=sigma, size=data.shape)

    return data + noise

def NormBatch(batch):
    """Scale every batch entry by its own largest absolute value.

    The maximum is taken over dimensions 2 and then 1 with ``keepdim=True``,
    so each entry along dimension 0 is divided by a single peak value.
    """
    peak = batch.abs().max(dim=2, keepdim=True).values
    peak = peak.max(dim=1, keepdim=True).values

    return batch / peak

def RollBatch(batch):
    """fftshift the batch along its last axis, then roll each entry by a random offset.

    One integer offset in [0, 200) is drawn per batch entry and applied as a
    circular shift along the last axis. The input tensor is not modified;
    the shifted copy is returned.
    """
    n_rows = batch.shape[0]

    # 200 bins is roughly 100 eV of BW at 1T for 200 MHz and 8192 samples
    offsets = torch.randint(0, 200, (n_rows,))

    shifted = torch.fft.fftshift(batch, dim=-1)
    for row in range(n_rows):
        # roll in the positive freq. direction for lower energy
        shifted[row, :, :] = torch.roll(shifted[row, :, :], int(offsets[row]), dims=-1)

    return shifted

def EvalModel(config):
    """Run the saved classifier over noisy, randomly shifted copies of the test data.

    The signals at ``config['data_path']`` are Fourier-transformed and packed,
    together with ``config['number_noise']`` all-zero rows, into one dataset.
    The dataset is passed through the model ``config['epochs']`` times; each
    pass applies a fresh random roll, fresh noise and per-entry normalisation.

    Args:
        config: dict with the keys ``data_path``, ``number_noise``,
            ``number_channel``, ``samples``, ``var``, ``epochs``,
            ``training['batchsize']``, ``model``, ``saved_model`` and
            ``model_config`` (``nclass``, ``nch``, ``conv``, ``lin``).

    Returns:
        ``(outputs, labels)`` as numpy arrays. ``outputs`` holds the raw
        model outputs (no softmax) for every evaluated row, ``labels`` the
        matching 0/1 targets.
    """
    spectra = FFT(LoadSignal(config))
    data_tensor = GenerateDataTensor(spectra, config)
    target = GenerateTargetTensor(config, spectra)

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    print('Found GPU' if use_cuda else 'No GPU')

    dataset = torch.utils.data.TensorDataset(data_tensor, target)
    loader = torch.utils.data.DataLoader(
        dataset,
        config['training']['batchsize'],
        shuffle=False,
        # Pinned host memory only helps host-to-GPU copies; requesting it
        # without a GPU just triggers a warning.
        pin_memory=use_cuda,
        drop_last=False,
    )

    # Build the network once and move it to whichever device is available.
    model_config = config['model_config']
    model = LoadModel(
        config['model'](
            model_config['nclass'],
            model_config['nch'],
            model_config['conv'],
            model_config['lin'],
        ),
        config['saved_model'],
    ).to(device)

    output_list = []
    label_list = []
    with torch.no_grad():
        for ep in range(config['epochs']):
            for batch, labels in loader:
                batch = RollBatch(batch)
                batch = AddNoise(batch, config)
                batch = NormBatch(batch)
                label_list.extend(labels.numpy())

                # Both devices return the raw model output. Previously only
                # the CPU path applied a softmax, so the meaning of the
                # returned scores depended on the hardware; the histogram
                # binning used later in this notebook spans -5 to 15, i.e.
                # it expects unnormalised scores.
                output = model(batch.to(device)).cpu()
                output_list.extend(output.numpy())

                print(ep + 1)

    return np.array(output_list), np.array(label_list)


In [None]:
# List the available loss-curve result directories. iterdir() yields entries
# in arbitrary order, so sort them to make the listing reproducible.
for item in sorted((results/'machine_learning'/'dnn'/'triggering'/'loss'/'dense_grid').iterdir()):
    print(item)

# Plot loss curves

In [None]:
# Directory holding the saved training-loss arrays. It gets its own name
# instead of re-binding `results`: other cells build model and output paths
# from the base `results` directory, and re-binding it here would also nest
# the path one level deeper every time this cell is re-run.
loss_dir = results/'machine_learning'/'dnn'/'triggering'/'loss'/'dense_grid'

sns.set_theme(context='talk')
fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

loss_run = '220228_pitch87.0_rad1cm_fft'
loss_data = np.load(loss_dir/loss_run/'model_cnn_loss.npy')
# Column 2 of the saved array is what gets plotted as the training loss.
# The label gives ax.legend() below an artist to show.
ax.plot(loss_data[:, 2], label=loss_run)
#for snr in [1.0, 2.0, 3.0, 5.0, 10.0]:
#    loss_data = np.load(results/'machine_learning'/'dnn'/'triggering'/'loss'/'toy_sinusoid'/'220223_snr_sweep'/f'model_cnn_snr{snr}.npy')
#    ax.plot(loss_data[:, 2], label=f'SNR = {snr}')
#    #print(loss_data)

# Tick positions 1..600 are relabelled as epochs 1..100.
# NOTE(review): presumably six loss records are stored per epoch - confirm.
ax.set_xticks(np.linspace(1, 600, 6))
ax.set_xticklabels(np.int32(np.linspace(1, 100, 6)))
ax.set_xlabel('Training Epoch')
ax.set_ylabel('Training Loss (AU)')
ax.legend(loc=1)
ax.set_title('Model Training Loss')

plt.tight_layout()
#name = '220224_model_train_loss_vs_snr.png'
#save_path = Path.home()/'group'/'project'/'plots'/'machine_learning'/'dnn'/'triggering'/'toy_sinusoid'/name

#plt.savefig(save_path)


# Evaluate Models

In [None]:
# Sizes shared by the data description, the noise variance and the network
# geometry; defined once so the places that must agree cannot drift apart.
n_samples = 8192
n_channel = 2  # real, imag.
sample_freq = 200e6

# Three convolutional stages. Each stage is a list of five entries that is
# passed through unchanged to df.models.DFCNN / GetConv1DOutputSize; the
# fourth entry is the dilation and the fifth the max-pool size.
conv_layer_config = [
    [
        [n_channel * 1, 16, 16],
        [16, 16, 16],
        [32, 32, 32],
        [1, 1, 1], # dilation
        16, # maxpool
    ],
    [
        [16, 32, 32],
        [32, 32, 32],
        [16, 16, 16],
        [1, 1, 1],
        8,
    ],
    [
        [32, 64, 64],
        [64, 64, 64],
        [8, 8, 8],
        [1, 1, 1],
        4,
    ],
]

# Anchored at the home directory so the checkpoint location does not depend
# on the current value of the notebook-level `results` variable.
model_dir = (
    Path.home()/'group'/'project'/'results'
    /'machine_learning'/'dnn'/'triggering'/'models'/'dense_grid'
    /'220301_pitch87.0_rad1to4cm_fft_100epoch'
)

config = {
    'data_path': Path.home()/'group'/'project'/'datasets'/'data'/'bf'/'220301_dense_grid_87.0deg_1to4cm_random.npy',
    'samples': n_samples,
    'sample_freq': sample_freq,
    'number_noise': 4000,
    'number_channel': n_channel, # real, imag.
    # NOTE(review): the physical meaning of the individual factors is not
    # documented here; the expression is kept term-for-term - confirm.
    'var': 1.38e-23 * 50 * 60 * math.sqrt(60) * sample_freq * 10 / n_samples,
    'epochs': 20, # number of copies of the test data to analyze

    'training': {
        'batchsize': 2500,
    },

    'model': df.models.DFCNN,
    'saved_model': model_dir/'model_cnn.pt',
    'model_config': {
        'conv': conv_layer_config,
        'nclass': 2,
        'nch': n_channel,
        'lin': [
            [df.models.GetConv1DOutputSize(conv_layer_config, n_channel, n_samples), 512, 256], # input dense layer sizes
            [512, 256, 128], # output dense layer sizes
            [0.0, 0.0, 0.0] # dropout
            ],
    },
}

output, target = EvalModel(config)
    
    

# Plot histograms of model outputs

In [None]:
# Sanity check: show the shapes of the arrays returned by EvalModel.
print(output.shape, target.shape)

In [None]:
n_snr = 2

# flatnonzero always returns a 1-D index array, whereas
# argwhere(...).squeeze() collapses to a 0-d array when exactly one entry
# matches.
signal_inds = np.flatnonzero(target == 1)
noise_inds = np.flatnonzero(target == 0)

sns.set_theme()
fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

# Column 1 of the model output is the score for class 1 (signal). The two
# histograms are semi-transparent and labelled so both stay readable where
# they overlap.
ax.hist(output[signal_inds, 1], bins=128, density=True, alpha=0.6, label='signal')
ax.hist(output[noise_inds, 1], bins=64, density=True, alpha=0.6, label='noise')

ax.set_xlabel('Model output (class 1)')
ax.set_ylabel('Density')
ax.legend();

# Calculate the PDF and CDF of the model outputs

In [None]:

# Histogram bin edges for the class-1 model output.
# NOTE(review): outputs outside [-5, 15] are dropped by np.histogram, and
# density=True normalises over the in-range values only - confirm the range
# covers the data.
bins = np.linspace(-5, 15, 5001)
right_edges = bins[1:]

signal_inds = np.flatnonzero(target == 1)
noise_inds = np.flatnonzero(target == 0)

# Empirical densities of the class-1 output for signal and noise rows.
pdf_sig, _ = np.histogram(output[signal_inds, 1], bins=bins, density=True)
pdf_noise, _ = np.histogram(output[noise_inds, 1], bins=bins, density=True)

# (density, right bin edge) pairs, kept under their original names.
hist_sig = pdf_sig, right_edges
hist_noise = pdf_noise, right_edges

# Running trapezoidal integral of each density over the right bin edges.
# With initial=0 the first entry is 0, matching a trapezoid over a single
# point, so this is the same quantity a per-bin loop of integrate.trapezoid
# calls would produce, in one linear pass instead of a quadratic one.
cdf_sig = integrate.cumulative_trapezoid(pdf_sig, x=right_edges, initial=0)
cdf_noise = integrate.cumulative_trapezoid(pdf_noise, x=right_edges, initial=0)


# Save the PDF and CDF of the model outputs

In [None]:
name = 'pdf_and_cdf.npz'
# Anchored at the home directory so the output location does not depend on
# the current value of the notebook-level `results` variable.
# NOTE(review): the directory is named '220228_pitch87.0_rad1cm_fft' while
# the evaluated checkpoint lives under
# '220301_pitch87.0_rad1to4cm_fft_100epoch' - confirm this is the intended
# destination.
save = (
    Path.home()/'group'/'project'/'results'
    /'machine_learning'/'dnn'/'triggering'/'eval_outputs'/'dense_grid'
    /'220228_pitch87.0_rad1cm_fft'/name
)

# np.savez does not create missing directories.
save.parent.mkdir(parents=True, exist_ok=True)

np.savez(
    save,
    pdf_signal=pdf_sig,
    pdf_noise=pdf_noise,
    cdf_signal=cdf_sig,
    cdf_noise=cdf_noise,
    bins=bins[1:]
)

In [None]:
# cdf_noise and cdf_sig hold one value per bin (1-D), so they are plotted
# directly; indexing them as [row, :] raises IndexError.
plt.plot(bins[1:], cdf_noise, label='noise')
plt.plot(bins[1:], cdf_sig, label='signal')
plt.xlabel('Model output (class 1)')
plt.ylabel('CDF')
plt.legend();

In [None]:
# pdf_noise and pdf_sig hold one value per bin (1-D), so they are plotted
# directly; indexing them as [row, :] raises IndexError.
plt.plot(bins[1:], pdf_noise, label='noise')
plt.plot(bins[1:], pdf_sig, label='signal')
plt.xlabel('Model output (class 1)')
plt.ylabel('PDF')
plt.legend();

In [None]:
sns.set_theme()
fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

# ROC curve: for every threshold (bin edge), the fraction of noise rows
# above it (false positive rate) against the fraction of signal rows above
# it (true positive rate).
#for i in np.arange(0, 18, 2):
ax.plot(1 - cdf_noise, 1 - cdf_sig, label='model')

# Diagonal = performance of a random classifier.
ax.plot(np.linspace(0, 1, 1001), np.linspace(0, 1, 1001), color='grey', linestyle='--', label='random')

ax.set_xscale('log')
ax.set_xlim(1e-6, 1)
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.legend();



In [None]:
# Index of the bin whose false positive rate (1 - noise CDF) is closest to
# 1e-4. The whole CDF array is searched: cdf_noise is 1-D, so cdf_noise[0]
# is a single number and argmin over it would always return 0.
print(np.argmin(np.abs(1 - cdf_noise - 1e-4)))

In [None]:
sns.set_theme()
fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

select_fpr = 1e-3

# The [row, bin] indexing below expects one CDF row per evaluation. A single
# evaluation gives 1-D CDF arrays, which would raise IndexError, so it is
# treated as a one-row sweep.
cdf_noise_rows = np.atleast_2d(cdf_noise)
cdf_sig_rows = np.atleast_2d(cdf_sig)
n_rows = cdf_noise_rows.shape[0]

select_tpr_vs_snr = np.zeros(n_rows)
for i in range(n_rows):
    # Threshold bin whose false positive rate is closest to the target...
    index_near_fpr = np.argmin(abs(1-cdf_noise_rows[i, :] - select_fpr))
    # ...and the true positive rate at that same threshold.
    select_tpr_vs_snr[i] = 1-cdf_sig_rows[i, index_near_fpr]

if n_rows == snr_sweep.size:
    ax.plot(snr_sweep, select_tpr_vs_snr, '-')
    ax.set_xlabel('SNR')
else:
    # No one-to-one match with snr_sweep, so there is no SNR axis to plot
    # against; show the value(s) by evaluation index instead.
    ax.plot(np.arange(n_rows), select_tpr_vs_snr, 'o')
    ax.set_xlabel('Evaluation index')
ax.set_ylabel(f'True positive rate at FPR = {select_fpr:g}');

In [None]:
sns.set_theme()
fig = plt.figure(figsize=(13, 8))
ax = fig.add_subplot(1,1,1)

# The [row, bin] indexing below expects one CDF row per evaluation. A single
# evaluation gives 1-D CDF arrays, which would raise IndexError, so it is
# treated as a one-row sweep.
cdf_noise_rows = np.atleast_2d(cdf_noise)
cdf_sig_rows = np.atleast_2d(cdf_sig)
n_rows = cdf_noise_rows.shape[0]
has_snr_axis = n_rows == snr_sweep.size

for fpr in [1e-1, 1e-2, 1e-3, 1e-4]:
    select_fpr = fpr
    select_tpr_vs_snr = np.zeros(n_rows)
    for i in range(n_rows):
        # Threshold bin whose false positive rate is closest to the target,
        # then the true positive rate at that same threshold.
        index_near_fpr = np.argmin(abs(1-cdf_noise_rows[i, :] - select_fpr))

        select_tpr_vs_snr[i] = 1-cdf_sig_rows[i, index_near_fpr]

    if has_snr_axis:
        ax.plot(snr_sweep, select_tpr_vs_snr, '-', label=f'FPR = {fpr:g}')
    else:
        # No one-to-one match with snr_sweep: plot by evaluation index.
        ax.plot(np.arange(n_rows), select_tpr_vs_snr, 'o', label=f'FPR = {fpr:g}')

ax.set_xlabel('SNR' if has_snr_axis else 'Evaluation index')
ax.set_ylabel('True positive rate')
ax.legend();