In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import uproot
import awkward as ak
import pickle
import os
import datetime
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import pandas as pd

from PionModel import *

In [2]:
# Create an instance of the model.
# It can be used to train a new set of weights, or to evaluate data sets using an
# existing set of weights. NN and MARE come from the wildcard import of PionModel.

# Seed torch's global RNG *before* the model is constructed so that a
# Restart Kernel -> Run All reproduces the same run: any random weight
# initialisation done inside NN() and the torch.randperm batch shuffling used by the
# training loop both draw from this generator.
SEED = 42
torch.manual_seed(SEED)

net = NN()
# Loss function and optimizer handed to the training step later on.
criterion = MARE
optimizer = torch.optim.SGD(net.parameters(), lr = 0.001)

In [3]:
# Load the summed energy per layer (input of the NN) and the targets (the values the
# predictions are compared against during training) for the flat-distribution pion
# training set.
#
# All files live in one directory; keep it in a single place so it can be re-pointed
# without editing every open() call.
# NOTE(review): pickle.load can execute arbitrary code while unpickling -- only point
# DATA_DIR at files from a trusted source.
DATA_DIR = '/home/rusack/shared/pickles/HGCAL_TestBeam/pkl_files/DiscreteSim_pklFiles_Jul22/v1/FTFP_pklFiles'

# Training targets (file name suggests the true energy per event -- confirm).
with open(f'{DATA_DIR}/trueE_target.pickle', 'rb') as targets_file:
    targets = pickle.load(targets_file)

# Energy of each layer summed up.
with open(f'{DATA_DIR}/layer_energy_sum.pickle', 'rb') as layer_sum_file:
    layerSum = pickle.load(layer_sum_file)

# Optional per-hit inputs, only needed to rebuild the layer sums from scratch.
# with open(f'{DATA_DIR}/Hit_Z.pickle', 'rb') as a1:
#     layers = pickle.load(a1)
# with open(f'{DATA_DIR}/recHitEn.pickle', 'rb') as a2:
#     energy = pickle.load(a2)

In [4]:
# #Separates the individual hits in the detector by their corresponding layers and sums them
# layer_position = np.unique(ak.flatten(layers))[0:50]
# temp = [ ak.sum(energy[layers==lz], axis=1) for lz in layer_position ] 

In [None]:
# Partition the layer-sum inputs and their targets into a leading 80% training slice
# and a trailing 20% testing slice.

layerSum = np.array(layerSum)

# Number of events to use; leave at the full length unless troubleshooting.
num_selected = len(layerSum)

split = 0.8
# Index of the first event that belongs to the testing slice.
n_train = int(np.floor(num_selected * split))

xfull = np.asarray(layerSum)
yfull = np.asarray(targets)

# Inputs: rows [0, n_train) are for training, rows [n_train, num_selected) for testing.
xtrain = torch.Tensor(xfull[:n_train, :])
xtest = torch.Tensor(xfull[n_train:num_selected, :])

# Targets use the same row ranges and get a trailing axis of length 1 appended.
ytrain = torch.Tensor(yfull[:n_train]).unsqueeze(-1)
ytest = torch.Tensor(yfull[n_train:num_selected]).unsqueeze(-1)


# print("layerSum ", np.shape(layerSum))
# print("xtrain ", np.shape(xtrain))

In [None]:
# Define the training parameters and run the training loop.
# Every epoch trains on a fresh random permutation of the training set, so that the
# event ordering does not affect the predictions, and then measures the loss on the
# testing set without touching the weights.
# The mean batch loss of the training and of the testing pass is stored for each epoch.
# Epoch num arbitrarily chosen to show that it adequately trains after only a few epochs.
batch_size = 256*2
EPOCHS = 100

# Directory that receives one state dict per epoch plus the final one.
# torch.save does not create missing parent directories, so make sure it exists.
CHECKPOINT_DIR = 'PionDNNstates/exampleRun'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

epoch_loss = []
epoch_loss_test = []
total_loss = []
for epoch in range(EPOCHS):
    # Training mode: layers such as dropout / batch-norm use their training behaviour.
    net.train()
    batch_loss = []

    # Visit the training events in a new random order each epoch.
    permutation = torch.randperm(xtrain.size()[0])
    for i in range(0, xtrain.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        batch_x, batch_y = xtrain[indices,:], ytrain[indices]
        # Train on one batch of the permutation (train() is imported from PionModel).
        loss, output = train(net, batch_x, batch_y, optimizer, criterion)
        # .item() extracts the Python float and also works for non-CPU tensors.
        batch_loss.append(loss.item())
#         total_loss.append(torch.detach(loss).numpy())
    epoch_loss.append(sum(batch_loss)/len(batch_loss))

    # Evaluation pass on the held-out set.
    # net.eval() only switches layer behaviour; it does NOT prevent an optimizer step.
    # The held-out batches must therefore not be passed to train(), which receives the
    # optimizer and is assumed to update the weights. Instead run a plain forward pass
    # with gradient tracking disabled.
    net.eval()
    batch_lt = []
    with torch.no_grad():
        # No shuffling needed: without weight updates the batch order is irrelevant.
        for i in range(0, xtest.size()[0], batch_size):
            batch_x, batch_y = xtest[i:i+batch_size,:], ytest[i:i+batch_size]
            output = net(batch_x)
            # NOTE(review): assumes the criterion is called as (prediction, target),
            # the same way train() calls it -- confirm against PionModel.
            loss = criterion(output, batch_y)
            batch_lt.append(loss.item())
    epoch_loss_test.append(sum(batch_lt)/len(batch_lt))
    # Checkpoint the weights after every epoch.
    torch.save(net.state_dict(), f'{CHECKPOINT_DIR}/epoch{epoch}')

# Format loss arrays for plotting
epoch_loss = np.asarray(epoch_loss)
epoch_loss = epoch_loss.flatten()
epoch_loss_test = np.asarray(epoch_loss_test)
epoch_loss_test = epoch_loss_test.flatten()
torch.save(net.state_dict(), f'{CHECKPOINT_DIR}/epochfinal')

In [None]:
# np.savetxt("PionDNNstates/exampleRun/epoch_loss.csv", epoch_loss, delimiter=",")
# np.savetxt("PionDNNstates/exampleRun/epoch_loss_test.csv", epoch_loss_test, delimiter=",")

In [None]:
# Draw the per-epoch training and testing loss on the cleared current figure,
# using the explicit Figure/Axes interface.
loss_fig = plt.gcf()
loss_fig.clear()
loss_ax = loss_fig.gca()

loss_ax.plot(epoch_loss, c='red', label='training set')
loss_ax.plot(epoch_loss_test, c='blue', label='testing set')

loss_ax.set_ylabel('Loss', size=14)
loss_ax.set_xlabel('Epoch', size=14)
loss_ax.legend(fontsize=12, loc='upper right')
loss_ax.set_title('Pion DNN total loss per epoch')
plt.show()


In [None]:
# Interactive plots of the training and testing loss per epoch:
# fig2 uses a logarithmic y axis, fig4 a fixed linear y range.

def _loss_figure(**layout_overrides):
    """Build one figure holding the training and testing loss traces.

    Extra keyword arguments are forwarded to ``update_layout`` after the shared
    title / axis settings, so each figure can choose its own y-axis treatment.
    """
    figure = go.Figure(go.Scatter(dict(y=epoch_loss, name='Epoch Loss Training')))
    figure.update_layout(
        title=f"MARE Loss over {EPOCHS} epochs",
        xaxis_title="Epoch",
        yaxis_title="Loss",
        xaxis=dict(tickformat=',d'),
        **layout_overrides,
    )
    figure.add_trace(go.Scatter(y=epoch_loss_test, name='Epoch Loss Testing', mode='lines'))
    return figure


fig2 = _loss_figure(yaxis=dict(type='log'))
fig4 = _loss_figure(yaxis_range=[-0.05,1.05])

fig2.show()
fig4.show()