In [2]:
###################################################################################################
#
# PairIdentification.py
#
# Copyright (C) by Andreas Zoglauer & Harrison Costatino.
#
# Please see the file LICENSE in the main repository for the copyright-notice.
#
###################################################################################################



###################################################################################################

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import numpy as np

#from mpl_toolkits.mplot3d import Axes3D
#import matplotlib.pyplot as plt

import random

import signal
import sys
import time
import math
import csv
import os
import argparse
from datetime import datetime
from functools import reduce


print("\nPair Identification")
print("============================\n")



# Step 1: Input parameters
###################################################################################################


# Default parameters

UseToyModel = True

# Split between training and testing data
TestingTrainingSplit = 0.1

MaxEvents = 10

# File names
FileName = "PairIdentification.p1.sim.gz"
GeometryName = "$(MEGALIB)/resource/examples/geomega/GRIPS/GRIPS.geo.setup"


# Set in stone later
TestingTrainingSplit = 0.8

OutputDirectory = "Results"


parser = argparse.ArgumentParser(description='Perform training and/or testing of the pair identification machine learning tools.')
parser.add_argument('-f', '--filename', default='PairIdentification.p1.sim.gz', help='File name used for training/testing')
parser.add_argument('-m', '--maxevents', default='1000', help='Maximum number of events to use')
parser.add_argument('-s', '--testingtrainigsplit', default='0.1', help='Testing-training split')
parser.add_argument('-b', '--batchsize', default='128', help='Batch size')

args = parser.parse_args()

if args.filename != "":
  FileName = args.filename

if int(args.maxevents) > 1000:
  MaxEvents = int(args.maxevents)

if int(args.batchsize) >= 16:
  BatchSize = int(args.batchsize)

if float(args.testingtrainigsplit) >= 0.05:
  TestingTrainingSplit = float(args.testingtrainigsplit)


if os.path.exists(OutputDirectory):
  Now = datetime.now()
  OutputDirectory += Now.strftime("_%Y%m%d_%H%M%S")

os.makedirs(OutputDirectory)



###################################################################################################
# Step 2: Global functions
###################################################################################################


# Take care of Ctrl-C
Interrupted = False
NInterrupts = 0
def signal_handler(signal, frame):
  global Interrupted
  Interrupted = True
  global NInterrupts
  NInterrupts += 1
  if NInterrupts >= 2:
    print("Aborting!")
    sys.exit(0)
  print("You pressed Ctrl+C - waiting for graceful abort, or press  Ctrl-C again, for quick exit.")
signal.signal(signal.SIGINT, signal_handler)


# Everything ROOT related can only be loaded here otherwise it interferes with the argparse
from EventData import EventData

# Load MEGAlib into ROOT so that it is usable
import ROOT as M
M.gSystem.Load("$(MEGALIB)/lib/libMEGAlib.so")
M.PyConfig.IgnoreCommandLineOptions = True



###################################################################################################
# Step 3: Create some training, test & verification data sets
###################################################################################################


# Read the simulation file data:
DataSets = []
NumberOfDataSets = 0

if UseToyModel == True:
  for e in range(0, MaxEvents):
    Data = EventData()
    Data.createFromToyModel(e)
    DataSets.append(Data)
    
    NumberOfDataSets += 1
    if NumberOfDataSets > 0 and NumberOfDataSets % 1000 == 0:
      print("Data sets processed: {}".format(NumberOfDataSets))
  
else:
  # Load geometry:
  Geometry = M.MDGeometryQuest()
  if Geometry.ScanSetupFile(M.MString(GeometryName)) == True:
    print("Geometry " + GeometryName + " loaded!")
  else:
    print("Unable to load geometry " + GeometryName + " - Aborting!")
    quit()


  Reader = M.MFileEventsSim(Geometry)
  if Reader.Open(M.MString(FileName)) == False:
    print("Unable to open file " + FileName + ". Aborting!")
    quit()


  print("\n\nStarted reading data sets")
  NumberOfDataSets = 0
  while NumberOfDataSets < MaxEvents:
    Event = Reader.GetNextEvent()
    if not Event:
      break

    if Event.GetNIAs() > 0:
      Data = EventData()
      if Data.parse(Event) == True:
        if Data.hasHitsOutside(XMin, XMax, YMin, YMax, ZMin, ZMax) == False:
          DataSets.append(Data)
          NumberOfDataSets += 1
          if NumberOfDataSets % 500 == 0:
            print("Data sets processed: {}".format(NumberOfDataSets))

print("Info: Parsed {} events".format(NumberOfDataSets))

# Split the data sets in training and testing data sets

TestingTrainingSplit = 0.75


numEvents = len(DataSets)

numTraining = int(numEvents * TestingTrainingSplit)

TrainingDataSets = DataSets[:numTraining]
TestingDataSets = DataSets[numTraining:]



# For testing/validation split
# ValidationDataSets = TestingDataSets[:int(len(TestingDataSets)/2)]
# TestingDataSets = TestingDataSets[int(len(TestingDataSets)/2):]

print("###### Data Split ########")
print("Training/Testing Split: {}".format(TestingTrainingSplit))
print("Total Data: {}, Training Data: {},Testing Data: {}".format(numEvents, len(TrainingDataSets), len(TestingDataSets)))
print("##########################")


###################################################################################################
# Step 4: Setting up the neural network
###################################################################################################



###################################################################################################
# Step 5: Training and evaluating the network
###################################################################################################



Pair Identification

Welcome to JupyROOT 6.18/04
Start: -18.62421495826161, 3.458826380559419, 11
Event ID: 0
  Origin Z: 11
  Gamma Energy: 10000.0
  Hit 1 (origin: 0): type=m, pos=(-18.62421495826161, 3.458826380559419, 11.0)cm, E=598.3161070822018keV
  Hit 2 (origin: 1): type=e, pos=(-18.420301026089952, 3.814019860635873, 10.0)cm, E=727.6768113687457keV
  Hit 3 (origin: 2): type=e, pos=(-18.99544321321849, 4.31113624362102, 9.0)cm, E=792.0507363912637keV
  Hit 4 (origin: 3): type=e, pos=(-15.709925470454426, 9.328725572642682, 8.0)cm, E=829.5138178017272keV
  Hit 5 (origin: 4): type=e, pos=(-17.253923693470288, 9.093207364940168, 7.0)cm, E=880.8354492849294keV
  Hit 6 (origin: 5): type=e, pos=(-17.90453314743086, 10.783465954307482, 8.0)cm, E=927.9391991388866keV
  Hit 7 (origin: 6): type=e, pos=(-16.026347022328814, 10.971831159438315, 9.0)cm, E=448.0239651487241keV
  Hit 8 (origin: 1): type=p, pos=(-19.014238739272972, 2.0944164093655546, 10.0)cm, E=723.1110512702696keV
  Hit 9 

In [3]:
def generate_incidence(edges, pos_data):
    #Generate Incidence Matrix from Edge List
    n_hits = len(pos_data)
    n_edges = len(edges)
    Ri = np.zeros((n_hits, n_edges), dtype=np.uint8)
    Ro = np.zeros((n_hits, n_edges), dtype=np.uint8)
    
    for i in range(len(edges)):
        point = edges[i]
        from_pt = point[0]
        to_pt = point[1]
        Ro[from_pt][i] = 1
        Ri[to_pt][i] = 1
    
    return Ri, Ro

In [4]:
def connect_pos(pos_data):
    #Manually Connect Graph based on Positions
    edges = []
    for i in range(len(pos_data)):
        point_A = pos_data[i]
        z_A = point_A[2]

        for j in range(len(pos_data)):
            point_B = pos_data[j]
            z_B = point_B[2]

            if z_B == z_A + 1:
                edges.append((i, j))
                edges.append((j, i))
    return generate_incidence(edges, pos_data)

In [5]:
def pad(arr, shape):
    #Padd arr to Shape
    padded_arr = np.zeros(shape)
    if len(shape) == 1:
        padded_arr[:arr.shape[0]] = arr
    elif len(shape) == 2:
        padded_arr[:arr.shape[0],:arr.shape[1]] = arr
    return padded_arr

In [6]:
def vectorize_data(eventArr):
    # Edge Validity Labels, Manually Connected Rin, Mannually Connected Rout, XYZ, Type, Energy, Gamma Energy
    Edge_Labels, True_Ri, True_Ro, Man_Ri, Man_Ro, XYZ, Type, Energy, GammaEnergy = [], [], [], [], [], [], [], [], []
    max_hits, max_edges = 0, 0
    
    #Start Parsing Events
    for event in eventArr:
        #Keep track of max hits for padding
        max_hits = max(max_hits, len(event.X))
        
        #Generate Incidence Matrices based on Edges
        edges = []
        pos = np.swapaxes(np.vstack((event.X, event.Y, event.Z)), 0, 1)
        for i in range(1,len(event.Origin+1)):
            edges.append((event.Origin[i-1]-1,i-1))
        print(edges)
        e_Ri, e_Ro = generate_incidence(edges,pos)
        
        #Generate Proposed Incidence Matrices based on Positions
        p_Ri, p_Ro = connect_pos(pos)
        
        #Generate Edge Labels (0 - fake edge; 1 - true edge)
        e_label = np.zeros(p_Ri.shape[1])
        for i in range(p_Ri.shape[1]):
            out = np.where(p_Ro[:,i] == 1)[0][0]
            inn = np.where(p_Ri[:,i] == 1)[0][0]
            e_label[i] = 1*((out, inn) in edges)
            
        #Keep track of max edges for Padding
        max_edges = max(max_edges, p_Ri.shape[1])
        
        #Add all of the event data to lists
        Edge_Labels.append(e_label)
        True_Ri.append(e_Ri)
        True_Ro.append(e_Ro)
        Man_Ri.append(p_Ri)
        Man_Ro.append(p_Ro)
        XYZ.append(np.vstack((event.X, event.Y, event.Z)).T)
        Type.append(2*(event.Type=='m')+(event.Type=='p'))
        Energy.append(event.E)
        GammaEnergy.append(event.GammaEnergy)
    
    #Padding based on Max Hits and Max Edges
    for i in range(len(Edge_Labels)):
        Edge_Labels[i] = pad(Edge_Labels[i],(max_edges,))
        Man_Ri[i] = pad(Man_Ri[i],(max_hits,max_edges))
        Man_Ro[i] = pad(Man_Ro[i],(max_hits,max_edges))
        XYZ[i] = pad(XYZ[i],(max_hits,3))
        Type[i] = pad(Type[i],(max_hits,))
        Energy[i] = pad(Energy[i],(max_hits,))
    
    return np.array(Edge_Labels, dtype=np.float32), np.array(Man_Ri, dtype=np.float32), np.array(Man_Ro, dtype=np.float32), np.array(XYZ, dtype=np.float32), np.array(Type, dtype=np.float32), np.array(Energy, dtype=np.float32), np.array(GammaEnergy, dtype=np.float32)

In [7]:
Edge_Labels, Man_Ri, Man_Ro, XYZ, Type, Energy, GammaEnergy = vectorize_data(TrainingDataSets)

[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (0, 7), (7, 8), (8, 9), (9, 10), (10, 11)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (0, 10), (10, 11)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (0, 12)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (0, 11), (11, 12), (12, 13)]
[(-1, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9), (9, 10), (10, 11), (11, 12), (12, 13), (13, 14), (14, 15)]


In [8]:
Features = [[XYZ[i],Man_Ri[i], Man_Ro[i]] for i in range(XYZ.shape[0])]
Labels = Edge_Labels

In [9]:
import torch.distributed as dist
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

# Locals
from datasets import get_data_loaders
from trainers import get_trainer

In [10]:
train_dataset = [[Features[i],Labels[i]] for i in range(XYZ.shape[0])]

In [11]:
for i, (batch_input, batch_target) in enumerate(train_dataset):
    print(i)

0
1
2
3
4
5
6


In [12]:
trainer = get_trainer('gnn')
trainer.build_model(input_dim=3)
trainer.print_model_summary()

In [13]:
train_data_loader = DataLoader(train_dataset, batch_size=1)
test_data_loader = DataLoader(train_dataset, batch_size=1)

In [14]:
summary = trainer.train(train_data_loader=train_data_loader,
                        valid_data_loader=test_data_loader, n_epochs = 10)

batch_input
[tensor([[[-18.6242,   3.4588,  11.0000],
         [-18.4203,   3.8140,  10.0000],
         [-18.9954,   4.3111,   9.0000],
         [-15.7099,   9.3287,   8.0000],
         [-17.2539,   9.0932,   7.0000],
         [-17.9045,  10.7835,   8.0000],
         [-16.0263,  10.9718,   9.0000],
         [-19.0142,   2.0944,  10.0000],
         [-19.0600,  -1.4545,   9.0000],
         [-24.7594,  -0.7338,   8.0000],
         [-25.7479,  -1.0781,   9.0000],
         [-26.1042,  -1.2067,  10.0000],
         [-25.8555,  -0.1360,  11.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000]]]), tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]]), tensor([[[0., 1., 0.,  .

[tensor([[[ -0.9143,   4.8305, -15.0000],
         [ -1.4472,   4.9924, -16.0000],
         [ -0.9309,   3.7108, -17.0000],
         [ -1.2272,   2.6092, -18.0000],
         [ -1.2867,   4.6983, -19.0000],
         [  0.3011,   2.1504, -20.0000],
         [ -1.1007,   1.9909, -21.0000],
         [  2.4375,   3.0772, -20.0000],
         [  1.7171,   2.0683, -21.0000],
         [  7.0161,   8.5516, -20.0000],
         [  5.4955,   8.4550, -21.0000],
         [ -1.2694,   5.8898, -16.0000],
         [ -7.2655,   7.2658, -17.0000],
         [ -7.9868,   6.9925, -16.0000],
         [ -7.3488,   7.3746, -15.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000]]]), tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]]), tensor([[[0., 1., 0.,  ..., 0., 0., 

[tensor([[[  1.4617,   9.9694, -12.0000],
         [  1.1931,   9.9519, -13.0000],
         [  2.1611,   9.8922, -14.0000],
         [  2.3080,  10.0109, -15.0000],
         [  1.7509,  10.2314, -16.0000],
         [ -1.1782,   8.6207, -17.0000],
         [ -0.9142,  10.5705, -16.0000],
         [  0.3237,  16.6899, -15.0000],
         [ -9.8643,  16.8893, -16.0000],
         [ -9.3689,  16.7519, -15.0000],
         [  2.7871,  11.2156, -13.0000],
         [  3.5181,  10.9324, -14.0000],
         [  4.2311,  11.0530, -15.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000],
         [  0.0000,   0.0000,   0.0000]]]), tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 1.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]]), tensor([[[0., 1., 0.,  ..., 0., 0., 

In [17]:
summary

{'epoch': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
 'train_time': [0.06492090225219727,
  0.0438990592956543,
  0.03980708122253418,
  0.039784908294677734,
  0.04646110534667969,
  0.04848980903625488,
  0.04748392105102539,
  0.04668307304382324,
  0.042849063873291016,
  0.04238486289978027],
 'train_loss': [0.6846058453832354,
  0.6705183642251151,
  0.660495902810778,
  0.6515742199761527,
  0.6434368661471775,
  0.6359024473599025,
  0.6288561565535409,
  0.6222186003412519,
  0.6159296120916095,
  0.6099140984671456],
 'valid_time': [0.009070873260498047,
  0.007253885269165039,
  0.007227897644042969,
  0.007405996322631836,
  0.008620023727416992,
  0.008298873901367188,
  0.009068012237548828,
  0.007707834243774414,
  0.008463859558105469,
  0.007468223571777344],
 'valid_loss': [0.6724576268877301,
  0.6626857689448765,
  0.6538064990724836,
  0.6456261106899807,
  0.6379750711577279,
  0.6307758263179234,
  0.6239753110068185,
  0.6175299116543361,
  0.6113896199635097,
  0.6054604

Final Test Accuracy:  0.7532467532465902


Train Loss Log:  [0.6846058453832354, 0.6705183642251151, 0.660495902810778, 0.6515742199761527, 0.6434368661471775, 0.6359024473599025, 0.6288561565535409, 0.6222186003412519, 0.6159296120916095, 0.6099140984671456]


In [139]:
Man_Ro[0]

array([[0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [140]:
Edge_Labels[0]

array([0., 1., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0.,
       1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [116]:
a = np.array([[1,2,6],[4,5,6]])

In [121]:
np.where(a[:,2] == 6)[0][0]

0

In [118]:
a[:,2]

array([6, 6])