In [7]:
###################################################################################################
#
# PairIdentification.py
#
# Copyright (C) by Andreas Zoglauer & Harrison Costatino.
#
# Please see the file LICENSE in the main repository for the copyright-notice.
#
###################################################################################################



###################################################################################################

import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import tensorflow as tf
import numpy as np

#from mpl_toolkits.mplot3d import Axes3D
#import matplotlib.pyplot as plt

import random

import signal
import sys
import time
import math
import csv
import os
import argparse
from datetime import datetime
from functools import reduce


print("\nPair Identification")
print("============================\n")



# Step 1: Input parameters
###################################################################################################


# Default parameters

UseToyModel = True

# Split between training and testing data
TestingTrainingSplit = 0.1

MaxEvents = 10

# File names
FileName = "PairIdentification.p1.sim.gz"
GeometryName = "$(MEGALIB)/resource/examples/geomega/GRIPS/GRIPS.geo.setup"


# Set in stone later
TestingTrainingSplit = 0.8

OutputDirectory = "Results"


parser = argparse.ArgumentParser(description='Perform training and/or testing of the pair identification machine learning tools.')
parser.add_argument('-f', '--filename', default='PairIdentification.p1.sim.gz', help='File name used for training/testing')
parser.add_argument('-m', '--maxevents', default='1000', help='Maximum number of events to use')
parser.add_argument('-s', '--testingtrainigsplit', default='0.1', help='Testing-training split')
parser.add_argument('-b', '--batchsize', default='128', help='Batch size')

args = parser.parse_args()

if args.filename != "":
  FileName = args.filename

if int(args.maxevents) > 1000:
  MaxEvents = int(args.maxevents)

if int(args.batchsize) >= 16:
  BatchSize = int(args.batchsize)

if float(args.testingtrainigsplit) >= 0.05:
  TestingTrainingSplit = float(args.testingtrainigsplit)


if os.path.exists(OutputDirectory):
  Now = datetime.now()
  OutputDirectory += Now.strftime("_%Y%m%d_%H%M%S")

os.makedirs(OutputDirectory)



###################################################################################################
# Step 2: Global functions
###################################################################################################


# Take care of Ctrl-C
Interrupted = False
NInterrupts = 0
def signal_handler(signal, frame):
  global Interrupted
  Interrupted = True
  global NInterrupts
  NInterrupts += 1
  if NInterrupts >= 2:
    print("Aborting!")
    sys.exit(0)
  print("You pressed Ctrl+C - waiting for graceful abort, or press  Ctrl-C again, for quick exit.")
signal.signal(signal.SIGINT, signal_handler)


# Everything ROOT related can only be loaded here otherwise it interferes with the argparse
from EventData import EventData

# Load MEGAlib into ROOT so that it is usable
import ROOT as M
M.gSystem.Load("$(MEGALIB)/lib/libMEGAlib.so")
M.PyConfig.IgnoreCommandLineOptions = True



###################################################################################################
# Step 3: Create some training, test & verification data sets
###################################################################################################


# Read the simulation file data:
DataSets = []
NumberOfDataSets = 0

if UseToyModel == True:
  for e in range(0, MaxEvents):
    Data = EventData()
    Data.createFromToyModel(e)
    DataSets.append(Data)
    
    NumberOfDataSets += 1
    if NumberOfDataSets > 0 and NumberOfDataSets % 1000 == 0:
      print("Data sets processed: {}".format(NumberOfDataSets))
  
else:
  # Load geometry:
  Geometry = M.MDGeometryQuest()
  if Geometry.ScanSetupFile(M.MString(GeometryName)) == True:
    print("Geometry " + GeometryName + " loaded!")
  else:
    print("Unable to load geometry " + GeometryName + " - Aborting!")
    quit()


  Reader = M.MFileEventsSim(Geometry)
  if Reader.Open(M.MString(FileName)) == False:
    print("Unable to open file " + FileName + ". Aborting!")
    quit()


  print("\n\nStarted reading data sets")
  NumberOfDataSets = 0
  while NumberOfDataSets < MaxEvents:
    Event = Reader.GetNextEvent()
    if not Event:
      break

    if Event.GetNIAs() > 0:
      Data = EventData()
      if Data.parse(Event) == True:
        if Data.hasHitsOutside(XMin, XMax, YMin, YMax, ZMin, ZMax) == False:
          DataSets.append(Data)
          NumberOfDataSets += 1
          if NumberOfDataSets % 500 == 0:
            print("Data sets processed: {}".format(NumberOfDataSets))

print("Info: Parsed {} events".format(NumberOfDataSets))

# Split the data sets in training and testing data sets

TestingTrainingSplit = 0.75


numEvents = len(DataSets)

numTraining = int(numEvents * TestingTrainingSplit)

TrainingDataSets = DataSets[:numTraining]
TestingDataSets = DataSets[numTraining:]



# For testing/validation split
# ValidationDataSets = TestingDataSets[:int(len(TestingDataSets)/2)]
# TestingDataSets = TestingDataSets[int(len(TestingDataSets)/2):]

print("###### Data Split ########")
print("Training/Testing Split: {}".format(TestingTrainingSplit))
print("Total Data: {}, Training Data: {},Testing Data: {}".format(numEvents, len(TrainingDataSets), len(TestingDataSets)))
print("##########################")


###################################################################################################
# Step 4: Setting up the neural network
###################################################################################################



###################################################################################################
# Step 5: Training and evaluating the network
###################################################################################################



Pair Identification

Start: 13.946457870439843, -2.4579130263593996, -16
Event ID: 0
  Origin Z: -16
  Gamma Energy: 10000.0
  Hit 1 (origin: 0): type=m, pos=(13.946457870439843, -2.4579130263593996, -16.0)cm, E=456.6472667405162keV
  Hit 2 (origin: 1): type=e, pos=(13.515682587737015, -2.2215895866683644, -17.0)cm, E=355.1528792769834keV
  Hit 3 (origin: 2): type=e, pos=(12.779081574391864, -2.088314264831899, -18.0)cm, E=392.94905274346763keV
  Hit 4 (origin: 3): type=e, pos=(13.598713884397826, -3.0214756128629916, -19.0)cm, E=468.7798046590152keV
  Hit 5 (origin: 4): type=e, pos=(14.295938621337633, -2.5024565875899474, -20.0)cm, E=501.85045292002695keV
  Hit 6 (origin: 5): type=e, pos=(14.05334519580752, -2.0674714044838827, -21.0)cm, E=551.2919047284079keV
  Hit 7 (origin: 6): type=e, pos=(13.207649134883658, -1.5747843826957557, -22.0)cm, E=592.977489694808keV
  Hit 8 (origin: 7): type=e, pos=(16.122036563343777, -1.0896600222680188, -23.0)cm, E=653.7722147211371keV
  Hit 9 (or

In [9]:
def connect_pos(pos_data):
    edges = []

    for i in range(len(pos_data)):
        point_A = pos_data[i]
        z_A = point_A[2]

        for j in range(len(pos_data)):
            point_B = pos_data[j]
            z_B = point_B[2]

            if z_B == z_A + 1:
                edges.append((i, j))
                edges.append((j, i))
    print(edges)
    
    return generate_incidence(edges, pos_data)

In [61]:
def vectorize_data(eventArr):
    Ri, Ro = [], []
    xyz = []
    t = []
    E = []
    GE = []
    
    max_hits = 0
    max_edges = 0
    
    #parse events
    for event in eventArr:
        edges = []
        max_hits = max(max_hits, len(event.X))
        
        pos = np.swapaxes(np.vstack((event.X, event.Y, event.Z)), 0, 1)
        for i in range(1,len(event.Origin+1)):
            edges.append((i-1,event.Origin[i-1]-1))
        
        max_edges = max(max_edges, len(edges))
        
        e_Ri, e_Ro = generate_incidence(edges,pos)
        
        Ri.append(e_Ri)
        Ro.append(e_Ro)
        xyz.append(np.hstack((event.X, event.Y, event.Z)))
        t.append(2*(event.Type=='m')+(event.Type=='p'))
        E.append(event.E)
        GE.append(event.GammaEnergy)
    
    #padding
    for i in range(len(Ri)):
        arr = Ri[i]
        padded_arr = np.zeros((max_hits,max_edges))
        padded_arr[:arr.shape[0],:arr.shape[1]] = arr
        Ri[i] = padded_arr
        
        arr = Ro[i]
        padded_arr = np.zeros((max_hits,max_edges))
        padded_arr[:arr.shape[0],:arr.shape[1]] = arr
        Ro[i] = padded_arr
        
        arr = xyz[i]
        padded_arr = np.zeros((max_hits*3))
        padded_arr[:arr.shape[0]] = arr
        xyz[i] = padded_arr
        
        arr = t[i]
        padded_arr = np.zeros((max_hits))
        padded_arr[:arr.shape[0]] = arr
        t[i] = padded_arr
        
        arr = E[i]
        padded_arr = np.zeros((max_hits))
        padded_arr[:arr.shape[0]] = arr
        E[i] = padded_arr
    
    return np.array(Ri), np.array(Ro), np.array(xyz), np.array(t), np.array(E), np.array(GE)

In [65]:
Ri, Ro, xyz, t, E, GE = vectorize_data(TrainingDataSets)

In [74]:
from heptrkx-gnn-tracking.trainers import get_trainer

SyntaxError: invalid syntax (<ipython-input-74-8191b9609366>, line 1)

In [66]:
Ri.shape

(7, 18, 17)

array([2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [174]:
np.vstack((TrainingDataSets[0].X, TrainingDataSets[0].Y, TrainingDataSets[0].Z)).shape

(3, 12)

In [178]:
np.swapaxes(np.vstack((TrainingDataSets[0].X, TrainingDataSets[0].Y, TrainingDataSets[0].Z)), 0, 1)

array([[  0.75579113,  18.46519107, -16.        ],
       [ -0.2556096 ,  18.92207314, -17.        ],
       [-15.86522727,  29.71083659, -16.        ],
       [-15.85672119,  30.74853819, -15.        ],
       [-13.34221553,  31.8794194 , -14.        ],
       [-12.2338948 ,  32.22959867, -15.        ],
       [  0.17782903,  18.76108344, -17.        ],
       [  2.55580603,  16.09349534, -18.        ],
       [  2.41034999,  14.97025035, -19.        ],
       [  0.15182756,  12.68716522, -20.        ],
       [  1.54137928,  14.09845768, -19.        ],
       [  0.07376962,  12.48190641, -20.        ]])

In [180]:
TrainingDataSets[0].Y

array([18.46519107, 18.92207314, 29.71083659, 30.74853819, 31.8794194 ,
       32.22959867, 18.76108344, 16.09349534, 14.97025035, 12.68716522,
       14.09845768, 12.48190641])

In [150]:
np.multiply(a,b)

array([[5, 5],
       [5, 5]])

In [152]:
np.pad(a, 1)

array([[0, 0, 0],
       [0, 5, 0],
       [0, 0, 0]])