In [2]:
import argparse
import os
import random
import time

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Concatenate,
    Flatten,
    BatchNormalization,
    Activation,
)
from tensorflow.keras.optimizers import Adam
from qkeras import QActivation, QDense, QConv1D, QConv2D, quantized_bits
from qkeras.autoqkeras.utils import print_qmodel_summary
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper

from node_edge_projection import NodeEdgeProjection

In [3]:
# Read the per-jet constituent features and the matching targets from disk.
# Both paths are relative to the notebook's working directory.
jetConstituent, target = (
    np.load("data/jetConstituent_150_16f.npy"),
    np.load("data/jetConstituent_target_150_16f.npy"),
)

In [4]:
# Cap the number of constituents per jet: keep only the first `nmax`
# entries along axis 1 and leave the jet and feature axes untouched.
nmax = 30
jetConstituent = jetConstituent[:, :nmax]

In [5]:
# The dataset is laid out as N_jets x N_constituents x N_features;
# read the three sizes off the leading axes in one step.
njet, nconstit, nfeat = jetConstituent.shape[:3]

# Report each dimension on its own line.
for _caption, _size in (
    ("Number of jets =", njet),
    ("Number of constituents =", nconstit),
    ("Number of features =", nfeat),
):
    print(_caption, _size)


Number of jets = 260000
Number of constituents = 30
Number of features = 16


In [6]:

# Shuffle the constituents of every jet independently (each jet gets its own
# random ordering along axis 1), then split into train/validation and test.
#
# The shuffle draws from a dedicated, seeded generator so that
# "Restart Kernel -> Run All" reproduces the same arrays; previously only the
# train/test split was seeded and the shuffle used the unseeded global RNG.
# NOTE: this cell mutates `jetConstituent` in place and then deletes the
# `jetConstituent` / `target` names, so it cannot be re-run on its own without
# re-running the loading cells first.
SHUFFLE_SEED = 7
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)

# Take the constituent count from the array itself rather than from a
# variable set in another cell, so the permutation always matches axis 1.
n_constituents_per_jet = jetConstituent.shape[1]

print("Before --->> jetConstituent[0,0:4,0] = ", jetConstituent[0, 0:4, 0])
for i in range(jetConstituent.shape[0]):
    # Fancy indexing builds a reordered copy, which is then written back
    # over row i, so source and destination never alias mid-assignment.
    jetConstituent[i] = jetConstituent[
        i, shuffle_rng.permutation(n_constituents_per_jet), :
    ]
print("After --->> jetConstituent[0,0:4,0] = ", jetConstituent[0, 0:4, 0])

# From here on the data is referred to as X (features) and Y (targets).
X = jetConstituent
Y = target
del jetConstituent, target

# Hold out 33% of the jets for testing; random_state fixes the partition.
X_train_val, X_test, Y_train_val, Y_test = train_test_split(
    X, Y, test_size=0.33, random_state=7
)

print(X_train_val.shape, X_test.shape, Y_train_val.shape, Y_test.shape)

Before --->> jetConstituent[0,0:4,0] =  [-177.97363281 -139.30709839 -122.01608276 -112.20430756]
After --->> jetConstituent[0,0:4,0] =  [-112.20430756   -9.28591919   -6.54517365  -34.23769379]
(174200, 30, 16) (85800, 30, 16) (174200, 5) (85800, 5)


In [7]:
# Report how many jets of each class ended up in each split.
# The class of a jet is the argmax over its target row; column index i
# corresponds to jet_class_names[i]. The argmax is computed once per split
# and tallied with bincount instead of being recomputed for every class.
jet_class_names = ("G", "Q", "W", "Z", "T")

for split_name, split_targets in (
    ("training/validation", Y_train_val),
    ("testing", Y_test),
):
    # minlength guarantees an entry (possibly 0) for every named class,
    # even if a class is absent from this split.
    class_counts = np.bincount(
        np.argmax(split_targets, axis=1), minlength=len(jet_class_names)
    )
    for class_index, class_name in enumerate(jet_class_names):
        print(
            "number of %s jets for %s: %i"
            % (class_name, split_name, class_counts[class_index])
        )


number of G jets for training/validation: 35213
number of Q jets for training/validation: 33694
number of W jets for training/validation: 35083
number of Z jets for training/validation: 35023
number of T jets for training/validation: 35187
number of G jets for testing: 17191
number of Q jets for testing: 16774
number of W jets for testing: 17152
number of Z jets for testing: 17275
number of T jets for testing: 17408


In [8]:
# Dimensions of the training/validation split; no model is built in this
# cell, it only records and prints the sizes.
njet, nconstit, nfeat = X_train_val.shape[:3]
ntargets = Y_train_val.shape[1]

for _caption, _size in (
    ("#jets = ", njet),
    ("#constituents = ", nconstit),
    ("#targets = ", ntargets),
    ("#features = ", nfeat),
):
    print(_caption, _size)

#jets =  174200
#constituents =  30
#targets =  5
#features =  16
