# Particle Flow Test 1

Make our own particle flow network, train it on top tagging dataset

In [1]:
# Up to a minute to import everything
# Make tensorflow quieter
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

# Computing imports
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.metrics import roc_auc_score, roc_curve

# Useful imports
from tqdm import tqdm
import matplotlib.pyplot as plt

# Energyflow imports
import energyflow as ef
from energyflow.utils import data_split

## Import, preprocess data

Data lives in `X_train`, `X_val`, `X_test`, and `Y_train`, `Y_val`, `Y_test`.

In [2]:
# ~5 sec
jets_path = "/usatlas/atlas01/atlasdisk/users/atlas_wifeng/phys427/top-tagging/data/jets-988K-padded.npz"
jets = np.load(jets_path)

X, y = jets["X"], jets["y"]
Y = y

print("Loaded data")

Loaded data


In [3]:
# Do train/val/test split
n_val = 200000
n_test = 200000

(X_train, X_val, X_test,
 Y_train, Y_val, Y_test) = data_split(X, Y, val=n_val, test=n_test)

print("Done train/val/test split")

Done train/val/test split


## Define the model

PFN model:
$$\text{PFN}=F\left(\sum_{i=1}^M \Phi(p_i)\right)$$

In [4]:
def PFN(n_features,
        n_particles,
        n_outputs,
        Phi_sizes,
        F_sizes,
        name=None):
    
        inputs = layers.Input((n_particles, n_features), name="input")    
        masking_layer = layers.Masking(
            mask_value=0.,
            input_shape=(n_particles, n_features)
        )
        Phi_layers = [layers.Dense(size, activation="relu", name=f"Phi_{i}") for i, size in enumerate(Phi_sizes)]
        F_layers = [layers.Dense(size, activation="relu", name=f"F_{i}") for i, size in enumerate(F_sizes)]
        last_layer = layers.Dense(n_outputs, name="output")
        
        x = masking_layer(inputs)
        for layer in Phi_layers:
            x = layers.TimeDistributed(layer)(x)
        x = tf.math.reduce_sum(x, axis=1)
        for layer in F_layers:
            x = layer(x)
        x = last_layer(x)
        
        return tf.keras.Model(inputs=inputs, outputs=x)

## Compile and train the model

In [5]:
# Takes about 5 sec

# Training hyperparameters
lr = 1e-3
epochs = 5
batch_size = 10

# Data dimensions
n_features = 5
n_particles = 212

model = PFN(
    n_features=n_features,
    n_particles=n_particles,
    n_outputs=5,
    Phi_sizes=[100, 100, 50],
    F_sizes=[100, 100, 50]
)

In [6]:
tf.keras.backend.clear_session()
tf.config.experimental.get_memory_info("GPU:0")

{'current': 171520, 'peak': 230144}

In [7]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 212, 5)]          0         
                                                                 
 masking (Masking)           (None, 212, 5)            0         
                                                                 
 time_distributed (TimeDistr  (None, 212, 100)         600       
 ibuted)                                                         
                                                                 
 time_distributed_1 (TimeDis  (None, 212, 100)         10100     
 tributed)                                                       
                                                                 
 time_distributed_2 (TimeDis  (None, 212, 50)          5050      
 tributed)                                                       
                                                             

In [8]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)

In [9]:
history = model.fit(X_train, Y_train,
                    epochs=5,
                    batch_size=500,
                    validation_data=(X_val, Y_val),
                    verbose=1)

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [None]:
# get predictions on test data
preds = model.predict(X_test, batch_size=500)

In [None]:
test_labels = np.argmax(Y_test, axis=1)
pred_labels = np.argmax(preds, axis=1)

mask = (test_labels == pred_labels).astype(float)
print(mask)
print(f"Test accuracy: {mask.mean()}")