# PFN feature analysis

Analyze the high-level features ($\Phi_n$) of the PFN using the SHAP algorithm.

In [1]:
import tensorflow as tf
import numpy as np
import os
from tensorflow import keras
from matplotlib import pyplot as plt

2024-02-23 07:04:57.844937: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Name of the task; it selects the saved model, the third input data set, and the output file name.
task_name = "axion2"

## 1. Compute high-level features using saved model

In [3]:
# Allow importing from one level higher
import os
import sys; sys.path.append("..")

from config import DATA_DIR, MODEL_DIR, OUTPUT_DIR

In [4]:
# Restore the saved PFN model for this task (takes about 2 seconds).
saved_model_path = f"{MODEL_DIR}/{task_name}_pfn"
model = keras.models.load_model(saved_model_path)

2024-02-23 07:05:03.179821: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9804 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:88:00.0, compute capability: 7.5
2024-02-23 07:05:03.180708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 9804 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:89:00.0, compute capability: 7.5
2024-02-23 07:05:03.181422: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:2 with 9804 MB memory:  -> device: 2, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:b1:00.0, compute capability: 7.5
2024-02-23 07:05:03.182158: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:3 with 9804 MB memory:  -> device: 3, name: NVIDIA GeForce RTX

In [5]:
## LOAD DATA
# Keep every SUBSAMPLE_STRIDE-th event of each class; the progress message is derived
# from the same constant so the two cannot drift apart.
SUBSAMPLE_STRIDE = 10

# The classes are stacked in this order; the accuracy check further down assumes
# the integer labels 0, 1, 2 follow the same order.
particles = ["pi0", "gamma", task_name]
cloud_parts = []
for particle in particles:
    print(f"Loading {100 / SUBSAMPLE_STRIDE:g}% of data for particle {particle}...")
    # Memory-map the file so the full array is never loaded into RAM, then copy
    # only the selected events into a regular in-memory array.
    full_cloud = np.load(f"{DATA_DIR}/processed/pfn/{particle}_cloud.npy", mmap_mode="r")
    cloud_parts.append(np.array(full_cloud[::SUBSAMPLE_STRIDE]))
clouds = np.vstack(cloud_parts)
print(f"clouds.shape: {clouds.shape}")

Loading 10% of data for particle pi0...
Loading 10% of data for particle gamma...
Loading 10% of data for particle axion2...
clouds.shape: (30000, 960, 4)


In [6]:
# Name of the layer at which the model is split: Phi_model runs from the model inputs
# to this layer's output, and F_model runs from this layer's output to the model output.
cut_layer = "Sigma"

In [7]:
# Sub-model mapping the PFN inputs to the output of the cut layer (the high-level features).
latent_tensor = model.get_layer(cut_layer).output
Phi_model = keras.Model(inputs=model.inputs, outputs=latent_tensor)

In [8]:
# Sub-model mapping the cut-layer output to the final output of the PFN.
F_model = keras.Model(
    inputs=model.get_layer(cut_layer).output,
    outputs=model.output,
)

In [9]:
# Evaluate the cut-layer output (high-level features) for every loaded point cloud.
Phi_outputs = Phi_model.predict(x=clouds)



In [10]:
# Report the shape of the feature array produced by Phi_model.
print(f"Shape of Phi_outputs: {Phi_outputs.shape}")

Shape of Phi_outputs: (30000, 128)


In [11]:
# Sanity check: classify the high-level features with F_model and compare with the truth.
# Rows of Phi_outputs follow the order in which the classes were stacked, so the true
# label of a row is the index of its class in `particles`.
n_classes = len(particles)
n_samples = Phi_outputs.shape[0]

# The labels built below are only correct if every class contributes the same number
# of rows (not verifiable from here). At the very least the total must split evenly,
# otherwise `preds` and `truth` would have different lengths.
assert n_samples % n_classes == 0, (
    f"{n_samples} samples cannot be split evenly into {n_classes} classes"
)
N = n_samples // n_classes

preds = np.argmax(F_model(Phi_outputs), axis=1)
truth = np.repeat(np.arange(n_classes), N)  # N zeros, then N ones, then N twos, ...
print(preds[:10])
print(truth[:10])
print(f"Accuracy: {np.mean(preds == truth) * 100:.2f}%")

[0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 0 0 0 0]
Accuracy: 98.57%


## 2. Apply the SHAP algorithm

Reference: the SHAP documentation example [Multioutput Regression SHAP](https://shap.readthedocs.io/en/latest/example_notebooks/tabular_examples/model_agnostic/Multioutput%20Regression%20SHAP.html).

In [12]:
# ~2 min
import shap

In [13]:
# Subsample 50 feature vectors; they serve as the background data of the explainer
# and are also the points that get explained further down.
sample_Phi_outputs = shap.sample(Phi_outputs, nsamples=50)

In [14]:
# Kernel SHAP is given a prediction function (F_model.predict) and a background data set.
# link="identity" should leave the model output untransformed, so the SHAP values are
# expressed directly in units of that output ("logit" being the alternative shap offers)
# -- NOTE(review): confirm against the KernelExplainer docs of the installed shap version.
explainer = shap.KernelExplainer(F_model.predict, sample_Phi_outputs, link="identity")



In [15]:
# Explain the same sampled feature vectors that were passed to the explainer as background data.
shap_values = explainer.shap_values(sample_Phi_outputs)

  0%|          | 0/50 [00:00<?, ?it/s]



In [16]:
# First line: number of entries in shap_values; second line: shape of the first entry.
print(len(shap_values), shap_values[0].shape, sep="\n")

3
(50, 128)


In [17]:
# Persist the SHAP values as a single stacked array.
# Create the results directory first so that a missing folder does not make the save
# fail after the SHAP values have already been computed.
shap_output_path = f"{OUTPUT_DIR}/pfn_results/{task_name}_PFN_SHAP_values.npy"
os.makedirs(os.path.dirname(shap_output_path), exist_ok=True)
np.save(shap_output_path, np.array(shap_values))

In [18]:
# Show the location of the saved SHAP values.
saved_to = f"{OUTPUT_DIR}/pfn_results/{task_name}_PFN_SHAP_values.npy"
print(saved_to)

/data/wifeng/photon-jet-v2/output/pfn_results/axion2_PFN_SHAP_values.npy
