# PFN feature analysis

Analyze the high-level features ($\Phi_n$) of the PFN using the SHAP algorithm.

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from matplotlib import pyplot as plt

2023-10-28 02:24:53.623906: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Task identifier; used below to select the dataset and to name the model and output files.
task_name = "axion1"

## 1. Compute high-level features using saved model

In [3]:
# Approximate runtime: ~1 min
import sys
# Extend the module search path two directories up so that the project-local
# `utils` and `data` modules imported below can be found (assumes they live
# there -- TODO confirm).
sys.path.append("../..")

from utils import data_dir, model_dir  # NOTE(review): data_dir is not used in the visible cells
from data import get_data

In [5]:
# Approximate runtime: ~2 min (original author's estimate, marked as uncertain)
# get_data returns six values; the second and fifth are discarded here
# (presumably a validation split -- verify against data.get_data).
(X_train, _, X_test,
 Y_train, _, Y_test) = get_data(task_name)

In [None]:
# Approximate runtime: 2 sec
# Load the saved model stored under model_dir as "<task_name>_pfn".
model = keras.models.load_model(f"{model_dir}/{task_name}_pfn")

In [None]:
# Evaluate the loaded model on the test split, in batches of 1000 samples.
model.evaluate(X_test, Y_test, batch_size=1000)

In [None]:
# Sub-model from the full model's input up to the tensor that feeds layer "F_0";
# its output is what the rest of this notebook calls the Phi features.
Phi_model = keras.models.Model(inputs=model.input, outputs=model.get_layer("F_0").input)
Phi_model.summary()

In [None]:
# Complementary sub-model: from the input tensor of layer "F_0" to the full model's output.
F_model = keras.models.Model(inputs=model.get_layer("F_0").input, outputs=model.output)
F_model.summary()

In [None]:
# Scatter the Phi-model output for the first test sample, one point per component index.
Phi = Phi_model(X_test[0][np.newaxis, ...])
plt.scatter(np.arange(len(Phi[0])), Phi[0].numpy())
plt.title("Summed outputs of Phi layer")
plt.xlabel("i")
plt.ylabel("Phi_i");

In [None]:
# Compute the Phi-model outputs for every test sample (they are written to disk in the next cell).
Phi_outputs = Phi_model.predict(X_test)

In [None]:
# Report the shape of the Phi features and store them next to the notebook.
# The array is passed positionally, so np.savez files it under the default key "arr_0".
print("Shape of Phi_outputs:", Phi_outputs.shape)
np.savez(f"./{task_name}_Phi_outputs.npz", Phi_outputs)

In [None]:
# Sanity check: run the stored Phi features through F_model and compare the
# predicted class indices with the argmax of Y_test along axis 1.
preds = np.argmax(F_model(Phi_outputs), axis=1)
truth = np.argmax(Y_test, axis=1)
print(preds[:10], truth[:10], sep="\n")
print(f"Accuracy: {np.mean(preds == truth) * 100:.2f}%")

In [None]:
# Approximate runtime: ~5 sec
# Persist the F sub-model next to the notebook as "<task_name>_F_model".
F_model.save(f"./{task_name}_F_model")

## 2. Apply SHAP algorithm

Reference (SHAP example notebook on explaining a multi-output regression model): https://shap.readthedocs.io/en/latest/example_notebooks/tabular_examples/model_agnostic/Multioutput%20Regression%20SHAP.html

In [None]:
# ~2 min
import shap

In [None]:
# Draw a 50-row subsample of the Phi features with shap.sample; it serves below both as
# the KernelExplainer background data and as the set of rows being explained.
sample_Phi_outputs = shap.sample(Phi_outputs, 50)

In [None]:
# Model-agnostic explainer for F_model, with the sampled Phi features as background data.
explainer = shap.KernelExplainer(
    model=F_model.predict,
    data=sample_Phi_outputs,
    link="identity"  # SHAP values sum to the raw model output (no link transform such as "logit")
)

In [None]:
# Compute SHAP values for the same 50 sampled rows that serve as the background set.
shap_values = explainer.shap_values(X=sample_Phi_outputs)

In [None]:
# Load SHAP's JavaScript visualization code into the notebook (used by its interactive plots).
shap.initjs()

In [None]:
# Summary plot of the SHAP values for the first model output.
# `features` must hold the very rows that were explained: shap_values was computed on
# sample_Phi_outputs, which shap.sample draws at random, so Phi_outputs[:50] would pair
# each SHAP value with the feature values of an unrelated row (wrong colouring).
# NOTE(review): shap_values[0] assumes the list-per-output return format; newer SHAP
# releases may return a single array with the output on the last axis -- confirm.
shap.summary_plot(
    shap_values=shap_values[0],
    features=sample_Phi_outputs,
    # Raw f-string: "\P" is not a valid escape sequence in a normal string literal.
    feature_names=[rf"$\Phi_{{{i}}}$" for i in range(Phi_outputs.shape[1])],
    max_display=50
)