# Compute PFN efficiencies

In [3]:
import os

import numpy as np
import h5py
import yaml
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras

In [22]:
# Name of the signal sample; it selects the cloud/h5 files read below and
# the output directory written at the end.
task_name = "scalar1"

# Parse the shared project configuration once, then derive paths from it.
with open("../config.yaml") as fin:
    data = yaml.safe_load(fin)

model_dir = data['model_dir']
data_dir = data["data_dir"]
# NOTE: despite the name this is a path to a single .npy file, and it is
# not referenced by the cells shown in this notebook.
cloud_dir = f"{data['data_dir']}/processed/{task_name}_cloud.npy"

In [5]:
# Load the saved Keras model (the PFN) from `model_dir`, which is read from
# ../config.yaml. Takes ~10 seconds.
pfn = keras.models.load_model(model_dir)

2023-08-29 21:07:03.920531: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15363 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:07:00.0, compute capability: 6.0


In [32]:
# Load the pre-processed point clouds, one array per sample (~15 seconds?).
# The list order (pi0, gamma, signal) is the class index used later on:
# position 0, 1, 2 in `all_jets` lines up with labels 0, 1, 2.
particles = ["pi0", "gamma", task_name]
all_jets = []
for particle in particles:
    print(f"Loading clouds for {particle}...")
    clouds = np.load(f"{data_dir}/processed/{particle}_cloud.npy")
    all_jets.append(clouds)

Loading clouds for pi0...
Loading clouds for gamma...
Loading clouds for scalar1...


In [24]:
def get_GeV(particle):
    """
    Get an array of all the energies for one sample.

    Reads the "energy" dataset from
    ``{data_dir}/h5/{particle}_40-250GeV_100k.h5`` and returns it as an
    in-memory NumPy array with length-1 axes squeezed out.

    Parameters
    ----------
    particle : str
        Sample name used in the file name (e.g. "pi0", "gamma").

    Returns
    -------
    numpy.ndarray
        Energies of every event in the file (presumably in GeV, going by
        the function and file names -- confirm against the producer).
    """
    # Open read-only and close the handle as soon as the data is copied out;
    # the original left the HDF5 file open for the lifetime of the kernel.
    with h5py.File(f"{data_dir}/h5/{particle}_40-250GeV_100k.h5", "r") as file:
        # np.array(...) materialises the data before the file is closed.
        return np.array(np.squeeze(file["energy"]))

## Compute energies, predictions, and per-bin efficiencies

In [51]:
# Per-sample energy arrays, in the same order as `all_jets`
# (pi0, gamma, signal).
E = [get_GeV(sample) for sample in ("pi0", "gamma", task_name)]

In [33]:
# Predicted class index for every event, one array per sample
# (same order as `all_jets`).
Y_pred = []
for jets in all_jets:
    class_scores = pfn.predict(jets, batch_size=100)
    # The predicted class is the output column with the highest score.
    Y_pred.append(np.argmax(class_scores, axis=1))



In [40]:
# True label of each event is the index of the sample it came from
# (0 = pi0, 1 = gamma, 2 = signal). Build the labels from the actual number
# of predictions per sample instead of assuming 100000 events each, so the
# comparison below stays aligned for any sample size.
Y_true = np.concatenate(
    [np.full(len(preds), label) for label, preds in enumerate(Y_pred)]
)

correct = np.sum(np.concatenate(Y_pred) == Y_true)
print(f"Overall accuracy: {(correct / len(Y_true)):.3f}")

Overall accuracy: 0.993


In [55]:
def compute_efficiencies(particle, signal_label=2, cutoffs=None):
    """
    Fraction of events classified as signal, in bins of energy.

    Parameters
    ----------
    particle : int
        Index into the module-level ``E`` and ``Y_pred`` lists; can be
        either 0, 1, or 2.
    signal_label : int, optional
        Predicted class index that counts as "signal". Defaults to 2.
    cutoffs : sequence of float, optional
        Monotonically increasing bin edges. Defaults to 10 equal-width bins
        between 40 and 250.

    Returns
    -------
    list of list
        One row per bin:
        ``[EnergyRangeLow, EnergyRangeUp, Eff, EffErrLow, EffErrUp]``.
        ``Eff`` is rounded to 5 decimals and is NaN for a bin with no events.
    """
    if cutoffs is None:
        cutoffs = np.linspace(40, 250, 11)

    energies = E[particle]
    is_signal = Y_pred[particle] == signal_label
    n_bins = len(cutoffs) - 1

    # EnergyRangeLow, EnergyRangeUp, Eff, EffErrLow, EffErrUp
    table = []
    for i in range(n_bins):
        low, up = cutoffs[i], cutoffs[i + 1]
        if i == n_bins - 1:
            # Close the last bin on the right so an event sitting exactly on
            # the upper edge is counted rather than silently dropped.
            mask = (energies >= low) & (energies <= up)
        else:
            mask = (energies >= low) & (energies < up)

        n_total = np.sum(mask)
        if n_total == 0:
            # No events in this bin: report NaN explicitly rather than
            # dividing 0 by 0 (which only emitted a RuntimeWarning).
            eff = float("nan")
        else:
            # Number predicted signal / number of events in the bin
            eff = round(np.sum(is_signal[mask]) / n_total, 5)

        # TODO: the errors are still a fixed placeholder of 0.001; a binomial
        # interval computed from the bin counts is probably what is wanted
        # here -- confirm with whoever consumes these tables.
        table.append([low, up, eff, 0.001, 0.001])

    return table

In [56]:
# Write one comma-separated efficiency table per sample.
# Requires `os`, which the import cell at the top of the notebook must provide.
out_dir = f"./PFN_results/{task_name}_1GeV"
os.makedirs(out_dir, exist_ok=True)

# File name -> table rows; the index passed to compute_efficiencies is the
# sample's position in E / Y_pred (0 = pi0, 1 = gamma, 2 = signal).
tables = {
    "eff_pi0.txt": compute_efficiencies(0),
    "eff_gamma.txt": compute_efficiencies(1),
    f"eff_{task_name}.txt": compute_efficiencies(2),
}

header = "EnergyRangeLow, EnergyRangeUp, Eff, EffErrLow, EffErrUp"
for path, table in tables.items():
    with open(f"{out_dir}/{path}", "w") as fout:
        fout.write(header + "\n")
        fout.write("\n".join(", ".join(map(str, row)) for row in table))
        fout.write("\n")