# Photon jet data organizer

Paper draft: https://arxiv.org/abs/2203.16703

In [2]:
import os
import h5py
import numpy as np
from matplotlib import pyplot as plt
from pprint import pprint

In [3]:
# Notebook configuration.
# `data_dir` is the root of the photon-jet data tree. It defaults to the original
# location but can be redirected by setting the PHOTON_JET_DATA_DIR environment
# variable before the kernel starts, so the notebook is not tied to one user's path.
data_dir = os.environ.get(
    "PHOTON_JET_DATA_DIR",
    "/usatlas/atlas01/atlasdisk/users/atlas_wifeng/photon-jet/data",
)
# A later cell opens files with paths relative to this directory, so make it the
# working directory. Raises an OSError subclass if the directory is not accessible.
os.chdir(data_dir)

## Initial exploration

In [4]:
# Collect the names of the raw HDF5 files in alphabetical order and show them.
raw_h5_dir = f"{data_dir}/raw_files/h5"
datasets = os.listdir(raw_h5_dir)
datasets.sort()
pprint(datasets)

['axion1_40-250GeV_100k.h5',
 'axion1_40-250GeV_100k_mass0p5GeV.h5',
 'axion2_40-250GeV_100k.h5',
 'axion2_40-250GeV_100k_mass0p5GeV.h5',
 'gamma_40-250GeV_100k.h5',
 'gamma_40-250GeV_100k_mass0p5GeV.h5',
 'pi0_40-250GeV_100k.h5',
 'pi0_40-250GeV_100k_mass0p5GeV.h5',
 'scalar1_40-250GeV_100k.h5',
 'scalar1_40-250GeV_100k_mass0p5GeV.h5']


Each classification task has three classes. Two particles appear in every task:
1. $\gamma$ (gamma)
2. $\pi^0$ (pi0)

Each task also includes one additional particle as its third class, chosen from the following:

3. axion1 ($a \rightarrow \gamma\gamma$)
4. axion2 ($a \rightarrow 3\pi^0$)
5. scalar1 ($s \rightarrow \pi^0 \pi^0$)

In [10]:
# Take a peek into these files: for each one, print the type of its "energy"
# entry followed by the name and shape of every top-level entry.
#
# Files are opened explicitly read-only ("r") so that inspecting them can never
# create or modify a file, whatever default mode the installed h5py uses.
#
# A later cell reads datasets from `f`, so the most recently opened file is
# deliberately left open. Every earlier handle is closed as soon as the next
# file opens successfully, so at most one file stays open.
f = None
for dataset in datasets:
    print(f"=== {dataset} ===")

    try:
        # Build the path from data_dir (as the listing cell does) so that this
        # does not depend on the current working directory.
        opened = h5py.File(f"{data_dir}/raw_files/h5/{dataset}", "r")
    except OSError:
        # The file could not be opened; report it and move on to the next one.
        print("OSError encountered.\n")
        continue

    # Release the previous file before rebinding `f` to the new handle.
    if f is not None:
        f.close()
    f = opened

    print(type(f["energy"]))

    for key in f.keys():
        print(key.ljust(20), f[key].shape)
    print()

=== axion1_40-250GeV_100k.h5 ===
<class 'h5py._hl.dataset.Dataset'>
energy               (100000, 1)
layer_0              (100000, 4, 16)
layer_1              (100000, 4, 128)
layer_2              (100000, 16, 16)
layer_3              (100000, 16, 8)
overflow             (100000, 4)

=== axion1_40-250GeV_100k_mass0p5GeV.h5 ===
<class 'h5py._hl.dataset.Dataset'>
energy               (100000, 1)
layer_0              (100000, 4, 16)
layer_1              (100000, 4, 128)
layer_2              (100000, 16, 16)
layer_3              (100000, 16, 8)
overflow             (100000, 4)

=== axion2_40-250GeV_100k.h5 ===
<class 'h5py._hl.dataset.Dataset'>
energy               (100000, 1)
layer_0              (100000, 4, 16)
layer_1              (100000, 4, 128)
layer_2              (100000, 16, 16)
layer_3              (100000, 16, 8)
overflow             (100000, 4)

=== axion2_40-250GeV_100k_mass0p5GeV.h5 ===
<class 'h5py._hl.dataset.Dataset'>
energy               (100000, 1)
layer_0              (

In [5]:
# Read the "energy" and "overflow" entries of the currently open file.
# NOTE(review): `f` is not opened in this cell. It is whatever handle the
# file-inspection loop above left bound, i.e. the last file that opened without
# an OSError. Confirm which dataset this is meant to be.
# `[()]` presumably reads the whole dataset into memory (h5py convention) —
# TODO confirm.
energy = f.get("energy")[()]
overflow = f.get("overflow")[()]