## Evaluation of showers submitted to the Fast Calorimeter Challenge 2022

This is an interactive version of the file ```evaluate.py```. It can also be run directly using:

```
python evaluate.py -i INPUT_FILE -r REFERENCE_FILE -m MODE -d DATASET --output_dir OUTPUT_DIR --source_dir SOURCE_DIR
```

where the arguments are:

- ```INPUT_FILE``` is the .hdf5 file that contains the showers to be evaluated.

- ```REFERENCE_FILE``` is either the .hdf5 file that contains the showers the input is compared to, or the .pkl file that is created when the code is run for the first time. The latter contains all relevant high-level features and using it results in a faster runtime.

- ```MODE``` is one of [all, avg, avg-E, hist, hist-p, hist-chi], and defaults to 'all'. 'avg' plots the average shower of all provided events; 'avg-E' plots the average shower in smaller energy ranges; 'hist' plots histograms of high-level features and saves the separation power, a measure of difference between the histogram of the provided file and the histogram of a reference in SOURCE_DIR, into a file; 'hist-p' only plots the histograms; 'hist-chi' only saves the separation power; and 'all' does all of the above.

- ```DATASET``` is the name of the dataset that should be evaluated. Must be one of [1-photons, 1-pions, 2, 3].

- ```OUTPUT_DIR``` is the folder in which the plots and other files will be stored. It defaults to 'evaluation_results/'.

- ```SOURCE_DIR``` is the folder in which the reference .pkl will be stored. In the future, it will also be where the .hdf5 files for the classifier are saved.


In [10]:
import os
import evaluate
import argparse
import h5py
import numpy as np
import latex
import HighLevelFeatures as HLF


# Debug aid: print the module search path (presumably to confirm that the
# project-local `evaluate` and `HighLevelFeatures` modules can be imported
# -- TODO confirm).
import sys
print(sys.path)

['c:\\Users\\vince\\tdsm_encoder\\dataset_generation_code', 'C:\\Users\\vince\\tdsm_encoder', 'C:\\Users\\vince\\tdsm_encoder\\util', 'c:\\Users\\vince\\tdsm_encoder\\dataset_generation_code', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\python312.zip', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\DLLs', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\Lib', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312', '', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\win32', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\win32\\lib', 'c:\\Users\\vince\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\Pythonwin']


In [None]:
# User configuration -- adjust to your needs.

# Dataset to evaluate; must be one of '1-photons', '1-pions', '2', '3'.
DATASET = '1-photons'

# Evaluation mode; not really needed here because the notebook is interactive.
MODE = 'all'

# Showers to be evaluated: REPLACE THIS WITH YOUR GENERATED EVENTS.
INPUT_FILE = './dataset_1_photons_1.hdf5'

# GEANT evaluation events that are provided on zenodo.
REFERENCE_FILE = './dataset_1_photons_2.hdf5'
# Alternative: the .pkl computed in the first run of the notebook can be used
# instead of the .hdf5 to save time in subsequent runs, e.g.
# REFERENCE_FILE = 'source/dataset_2_2.pkl'

# Folder in which the plots and other files will be stored.
OUTPUT_DIR = 'evaluation_results/'

# Folder in which the reference .pkl will be stored.
SOURCE_DIR = 'source/'

In [None]:
# Stand-in for the argument parser of evaluate.py: the notebook settings are
# collected under the attribute names that the code below reads from `args`.
parser_replacement = dict(
    input_file=INPUT_FILE,
    reference_file=REFERENCE_FILE,
    mode=MODE,
    dataset=DATASET,
    output_dir=OUTPUT_DIR,
    source_dir=SOURCE_DIR,
)
args = argparse.Namespace(**parser_replacement)

In [None]:
# Open the file with the showers to be evaluated (read-only).
source_file = h5py.File(args.input_file, 'r')

# Check that the file has the correct shape for the chosen dataset.
evaluate.check_file(source_file, args)

# Make sure the output and source folders exist. exist_ok=True replaces the
# former isdir()-then-makedirs() sequence, which could fail if the folder
# was created between the check and the makedirs() call.
os.makedirs(args.output_dir, exist_ok=True)
os.makedirs(args.source_dir, exist_ok=True)

# Extract showers and energies from the input file.
shower, energy = evaluate.extract_shower_and_energy(source_file, which='input')

# Helper class for high-level features. The particle type and the binning
# .xml file name both follow from the dataset name ('-' becomes '_').
particle = {'1-photons': 'photon', '1-pions': 'pion',
            '2': 'electron', '3': 'electron'}[args.dataset]
hlf = HLF.HighLevelFeatures(particle, filename='binning_dataset_{}.xml'.format(args.dataset.replace('-', '_')))



In [None]:
# Read in the reference, either from the .hdf5 file with the reference showers
# or from a previously saved .pkl. The extension decides which branch runs.
reference_stem, reference_ext = os.path.splitext(args.reference_file)

if reference_ext == '.hdf5':
    print("using .hdf5 reference")
    reference_file = h5py.File(args.reference_file, 'r')
    evaluate.check_file(reference_file, args, which='reference')
    reference_hlf = HLF.HighLevelFeatures(particle, filename='binning_dataset_{}.xml'.format(
        args.dataset.replace('-', '_')))
    reference_shower, reference_energy = evaluate.extract_shower_and_energy(reference_file, which='reference')

    reference_hlf.Einc = reference_energy
    # Store the pickle in SOURCE_DIR under the reference file's base name
    # (e.g. 'source/dataset_1_photons_2.pkl') rather than at a fixed
    # './evaluation_results/test.pkl', so it follows the configured folders
    # and different reference files do not overwrite each other.
    reference_pkl = os.path.join(args.source_dir, os.path.basename(reference_stem) + '.pkl')
    evaluate.save_reference(reference_hlf, reference_pkl)

elif reference_ext == '.pkl':
    print("using .pkl file for reference")
    # NOTE(review): reference_shower is not assigned on this branch; the cells
    # below use it only when a quantity is not already stored on reference_hlf.
    reference_hlf = evaluate.load_reference(args.reference_file)
else:
    raise ValueError("reference_file must be .hdf5 or .pkl!")


### The cells below correspond to different evaluation MODEs and can be run independently of each other.

In [8]:
# Quick look at the input: dump the shower array, then draw the average shower
# without passing an output file name (filename=None).
print(shower)
_ = hlf.DrawAverageShower(shower, filename=None, title="Shower average")

[[3.00014e+02 4.45615e+01 0.00000e+00 ... 0.00000e+00 0.00000e+00
  0.00000e+00]
 [9.40184e+01 0.00000e+00 9.57449e+00 ... 7.80987e+02 1.80680e+02
  3.61823e-01]
 [1.76159e+02 0.00000e+00 0.00000e+00 ... 2.12393e+03 2.74176e+02
  0.00000e+00]
 ...
 [0.00000e+00 0.00000e+00 0.00000e+00 ... 5.65654e-01 0.00000e+00
  0.00000e+00]
 [8.10009e+01 0.00000e+00 0.00000e+00 ... 0.00000e+00 0.00000e+00
  0.00000e+00]
 [3.56421e+02 3.42307e+01 1.06803e+01 ... 1.27744e+01 2.15880e+01
  0.00000e+00]]


RuntimeError: Failed to process string with tex because latex could not be found

Error in callback <function _draw_all_if_interactive at 0x00000179DA88C180> (for post_execute), with arguments args (),kwargs {}:


RuntimeError: Failed to process string with tex because latex could not be found

RuntimeError: Failed to process string with tex because latex could not be found

<Figure size 2000x600 with 6 Axes>

In [None]:
# evaluation mode 'avg': average of the given showers, for input and reference
print("Plotting average shower...")
input_avg_png = os.path.join(
    args.output_dir, 'average_shower_dataset_{}.png'.format(args.dataset))
_ = hlf.DrawAverageShower(shower, filename=input_avg_png, title="Shower average")

# The reference average is kept on reference_hlf; it is computed and saved
# only when the attribute is not there yet.
# NOTE(review): reference_shower is only assigned when the reference was read
# from an .hdf5 file.
if not hasattr(reference_hlf, 'avg_shower'):
    reference_hlf.avg_shower = reference_shower.mean(axis=0, keepdims=True)
    evaluate.save_reference(reference_hlf, args.reference_file, args)

reference_avg_png = os.path.join(
    args.output_dir, 'reference_average_shower_dataset_{}.png'.format(args.dataset))
_ = hlf.DrawAverageShower(reference_hlf.avg_shower,
                          filename=reference_avg_png,
                          title="Shower average reference dataset")
print("Plotting average shower: DONE.\n")


In [None]:
# evaluation mode 'avg-E': average showers at different energy ranges
print("Plotting average showers for different energies ...")
if '1' in args.dataset:
    # dataset 1: bin edges at powers of two, 2**8 ... 2**23
    target_energies = 2**np.linspace(8, 23, 16)
    plot_title = ['shower average at E = {} MeV'.format(int(en)) for en in target_energies]
else:
    # datasets 2 and 3: decade-wide bins, 10**3 ... 10**6
    target_energies = 10**np.linspace(3, 6, 4)
    plot_title = ['shower average for E in [{}, {}] MeV'.format(10**i, 10**(i+1))
                  for i in range(3, 7)]

# Per-energy reference averages are kept on reference_hlf, keyed by the lower
# bin edge. The container is created once, before the loop.
if not hasattr(reference_hlf, 'avg_shower_E'):
    reference_hlf.avg_shower_E = {}

# Each consecutive pair of target energies is one half-open bin [e_low, e_high).
# zip() stops at the shortest input, so surplus titles are simply not used.
for e_low, e_high, title in zip(target_energies[:-1], target_energies[1:], plot_title):
    filename = 'average_shower_dataset_{}_E_{}.png'.format(args.dataset, e_low)
    which_showers = ((energy >= e_low) & (energy < e_high)).squeeze()
    _ = hlf.DrawAverageShower(shower[which_showers],
                              filename=os.path.join(args.output_dir, filename),
                              title=title)

    # Compute and save the reference average only if it is not stored yet.
    # NOTE(review): reference_shower is only assigned when the reference was
    # read from an .hdf5 file.
    if e_low not in reference_hlf.avg_shower_E:
        which_reference = ((reference_hlf.Einc >= e_low) & (reference_hlf.Einc < e_high)).squeeze()
        reference_hlf.avg_shower_E[e_low] = reference_shower[which_reference].mean(axis=0, keepdims=True)
        evaluate.save_reference(reference_hlf, args.reference_file, args)

    # Draw the reference plot in every case, like the 'avg' cell does. It used
    # to sit inside the branch above and was skipped for stored averages.
    _ = hlf.DrawAverageShower(reference_hlf.avg_shower_E[e_low],
                              filename=os.path.join(args.output_dir, 'reference_'+filename),
                              title='reference '+title)

print("Plotting average shower for different energies: DONE.\n")

In [None]:
# evaluation mode 'hist': plotting histograms of high-level features and printing/saving the separation power
# (equivalent to running hist-p for plotting and hist-chi for the separation power)
print("Calculating high-level features for histograms ...")
hlf.CalculateFeatures(shower)
hlf.Einc = energy

# Reference features are computed (and saved) only when they are missing.
# NOTE(review): reference_shower is only assigned when the reference was read
# from an .hdf5 file.
if reference_hlf.E_tot is None:
    reference_hlf.CalculateFeatures(reference_shower)
    evaluate.save_reference(reference_hlf, args.reference_file, args)
# Report completion once, after both input and reference features are ready
# (a second, premature "DONE" used to be printed before the reference step).
print("Calculating high-level features for histograms: DONE.\n")

# Start a fresh results file that holds only the header line
# (presumably evaluate.plot_histograms adds the values -- TODO confirm).
with open(os.path.join(args.output_dir, 'histogram_chi2_{}.txt'.format(args.dataset)), 'w') as f:
    f.write('List of chi2 of the plotted histograms, see eq. 15 of 2009.03796 for its definition.\n')
print("Plotting histograms ...")
evaluate.plot_histograms(hlf, reference_hlf, args)
print("Plotting histograms: DONE. \n")
