In [None]:
## In this example, we study the output data frames produced with the pandora.py configuration
#### 1. Open each data frame and check its structure
#### 2. Collect the POT and compute the scale factor to the target POT
#### 3. Merge evtdf and mcnudf for further study
#### 4. Draw some plots for each slice and for each pfp

import os
import sys

import numpy as np
import math
import uproot as uproot
import pickle
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import ticker
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
from matplotlib import gridspec

# Put the repository head directory (two levels above the current working
# directory) at the front of sys.path so this repo's packages can be imported.
workspace_root = os.getcwd()
sys.path.insert(0, f"{workspace_root}/../../")

# import this repo's classes
import pyanalib.pandas_helpers as ph


In [None]:
## 1. Open each df
# NOTE(review): venv_path is not referenced by any cell visible here; kept so
# the kernel namespace stays unchanged.
venv_path = os.getenv("PATH")
input_df_path = "/exp/sbnd/data/users/sungbino/sbnd_samples/cafpyana_outputs/cohpi_df_mcweight_test.df"
# List the keys stored in the file (one per saved data frame).
# Open read-only: HDFStore's default mode is "a" (append), which needs write
# permission and would create an empty file if the path were mistyped.
with pd.HDFStore(input_df_path, mode="r") as store:
    print(store.keys())

In [None]:
# Load the three data frames used below, one per key of the HDF5 file.
cohpidf, hdrdf, mcnuwgtdf = (
    pd.read_hdf(input_df_path, key=df_key)
    for df_key in ('cohpi', 'hdr', 'mcnuwgt')
)

In [None]:
#### 1.1 Check the structure of cohpidf (read from the 'cohpi' key; called evtdf in the outline)
# The bare expression on the last line makes the notebook render the frame.
cohpidf

In [None]:
#### 1.2 Check the structure of hdrdf (read from the 'hdr' key)
# Its `pot` column is summed in step 2 to get the POT of this sample.
hdrdf

In [None]:
#### 1.3 Check the structure of mcnuwgtdf (read from the 'mcnuwgt' key; called mcnudf in the outline)
mcnuwgtdf

In [None]:
# List the column labels available in mcnuwgtdf.
mcnuwgtdf.columns

In [None]:
## 2. Collect POT and scale factor to the target POT
# Total POT of this sample. Vectorised sum instead of a Python-level loop;
# skipna=False so a missing POT entry shows up as NaN rather than being
# silently dropped from the total.
this_pot = hdrdf.pot.sum(skipna=False)
print(this_pot)
# `not (x > 0)` is also True for NaN, so this rejects zero, negative and NaN
# totals, which would otherwise give an inf/NaN scale factor for every plot.
if not this_pot > 0:
    raise ValueError(f"Total POT must be positive to compute a scale factor, got {this_pot}")
target_POT = 3.0e18
# Multiplicative weight that normalises this sample to target_POT.
POT_scale = target_POT / this_pot
print(POT_scale)

In [None]:
# Preview cohpidf with its index levels turned into ordinary columns.
# The result is only displayed; it is not assigned, so cohpidf keeps its index.
cohpidf.reset_index()

In [None]:
# List the column labels found under the rec -> slc branch of cohpidf's columns.
cohpidf.rec.slc.columns

In [None]:
## 3. Merge evtdf and mcnudf
#### 1) The merge matches slc.tmatch.idx of evtdf (cohpidf here) to rec.mc.nu..index of mcnudf (mcnuwgtdf here) within the same entry.
####    For each entry (readout window), there can be multiple true neutrino interactions and reconstructed slices.
####    We want to match each true neutrino interaction to its corresponding slice.
####    reset_index() is applied to both frames, presumably so that "entry" and "rec.mc.nu..index" are available as columns to merge on.

matchdf = ph.multicol_merge(cohpidf.reset_index(), mcnuwgtdf.reset_index(),
                            left_on=[("entry", "",""), ("rec", "slc","tmatch", "idx")],
                            right_on=[("entry", "",""), ("rec.mc.nu..index", "","")], 
                            how="left") ## -- keep all slices (assuming `how` is forwarded to a pandas merge, unmatched slices are kept too)

In [None]:
# Inspect the merged frame produced by the multicol_merge call.
matchdf

In [None]:
# List the column labels of the merged frame.
matchdf.columns

In [None]:
## 4. Draw plots
#### 4.1) Make a dataframe of nu.E for each rec.slc..index (nuE_per_slc):
####      one row per (entry, rec.slc..index) group, keeping the first E value of the group.
####      (The per-pfp counterpart, nuE_per_pfp, is not built in this cell.)
nuE_col = ("E", "", "", "")
nuE_per_slc = (
    matchdf
    .groupby(["entry", "rec.slc..index"])[[nuE_col]]
    .first()
)

In [None]:
# Show the per-slice energy table, then the number of rows it holds.
print(nuE_per_slc)
print(f"len(nuE_per_slc) = {len(nuE_per_slc):d}")

In [None]:
#### 4.2) Draw a plot of nu.E for each slc
# Unweighted histogram: counts correspond to the POT of this sample (this_pot).
plt.hist(
    nuE_per_slc.E,
    bins=np.linspace(start=0., stop=6., num=71),  # 70 equal-width bins on [0, 6]
    histtype="step",
    label=["all"],
)
plt.ylabel(f"Neutrinos (POT = {this_pot:.2e})")
plt.xlabel("Neutrino Energy (GeV)")
plt.legend()
plt.show()

In [None]:
#### 4.3) Draw a plot of nu.E for each slc with POT scaling
# Every slice gets the same weight, POT_scale, so the histogram is normalised
# to target_POT. The weights are built as one array (one entry per row of
# nuE_per_slc) instead of a Python loop that created a 0-d array per element.
plt.hist(
    nuE_per_slc.E,
    bins=np.linspace(0., 6., 71),  # 70 equal-width bins on [0, 6]
    weights=np.full(len(nuE_per_slc), POT_scale),
    histtype="step",
    label=["all"],
)
plt.xlabel("Neutrino Energy (GeV)")
plt.ylabel(f"Neutrinos (POT = {target_POT:.2e})")
plt.legend()
plt.show()

In [None]:
#### 4.5) Draw nu score distribution per slc
# One row per (entry, rec.slc..index) group, keeping the first nu_score of the group.
nu_score_col = ('rec', 'slc', 'nu_score', '')
nu_score_per_slc = matchdf.groupby([('entry'), ('rec.slc..index')])[[nu_score_col]].first()
# Every slice gets the same weight, POT_scale, so the histogram is normalised
# to target_POT. The weights are built as one array (one entry per row)
# instead of a Python loop that created a 0-d array per element.
plt.hist(
    nu_score_per_slc.rec.slc.nu_score,
    bins=np.linspace(0., 1., 101),  # 100 equal-width bins on [0, 1]
    weights=np.full(len(nu_score_per_slc), POT_scale),
    histtype="step",
    label=["all"],
)
plt.xlabel("Neutrino Score")  # axis-label typo ("Neutrion") fixed
plt.ylabel(f"Neutrinos (POT = {target_POT:.2e})")
plt.legend()
plt.show()
