In [None]:
#import management
import autoreload
%load_ext autoreload
%autoreload 2  # Autoreload all modules

import importlib

#standard imports
import sys
import os
from datetime import datetime
import pickle


#custom modules
import localSettings as ls
import plotter
import NUMUhelper as moreFunctions
#import xgboost as xgb
#import nue_booster 

#scientific imports
import uproot
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.pylab as pylab
import matplotlib.backends.backend_pdf
import numpy as np
#import awkward
import math
#from sklearn.model_selection import train_test_split

# Re-import the local helper modules; repeat this wherever the helpers may
# have been edited since they were first imported.
for _module in (ls, plotter, moreFunctions):
    importlib.reload(_module)

# Append the path configured in localSettings to the import search path.
main_path = ls.main_path
sys.path.append(main_path)

# Date stamp in MMDDYYYY form (no time-of-day component).
now = datetime.now()
date_time = now.strftime("%m%d%Y")
print("date and time:", date_time)

# Same enlarged font size for axis labels, axis titles and both tick labels.
params = {
    rc_key: 'x-large'
    for rc_key in (
        'axes.labelsize',
        'axes.titlesize',
        'xtick.labelsize',
        'ytick.labelsize',
    )
}
pylab.rcParams.update(params)

In [None]:
# Pick up any edits to localSettings before ls.fold is read below.
importlib.reload(ls)

tree = "NeutrinoSelectionFilter"

#####################################################
# Data paths are set here instead of in localsettings.py, which makes it
# easier to mix and match data. Should probably revert to the
# localsettings.py method in future.
# Only the biggest possible datasets are used: no more switching between
# 1e19 and 5e20 on the fly.

##################### SET DIRECTORY PATHS #####################

MC_PATH = "E:\\HEPPA\\Data\\PeLEE\\0722_CCMEC\\"
NU = 'prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run1_reco2_reco2'

# Open <MC_PATH><NU>.root and navigate to the tree inside the ls.fold directory.
mc = uproot.open("{}{}.root".format(MC_PATH, NU))[ls.fold][tree]

# Branches to read, in the order they are requested. Entries that are
# commented out are currently disabled and kept only for reference.
variables = [
    "nslice",
    "interaction",
    "nu_pdg",
    "category",
    # next three entries: modified from shr_score_v
    "slpdg",
    "trk_score_v",
    "backtracked_pdg",
    # 'theta',
    "topological_score",
    "trk_energy_tot",
    "ccnc",
    # "trk_bkt_pdg", "hits_ratio", "n_tracks_contained",
    "NeutrinoEnergy2",
    "run",
    "sub",
    "evt",
    "trk_llr_pid_score_v",   # trk-PID score
    "trk_energy_proton_v",   # track energy under proton hyp
    "trk_energy_muon_v",     # track energy under muon hyp
    # 'true_nu_vtx_x', 'true_nu_vtx_y', 'true_nu_vtx_z',
    "reco_nu_vtx_sce_x",
    "reco_nu_vtx_sce_y",
    "reco_nu_vtx_sce_z",
    "nproton",
    # "nmuon",
    "nu_e",
    "trk_distance",
    "trk_phi",
    "trk_theta",
    'slnunhits',
    "slnhits",
    # "mc_pdg", "shr_score",
    # "npi0", "npion", "pion_e", "muon_e", "true_e_visible",
    # "slclustfrac", "reco_nu_vtx_x", "reco_nu_vtx_y", "reco_nu_vtx_z", "contained_fraction",
    "trk_sce_start_x_v",
    "trk_sce_start_y_v",
    "trk_sce_start_z_v",
    "trk_sce_end_x_v",
    "trk_sce_end_y_v",
    "trk_sce_end_z_v",
    "trk_mcs_muon_mom_v",
    "trk_range_muon_mom_v",
    "trk_len_v",
    "pfp_generation_v",
    "trk_distance_v",
    "trk_theta_v",
    "trk_phi_v",
    # "trk_energy_muon", "trk_energy_tot", "trk_energy",
    "pfnhits",
    "pfnunhits",
]

# CCMEC knob weights used for the up/down variation histograms.
variables.extend(['knobCCMECup', 'knobCCMECdn'])

# Disabled extras:
#variables.append("_closestNuCosmicDist")
#variables.append("crtveto")
#variables.append("crthitpe")
#variables.append("CosmicIP")

WEIGHTS = [
    "weightSpline",
    "weightTune",
    "weightSplineTimesTune",
]
#WEIGHTS += ["weightsGenie", "weightsFlux", "weightsReint"]

MCVARS = [
    # did the event pass the common optical filter (for MC only)
    "_opfilter_pe_beam",
    "_opfilter_pe_veto",
]

# From here on `mc` is the DataFrame, not the tree handle. flatten=False is
# passed so that the per-track "_v" branches are not flattened.
branches_to_load = variables + WEIGHTS + MCVARS
mc = mc.pandas.df(branches_to_load, flatten=False)

In [None]:
# `df` is an alias of `mc` (no copy): every column added below is also
# visible through `mc`, which is the frame the selection cell queries.
df = mc

# Reset unusable event weights to 1: NaN, infinite, non-positive, or > 100.
for weight_col in ('weightTune', 'weightSplineTimesTune'):
    weight = df[weight_col]
    bad_weight = ~np.isfinite(weight) | (weight <= 0) | (weight > 100)
    df.loc[bad_weight, weight_col] = 1.

# Per-row totals of the pfnhits / pfnunhits vectors.
df["slclnhits"] = df["pfnhits"].apply(lambda x: sum(x))
df["slclnunhits"] = df["pfnunhits"].apply(lambda x: sum(x))

M_mu = 0.105 #GeV/c^2
M_p = 0.938 #GeV/c^2
M_n = 0.939 #GeV/c^2
B = 0.04 #binding energy of argon used in simulation
proton_pidscore = -0.2  # tracks with an LLR PID score below this are proton candidates


# Fractional difference between the MCS and range-based muon momenta.
df['trk_p_quality_v'] = (df['trk_mcs_muon_mom_v']-df['trk_range_muon_mom_v'])/df['trk_range_muon_mom_v']
df['trk_cos_theta_v'] = df['trk_theta_v'].apply(lambda x: np.cos(x))
df['trk_sin_theta_v'] = df['trk_theta_v'].apply(lambda x: np.sin(x))
df['trk_cos_phi_v'] = df['trk_phi_v'].apply(lambda x: np.cos(x))
df['trk_sin_phi_v'] = df['trk_phi_v'].apply(lambda x: np.sin(x))
# p = sqrt(2*M_p*T): non-relativistic momentum from the proton-hypothesis energy.
df['trk_range_proton_mom_v'] = df['trk_energy_proton_v'].apply(lambda x: np.sqrt(2*M_p*x))
df['trk_range_muon_e_v'] = (df['trk_range_muon_mom_v']**2 + M_mu**2)**.5 # E
df['trk_range_muon_ke_v'] = df['trk_range_muon_e_v'] - M_mu #KE
# Overwrites the loaded trk_energy_tot with the sum of the proton-hypothesis
# track energies (the branch is missing from the G1 sample).
df['trk_energy_tot'] = df["trk_energy_proton_v"].apply(lambda x: sum(x))
# For each track taken as the muon: its range-based muon energy plus the
# proton-hypothesis energies of all the other tracks.
df['reco_nu_e_range_v'] = df["trk_range_muon_e_v"] + (df["trk_energy_tot"] - df["trk_energy_proton_v"])
#protons have trk_score cut and llr_pid_score cut
proton_mask = df['trk_score_v'].apply(lambda x: x>0.5) * df['trk_llr_pid_score_v'].apply(lambda x: x<proton_pidscore)
# Count the tracks passing both cuts straight from the boolean mask.
# Multiplying the scores by the mask and counting the non-zero entries would
# also count NaN/inf scores (NaN * False is NaN, which is != False) even
# though those tracks fail the PID cut.
df['reco_nproton'] = proton_mask.apply(lambda x: int(np.count_nonzero(x)))
df['reco_ntrack'] = df['trk_score_v'].apply(lambda x: len(x))
# break momentum vector apart: direction components built from theta and phi
df['trk_dx_v'] = df['trk_sin_theta_v']*df['trk_cos_phi_v']
df['trk_dy_v'] = df['trk_sin_theta_v']*df['trk_sin_phi_v']
df['trk_dz_v'] = df['trk_cos_theta_v']

# Same data under a "_v"-suffixed name.
df['backtracked_pdg_v'] = df['backtracked_pdg']

In [None]:
# Refresh the helper module so edits to the selection code take effect.
importlib.reload(moreFunctions)

# Preselection: the helper returns a pandas query string as its first value;
# the second returned value is not used here.
query, _ = moreFunctions.get_NUMU_sel(False, opfilter=True)
presel_sample = mc.query(query)

# Full muon selection applied on top of the preselected events.
fullsel_sample = moreFunctions.apply_muon_fullsel(presel_sample, 'mc', False, False)

# Take the track multiplicity from the preselected frame for the rows that
# are present in the fully selected frame.
fullsel_sample['reco_ntrack'] = presel_sample.loc[fullsel_sample.index, 'reco_ntrack']

# Per-track vector columns to reduce to one scalar per event.
VECS = [
    'reco_nu_e_range_v',
    'trk_range_muon_e_v',
    'trk_len_v',
    'trk_cos_theta_v',
]


def _first_entry(track_values):
    """Return element 0 of a per-event vector."""
    return track_values[0]


# Each scalar column is named after its source column without the "_v" suffix.
for vec in VECS:
    scalar_name = vec[:-2]
    fullsel_sample[scalar_name] = fullsel_sample[vec].apply(_first_entry)

In [None]:
# Compare the central-value (CV) reconstructed-energy spectrum of the fully
# selected sample with the two CCMEC knob variations, and draw the
# variation/CV ratio underneath.
dfSUB = fullsel_sample

vals = dfSUB['reco_nu_e_range'].values
W = dfSUB['weightSplineTimesTune'].values
# Variations: spline weight multiplied by the CCMEC knob weight.
Wup = dfSUB['weightSpline'].values * dfSUB['knobCCMECup'].values
Wdn = dfSUB['weightSpline'].values * dfSUB['knobCCMECdn'].values
BINS = np.linspace(0.05,2.05,11)
print (BINS)
fig = plt.figure(figsize=(6,6))
# Two stacked panels with no gap: spectra on top, ratio below.
gs = gridspec.GridSpec(2, 1, height_ratios=[1.5, 1])
gs.update(wspace=0.0, hspace=0.0)
ax1 = plt.subplot(gs[0])
ax2 = plt.subplot(gs[1])
valsCV, edgesCV = np.histogram(vals,bins=BINS,weights=W)
valsUP, edgesUP = np.histogram(vals,bins=BINS,weights=Wup)
valsDN, edgesDN = np.histogram(vals,bins=BINS,weights=Wdn)
# Only needed by the optional error-bar variants kept below.
# NOTE(review): sqrt of the weighted bin content; confirm this is the
# intended uncertainty before enabling the error bars.
errs = np.sqrt(valsCV.astype(float))
errsrel = errs/valsCV
bincenters = 0.5*(edgesCV[1:]+edgesCV[:-1])
ax1.set_title(r'$\nu_{\mu}$ CC INC selection',loc='left',fontsize=16)
# Colour convention shared by both panels: CV black, UP red, DN blue.
ax1.hist(vals,bins=BINS,histtype='step',lw=2,weights=W,color='k',label='CV')
ax1.hist(vals,bins=BINS,histtype='step',lw=2,weights=Wup,color='r',label='Empirical CCMEC')
ax1.hist(vals,bins=BINS,histtype='step',lw=2,weights=Wdn,color='b',label='Nieves CCMEC')
# Optional error-bar versions of the ratio points (same colour convention):
#ax2.errorbar(bincenters,valsDN/valsCV,yerr=valsDN/valsCV*errsrel,fmt='o',color='b')
#ax2.errorbar(bincenters,valsUP/valsCV,yerr=valsUP/valsCV*errsrel,fmt='o',color='r')
#ax2.errorbar(bincenters,valsDN/valsCV,xerr=(bincenters[1]-bincenters[0])/2,fmt='o',color='b')
#ax2.errorbar(bincenters,valsUP/valsCV,xerr=(bincenters[1]-bincenters[0])/2,fmt='o',color='r')
# Ratio markers use the same colour as the matching histogram in the top
# panel (DN blue, UP red); the ratio panel has no legend of its own.
ax2.plot(bincenters,valsDN/valsCV,'bo')
ax2.plot(bincenters,valsUP/valsCV,'ro')
ax1.legend(loc=8)
# The top panel shares the x range of the bottom one, so hide its ticks.
ax1.set_xticks([])
ax2.set_xlim(ax1.get_xlim()[0],ax1.get_xlim()[1])
fig.tight_layout()
ax2.set_xlabel(r'reconstructed energy [GeV]')
ax2.set_ylim([0.86,1.14])
ax2.grid(which='both')
#ax1.grid(which='both')
#plt.grid()
plt.tight_layout()
plt.savefig("C:\\Users\\Ryan\\python-workspace\\PELEE-newmcfilteredsamples\\plots\\CCMEC\\numuCCINC_CCMEC.pdf")

In [None]:
# Reference presets from an earlier version of this cell (variable, number of
# bins, range, x-axis title). They are not used by the code below.
#VARIABLE, BINS, RANGE, XTIT = 'nu_e',14,(0.15,1.55),r'True $\nu$ Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'reco_ntrack',5,(0.5,5.5),r'Reco Track Multiplicity'
#VARIABLE, BINS, RANGE, XTIT = 'nproton',5,(-0.5,4.5),r'True Proton Multiplicity'
#VARIABLE, BINS, RANGE, XTIT = 'trk_len_v',24,(0,100),r'Track Length [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_score_v',24,(0.5,1),r'Track Score'
#VARIABLE, BINS, RANGE, XTIT = 'trk_distance_v',24,(0,10),r'Track Distance [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_p_quality_v',24,(-1,2.5),r'MCS Consistency $(\frac{P_{MCS}-P_{Range}}{P_{Range}})$'
#VARIABLE, BINS, RANGE, XTIT = 'crtveto',2,(-0.5,1.5),r'CRT Veto'
#VARIABLE, BINS, RANGE, XTIT = '_closestNuCosmicDist', 25, (0,100), r'Closest $\nu$-Cosmic Distance [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_llr_pid_score_v', 25, (-1, 1), "LLR PID Score"
#VARIABLE, BINS, RANGE, XTIT = 'Q2_v', 25, (0,2), "Q2"
#VARIABLE, BINS, RANGE, XTIT = 'Mhad_v', 25, (0,2.5), r"M$_{hadron}$"
#VARIABLE, BINS, RANGE, XTIT = 'Xbj_v', 25, (0,3), r"Bjorken x"
#VARIABLE, BINS, RANGE, XTIT = 'Ybj_v', 25, (0,2), r"Bjorken y"
#VARIABLE, BINS, RANGE, XTIT = 'trk_cos_theta',24,(-1,1),r'Cos($\theta$)'
#VARIABLE, BINS, RANGE, XTIT = 'trk_energy_proton_v', 25, (0,1), r'Reco Range-Based Proton Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'NeutrinoEnergy2', 25, (0,2000), 'Visible Energy on Plane 2 [MeV]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_range_muon_e_v', 10, (0.15,1.15), r'Reco Range-Based Muon Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'reco_nu_e_range',14,(0.15,1.55),r'Reco Range-Based $\nu$ Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'contained_fraction',25,(0,1),r'Contained Fraction'

##### set up plotting parameters and objects #####
fig = plt.figure(figsize=(10,7))
VAR = 'reco_nu_e_range'
BINS = 25
BINRANGE = (0.15,1.55)
XLABEL = r'Reco Range-Based $\nu$ Energy [GeV]'

# NOTE(review): fullsel_sample_opfilter / fullsel_sample_noopfilter are not
# defined in any cell visible above this one; on a fresh kernel this cell
# raises NameError unless they are created elsewhere. Confirm where they come from.
df_opfilter = fullsel_sample_opfilter
df_noopfilter = fullsel_sample_noopfilter

###### get weights #####
POT_scaling = 0.16
WEIGHTS_opfilter = df_opfilter['weightSplineTimesTune']*POT_scaling
WEIGHTS_noopfilter = df_noopfilter['weightSplineTimesTune']*POT_scaling

#### plot ####
# Both histograms share the same binning, so `bins` from the first call
# describes the second as well.
n_op,bins,_ = plt.hist(df_opfilter[VAR],
                 bins=BINS, range=BINRANGE, weights=WEIGHTS_opfilter,
                 histtype='step',
                 label='opfilter')
n_noop,_,_ = plt.hist(df_noopfilter[VAR],
                 bins=BINS, range=BINRANGE, weights=WEIGHTS_noopfilter,
                 histtype='step',
                 label='no opfilter')

#plot stats errors for the opfilter (nominal) hist
# NOTE(review): this is the square root of the weighted, POT-scaled bin
# contents; confirm that is the intended definition of the statistical error.
stat_err = np.sqrt(np.array(n_op))
# bin_widths holds HALF of each bin width, hence the factors of 2 below.
bin_widths = 0.5*(bins[1:]-bins[:-1])
bin_centers = bins[:-1]+bin_widths
# Grey band spanning n_op - stat_err to n_op + stat_err in every bin.
plt.bar(bin_centers, stat_err*2, width=bin_widths*2, facecolor='gray', alpha=0.2, bottom=(n_op-stat_err),label='stat err on opfilter hist')
#### pretty up and save #####
plt.legend()
plt.xlabel(XLABEL)
plt.tight_layout()
plt.savefig("C:\\Users\\Ryan\\python-workspace\\PELEE-newmcfilteredsamples\\plots\\opfilter_study\\{}.pdf".format(VAR))