# SETUP

In [None]:
#import management
import autoreload
%load_ext autoreload
%autoreload 2  # Autoreload all modules
import importlib

#standard imports
import sys
import os
from datetime import datetime
import pickle

#custom modules
import localSettings as ls
import plotter
import NUMUhelper as moreFunctions
#import xgboost as xgb
#import nue_booster 

#scientific imports
import uproot
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.pylab as pylab
import matplotlib.backends.backend_pdf
import numpy as np
#import awkward
import math
#from sklearn.model_selection import train_test_split

# Re-import the project modules so on-disk edits are picked up.
# Repeat this wherever a fresh copy of the modules is needed.
for _module in (ls, plotter, moreFunctions):
    importlib.reload(_module)

# Put the project's main path (taken from localSettings) on the import path.
main_path = ls.main_path
sys.path.append(main_path)

# Date stamp in MMDDYYYY form.
now = datetime.now()
date_time = now.strftime("%m%d%Y")
print("date and time:", date_time)

# Use the 'x-large' font size for axis labels, titles and tick labels.
params = dict.fromkeys(
    ['axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize'],
    'x-large',
)
pylab.rcParams.update(params)

## The main 'control center'

In [None]:
# Analysis configuration: every switch that the loading cells read lives here.
RUN = 3
PRESEL = True          # read the "_numupresel" directories and apply the pre-selection query at load time
COMBINEDONLY = False   # keep only the 'combined' entry of each file list
NOCOMBINED = False     # keep only runs 1, 2 and 3 (drop 'combined')
MCONLY = False         # skip opening/loading the data, EXT and dirt samples

MC_SAMPLE_TYPE = '0702' #0304, 0702, 
BNB_SAMPLE_TYPE = '0617' #0304, 0617, fake_data

# Which fake data set to use. Always bound (None outside fake-data mode) so
# that code reading it never hits a NameError.
FAKEDATASET = 'set1' if BNB_SAMPLE_TYPE == 'fake_data' else None

LOADDETSYS = True      # also load weightsGenie / weightsFlux / weightsReint
USECRT = False         # also load the CRT branches and apply the CRT corrections
FLATTEN = False

## Load Dataframes

In [None]:
#header###########################
# Refresh the project modules and name the tree read from every ROOT file.
for _module in (ls, moreFunctions):
    importlib.reload(_module)
tree = "NeutrinoSelectionFilter"
#################################
################ SET FILE PATHS ###########################
# Pre-selected samples live in "<sample type>_numupresel" directories;
# APPEND records the matching suffix ("" when PRESEL is off).
APPEND = "_numupresel" if PRESEL else ""
_dir_template = (
    "E:\\HEPPA\\Data\\PeLEE\\{}_numupresel\\" if PRESEL
    else "E:\\HEPPA\\Data\\PeLEE\\{}\\"
)
MC_PATH = _dir_template.format(MC_SAMPLE_TYPE)
BNB_PATH = _dir_template.format(BNB_SAMPLE_TYPE)
    
################## SET BNB FILENAMES #####################
# BNB maps a run key (1, 2, 3 or 'combined') to a list of file stems.
# Stems are relative to BNB_PATH and carry no ".root" extension: both are
# added when the files are opened.
if BNB_SAMPLE_TYPE == '0617':
    BNB = {
        1: ["Run1\\data_bnb_peleeFilter_uboone_v08_00_00_41_pot_run1_C1_neutrinoselection_filt"],
        2: ["Run2\\data_bnb_peleeFilter_uboone_v08_00_00_41_pot_run2_D1_neutrinoselection_filt",
           "Run2\\data_bnb_peleeFilter_uboone_v08_00_00_41_pot_run2_D2_neutrinoselection_filt",
           "Run2\\data_bnb_peleeFilter_uboone_v08_00_00_41_pot_run2_E1_neutrinoselection_filt"],
        3: ["Run3\\data_bnb_peleeFilter_uboone_v08_00_00_41_pot_run3_G1_neutrinoselection_filt"],
    }
    BNB['combined'] = BNB[1] + BNB[2] + BNB[3]
elif BNB_SAMPLE_TYPE == 'fake_data':
    # Keep these stems relative as well. Prefixing them here referenced the
    # undefined name `BNB_path` and would also have doubled the directory,
    # because BNB_PATH is prepended again at open time.
    # NOTE(review): the file names hard-code "set3" - confirm this is the
    # intended fake data set. There is no run-2 entry for fake data.
    BNB = {
        1: ['prod_uboone_nu2020_fakedata_set3_run1_reco2_v08_00_00_41_reco2'],
        3: ['prod_uboone_nu2020_fakedata_set3_run3b_reco2_v08_00_00_41_reco2']
    }
    BNB['combined'] = BNB[1] + BNB[3]

################# SET MC FILENAMES #########################
# EXT, DIRT and MC map a run key (1, 2, 3 or 'combined') to a list of file
# stems. Stems are relative to MC_PATH and carry no ".root" extension: both
# are added when the files are opened.
if MC_SAMPLE_TYPE == '0702':
    EXT = {
        1: ['Run1\\data_extbnb_mcc9.1_v08_00_00_25_reco2_C_all_reco2'],
        2: ['Run2\\data_extbnb_mcc9.1_v08_00_00_25_reco2_D_E_all_reco2'],
        3: ['Run3\\data_extbnb_mcc9.1_v08_00_00_25_reco2_G1_all_reco2'],
    }
    EXT['combined'] = EXT[1] + EXT[2] + EXT[3]
    DIRT = {
        1: ['Run1\\prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run1_reco2_reco2'],
        3: ['Run3\\prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run3_reco2_reco2']
    }
    # No run-2 dirt file is listed; run 2 reuses the run-1 file here.
    DIRT[2] = DIRT[1]
    DIRT['combined'] = DIRT[1] + DIRT[3]
    MC = {
        1: ['Run1\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run1_reco2_reco2'],
        2: ['Run2\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run2_reco2_D1D2_reco2'],
        3: ['Run3\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run3_reco2_G_reco2']
    }
    MC['combined'] = MC[1] + MC[2] + MC[3]
elif MC_SAMPLE_TYPE == '0304':
    # Keep these stems relative as well. Prefixing them here referenced the
    # undefined name `MC_path` and would also have doubled the directory,
    # because MC_PATH is prepended again at open time.
    ext_samples = {
        1: ["C1","C2"],
        2: ["D1","D2","E1","E2"],
        3: ["G1","G2"]
    }
    ext_prefix = "data_extbnb_mcc9.1_v08_00_00_25_reco2_{}_all_reco2_numupresel"
    EXT = {
        1: ["Run1\\" + ext_prefix.format(x) for x in ext_samples[1]],
        2: ["Run2\\" + ext_prefix.format(x) for x in ext_samples[2]],
        3: ["Run3\\" + ext_prefix.format(x) for x in ext_samples[3]],
        'combined': ['data_extbnb_mcc9.1_v08_00_00_25_rec2_C1_C2_D1_D2_E1_E2_G1_G2_numupresel']
    }
    DIRT = {
        1: ["Run1\\prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run1_reco2_reco2_numupresel"],
        3: ["Run3\\prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run3_reco2_reco2_numupresel"]
    }
    # NOTE(review): run 2 uses run 1 + run 3 here but run 1 only in the '0702'
    # branch - confirm which is intended.
    DIRT[2] = DIRT[1] + DIRT[3]
    DIRT['combined'] = DIRT[1] + DIRT[3]
    MC = {
        1: ["Run1\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run1_reco2_reco2_numupresel"],
        2: ["Run2\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run2_reco2_D1D2_reco2_numupresel"],
        3: ["Run3\\prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run3_reco2_G_reco2_numupresel"]
    }
    MC['combined'] = MC[1] + MC[2] + MC[3]

# Optionally trim each file-list dictionary: COMBINEDONLY keeps just the
# 'combined' entry, NOCOMBINED keeps just the three individual runs.
# NOCOMBINED is evaluated second, on whatever COMBINEDONLY left behind.
_individual_runs = (1, 2, 3)
if COMBINEDONLY:
    BNB = dict(combined=BNB['combined'])
    EXT = dict(combined=EXT['combined'])
    DIRT = dict(combined=DIRT['combined'])
    MC = dict(combined=MC['combined'])
if NOCOMBINED:
    BNB = {run_key: BNB[run_key] for run_key in _individual_runs}
    EXT = {run_key: EXT[run_key] for run_key in _individual_runs}
    DIRT = {run_key: DIRT[run_key] for run_key in _individual_runs}
    MC = {run_key: MC[run_key] for run_key in _individual_runs}
#################################
#make uproot files
def _open_trees(directory, stems):
    """Open `<directory><stem>.root` for each stem and return its `ls.fold` / `tree` object."""
    return [uproot.open(directory + stem + ".root")[ls.fold][tree] for stem in stems]


data, ext, dirt, mc = {}, {}, {}, {}
# Each file-name list is replaced in place by the list of opened trees.
if not MCONLY:
    for run in BNB:
        BNB[run] = _open_trees(BNB_PATH, BNB[run])
    for run in EXT:
        EXT[run] = _open_trees(MC_PATH, EXT[run])
    for run in DIRT:
        DIRT[run] = _open_trees(MC_PATH, DIRT[run])
for run in MC:
    MC[run] = _open_trees(MC_PATH, MC[run])

#################################
# declare necessary variables
# Branches read for every sample (data, EXT, dirt and MC).
variables = [
    # slice-level
    "nslice",
    "slpdg",
    "trk_score_v",
    "slclustfrac",
    # hit counts            (not loaded: "contained_fraction")
    "pfnhits",
    "pfnunhits",
    "slnunhits",
    "slnhits",
    "topological_score",
    # event identifiers
    "run",
    "sub",
    "evt",
    "NeutrinoEnergy2",
    # vertex and track end points ([xyz] patterns)
    "reco_nu_vtx_sce_[xyz]",
    "trk_sce_start_[xyz]_v",
    "trk_sce_end_[xyz]_v",
    # track momentum / length / PID / direction
    "trk_mcs_muon_mom_v",
    "trk_range_muon_mom_v",
    "trk_len_v",
    "trk_llr_pid_score_v",
    "pfp_generation_v",
    "trk_distance_v",
    "trk_theta_v",
    "trk_phi_v",
    # track energies        (not loaded: "trk_energy_muon", "trk_energy_tot", "trk_energy")
    "trk_energy_muon_v",
    "trk_energy_proton_v",
]
if USECRT:
    variables.extend(["crtveto", "_closestNuCosmicDist", "crthitpe"])

# Extra branches read only for the dirt and MC samples.
MCVARS = [
    "_opfilter_pe_beam", "_opfilter_pe_veto",
    "nproton", "npion", "npi0", "nmuon", "theta", "proton_e",
    "nu_e", "ccnc", "nu_pdg",
    "backtracked_pdg", "category", "interaction",
    "isVtxInFiducial", "truthFiducial",
]

# Weight branches read for the dirt and MC samples.
WEIGHTS = ["weightSpline", "weightTune", "weightSplineTimesTune"]
if LOADDETSYS:
    WEIGHTS.extend(["weightsGenie", "weightsFlux", "weightsReint"])

#################################
# create dataframes
# preselection applied immediately (for memory reasons)
def _build_frame(trees, branches, selection):
    """Read `branches` from every tree and stack the results into one DataFrame.

    trees:     opened uproot trees for one sample of one run
    branches:  branch names/patterns handed to `tree.pandas.df`
    selection: pandas query string, applied per file when PRESEL is set so
               that rejected rows are dropped before the frames are stacked

    Returns one DataFrame with a fresh 0..N-1 index.
    """
    frames = []
    for tree_obj in trees:
        frame = tree_obj.pandas.df(branches, flatten=False)
        if PRESEL:
            frame = frame.query(selection)
        frames.append(frame)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    return pd.concat(frames, ignore_index=True)


print("starting to build dataframes at {}...".format(moreFunctions.get_current_time("%H:%M:%S")))
RUNS = list(MC)
for run in RUNS:
    # The first argument of get_NUMU_sel is switched on only for run 3 with USECRT.
    use_crt = run == 3 and USECRT
    if not MCONLY:
        print(run)
        ##################### BNB + EXT ########################
        query,_ = moreFunctions.get_NUMU_sel(use_crt, opfilter=False) #no opfilter vars for data samples
        BNB[run] = _build_frame(BNB[run], variables, query)
        print("DATA Run {} built, pre-selected, and appended {}...".format(run,moreFunctions.get_current_time("%H:%M:%S")))
        EXT[run] = _build_frame(EXT[run], variables, query)
        print("EXT Run {} built, pre-selected, and appended {}...".format(run,moreFunctions.get_current_time("%H:%M:%S")))

    ########################### MC + DIRT ################################
    query,_ = moreFunctions.get_NUMU_sel(use_crt, opfilter=True) #apply opfilter correction
    if not MCONLY:
        DIRT[run] = _build_frame(DIRT[run], variables + WEIGHTS + MCVARS, query)
        print("DIRT Run {} built, pre-selected, and appended {}...".format(run,moreFunctions.get_current_time("%H:%M:%S")))

    MC[run] = _build_frame(MC[run], variables + WEIGHTS + MCVARS, query)
    print("MC Run {} built, pre-selected, and appended {}...".format(run,moreFunctions.get_current_time("%H:%M:%S")))

## Organize samples and weights for systematic manipulation later

In [None]:
#################################
# organize things
# SAMPLES_RUN[run][sample] -> DataFrame, with sample in 'data'/'ext'/'dirt'/'mc'.
if NOCOMBINED:
    _run_keys = [1, 2, 3]
elif COMBINEDONLY:
    _run_keys = ['combined']
else:
    _run_keys = [1, 2, 3, 'combined']
SAMPLES_RUN = {run_key: {} for run_key in _run_keys}

# Data, EXT and dirt are only available when they were loaded (not MCONLY).
_labelled_frames = [('data', BNB), ('ext', EXT), ('dirt', DIRT)] if not MCONLY else []
_labelled_frames.append(('mc', MC))
for _label, _frames in _labelled_frames:
    for run in _frames:
        SAMPLES_RUN[run][_label] = _frames[run]
#################################
# Weights for each Run + combined
# https://docs.google.com/spreadsheets/d/1vdcm3FoYIF1XiS6qx4qTCbaTH79vu-Sb5j8dnqctaTM/edit#gid=1344532859
# these weights are for the MC_TYPE = 0702 and BNB_type = 0617
def _run_weights(ext, dirt, mc, pot):
    """Build one run's weight table; 'nue' shares the 'mc' weight and 'data' is 1."""
    return {
        'ext': ext,
        'dirt': dirt,
        'mc': mc,
        'nue': mc,
        'data': 1,
        'pot': pot,
    }


WEIGHTS_RUN = {
    1: _run_weights(ext=0.552, dirt=0.507, mc=0.124, pot=1.62E+20),
    2: _run_weights(ext=0.407, dirt=0.409, mc=0.259, pot=2.62E+20),  # dirt: R1+R2
    3: _run_weights(ext=0.356, dirt=0.664, mc=0.16, pot=2.13E+20),   # ext: G1 all
    'combined': _run_weights(ext=0.413, dirt=0.497, mc=0.175, pot=6.37E+20),
}


##################### FAKE-DATA 3 #####################
#WEIGHTS_RUN = {
#    'combined': {
#        'dirt': 0,
#        'mc': 2.8e-1,
#        'nue': 2.8e-1,
#        'ext': 0,
#        'data': 1,
#        'pot': 3.72e20,
#    }
#}

In [None]:
# Clean the event weights, apply the CRT correction, add the derived columns
# used by the selections, and split the MC into nue / numu samples.
# Everything below mutates the DataFrames held in SAMPLES_RUN in place.
M_mu = 0.105 #muon mass, GeV/c^2
M_p = 0.938 #proton mass, GeV/c^2
M_n = 0.939 #neutron mass, GeV/c^2
B = 0.04 #binding energy of argon used in simulation
proton_pidscore = -0.2 #proton candidate < proton_pidscore
NPROTON_CAT = True #split categories 2/21 by true proton multiplicity (see below)

for run in SAMPLES_RUN:
    print(run)
    samples = SAMPLES_RUN[run]
    for sample in samples:
        print(sample)
        if sample in ['mc','dirt','nue']:
            df = samples[sample]
            #######################################
            ## Calibrations
            # Reset unusable weights to 1: non-positive, infinite, > 100 or NaN.
            df.loc[ df['weightTune'] <= 0, 'weightTune' ] = 1.
            df.loc[ df['weightTune'] == np.inf, 'weightTune' ] = 1.
            df.loc[ df['weightTune'] > 100, 'weightTune' ] = 1.
            df.loc[ np.isnan(df['weightTune']) == True, 'weightTune' ] = 1.
            df.loc[ df['weightSplineTimesTune'] <= 0, 'weightSplineTimesTune' ] = 1.
            df.loc[ df['weightSplineTimesTune'] == np.inf, 'weightSplineTimesTune' ] = 1.
            df.loc[ df['weightSplineTimesTune'] > 100, 'weightSplineTimesTune' ] = 1.
            df.loc[ np.isnan(df['weightSplineTimesTune']) == True, 'weightSplineTimesTune' ] = 1.
            #df['weightSpline']  = df['weightSpline']  * df['weightTune']
            #df.loc[ df['npi0'] > 0, 'weightSplineTimesTune' ] = df['weightSpline'] * df['weightTune'] * 0.7 #scale down pi0s

        if USECRT:
            if sample in ['ext','data']:
                df = samples[sample]
                #only apply to data and ext
                # scale crthitpe by 1.09 for run numbers above 16300
                df.loc[(df['run'] > 16300),'crthitpe'] = df['crthitpe']*1.09 #hitpe correction
    ##########################################
    ## Calculated columns
    # Columns ending in _v hold one array per event (frames were read with
    # flatten=False), so the arithmetic below acts element-wise on each array.
    for sample in samples:
        df = samples[sample]
        #useful variables
        # per-event totals of the pfnhits / pfnunhits arrays
        df["slclnhits"] = df["pfnhits"].apply(lambda x: sum(x))
        df["slclnunhits"] = df["pfnunhits"].apply(lambda x: sum(x))
        # fractional difference between the MCS and range muon momenta
        df['trk_p_quality_v'] = (df['trk_mcs_muon_mom_v']-df['trk_range_muon_mom_v'])/df['trk_range_muon_mom_v']
        df['trk_cos_theta_v'] = df['trk_theta_v'].apply(lambda x: np.cos(x))
        df['trk_sin_theta_v'] = df['trk_theta_v'].apply(lambda x: np.sin(x))
        df['trk_cos_phi_v'] = df['trk_phi_v'].apply(lambda x: np.cos(x))
        df['trk_sin_phi_v'] = df['trk_phi_v'].apply(lambda x: np.sin(x))
        # p = sqrt(2*M_p*x): the non-relativistic relation, which presumably
        # treats trk_energy_proton_v as a kinetic energy - TODO confirm
        df['trk_range_proton_mom_v'] = df['trk_energy_proton_v'].apply(lambda x: np.sqrt(2*M_p*x))
        df['trk_range_muon_e_v'] = (df['trk_range_muon_mom_v']**2 + M_mu**2)**.5 # E
        df['trk_range_muon_ke_v'] = df['trk_range_muon_e_v'] - M_mu #KE
        df['trk_energy_tot'] = df["trk_energy_proton_v"].apply(lambda x: sum(x)) #is missing from G1 sample
        # per track: muon-hypothesis energy plus the proton-hypothesis energies of all other tracks
        df['reco_nu_e_range_v'] = df["trk_range_muon_e_v"] + (df["trk_energy_tot"] - df["trk_energy_proton_v"])     
        #df['reco_nproton'] = df['trk_llr_pid_score_v'].apply(lambda x: len(x[x<proton_pidscore]) + 0.01) #add the 0.1 to prevent issue later
        #protons have trk_score cut and llr_pid_score cut
        # the product of the two boolean arrays acts as a logical AND per track
        proton_mask = df['trk_score_v'].apply(lambda x: x>0.5) * df['trk_llr_pid_score_v'].apply(lambda x: x<proton_pidscore)
        # masked-out tracks become 0 and are dropped by x!=False, leaving the count of proton candidates
        df['reco_nproton'] = (df['trk_llr_pid_score_v']*proton_mask).apply(lambda x: len(x[x!=False]))
        df['reco_ntrack'] = df['trk_score_v'].apply(lambda x: len(x))
        # break momentum vector apart
        # unit direction components from theta and phi
        df['trk_dx_v'] = df['trk_sin_theta_v']*df['trk_cos_phi_v']
        df['trk_dy_v'] = df['trk_sin_theta_v']*df['trk_sin_phi_v']
        df['trk_dz_v'] = df['trk_cos_theta_v']
        # 0..N-1 position of each track within its event
        df['id'] = df['trk_len_v'].apply(lambda x: [i for i,_ in enumerate(x)])

        #just MC stuff (truth level)
        if sample in ["mc","dirt"]:
            df['backtracked_pdg_v'] = df['backtracked_pdg']
    
    ########################################################
    # add back the cosmic category
    # and calculate Nproton multiplicity if you so desire
    df = samples['mc']
    # events outside categories 1, 10, 11, 111 whose slnunhits/slnhits ratio is below 0.2 become category 4
    df.loc[(df['category']!=1)&(df['category']!=10)&(df['category']!=11)&(df['category']!=111)&
           (df['slnunhits']/df['slnhits'] < 0.2), 'category'] = 4
    if NPROTON_CAT:
        #separate by CC-NP final state
        # categories 2 and 21 are re-labelled 22/23/24/25 for nproton = 0/1/2/>=3
        df.loc[((df['category']==2)|(df['category']==21))&(df['nproton']==0), 'category'] = 22
        df.loc[((df['category']==2)|(df['category']==21))&(df['nproton']==1), 'category'] = 23
        df.loc[((df['category']==2)|(df['category']==21))&(df['nproton']==2), 'category'] = 24
        df.loc[((df['category']==2)|(df['category']==21))&(df['nproton']>=3), 'category'] = 25

    # split the MC by neutrino flavour: |nu_pdg| == 12 goes to 'nue', |nu_pdg| == 14 stays in 'mc'
    # (rows with any other nu_pdg are dropped from both)
    samples['nue'] = samples['mc'].query('nu_pdg == 12 or nu_pdg == -12')
    samples['mc']  = samples['mc'].query('nu_pdg == 14 or nu_pdg == -14')

# Beginning of analysis section
### Pre-select the dataframes once up front; this saves time later
### (The plotter functions can also apply the selection, but that is slower and has some potential bugs)

In [None]:
# Apply the numu pre-selection query to every loaded sample of every run.
# presel_SAMPLES_RUN[run][sample] holds the filtered frame.
presel_SAMPLES_RUN = {}
for run in SAMPLES_RUN:
    presel_SAMPLES_RUN[run] = {}
    for sample in ['mc','nue','dirt','ext','data']:
        if sample not in SAMPLES_RUN[run]:
            # e.g. MCONLY leaves data/ext/dirt unloaded; skip instead of raising KeyError
            continue
        # simulated samples get the opfilter variant of the query, data-like samples do not
        query,_ = moreFunctions.get_NUMU_sel(False, opfilter=(sample in ['mc','nue','dirt']))
        presel_SAMPLES_RUN[run][sample] = SAMPLES_RUN[run][sample].query(query)

In [None]:
importlib.reload(moreFunctions)
CONTAINED = False
CONTAINEDHIGHE = False #both contained and above

# Columns handed to the full muon selection.
# Not requested at the moment: trk_sce_start_[xyz]_v, trk_sce_end_[xyz]_v,
# trk_score_v, trk_llr_pid_score_v, trk_p_quality_v, trk_len_v,
# trk_distance_v, pfp_generation_v, trk_cos_theta_v.
muon_vars = [
    'topological_score',
    'reco_nu_vtx_sce_x', 'reco_nu_vtx_sce_y', 'reco_nu_vtx_sce_z',
    'reco_ntrack',
    'NeutrinoEnergy2', 'reco_nu_e_range_v', 'trk_range_muon_e_v',
]

# fullsel_SAMPLES_RUN[run][sample]: output of the full muon selection, plus a
# scalar neutrino energy and the track count copied from the pre-selected frame.
fullsel_SAMPLES_RUN = {}
for run in SAMPLES_RUN:
    print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
    samples = SAMPLES_RUN[run]
    fullsel_SAMPLES_RUN[run] = {}
    for sample in samples:
        print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
        selected = moreFunctions.apply_muon_fullsel(samples[sample], sample, False, LOADDETSYS, VARS=[muon_vars,[]])
        fullsel_SAMPLES_RUN[run][sample] = selected
        # first entry of the per-event energy array
        selected['reco_nu_e_range'] = selected['reco_nu_e_range_v'].apply(lambda x: x[0])
        matching_presel = presel_SAMPLES_RUN[run][sample].loc[selected.index]
        selected['reco_ntrack'] = matching_presel['reco_ntrack']
        

In [None]:
# Optional containment selection on top of the full selection.
if CONTAINED:
    fullsel_contained_SAMPLES_RUN = {}
    for run, samples in SAMPLES_RUN.items():
        print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
        fullsel_contained_SAMPLES_RUN[run] = {}
        for sample in samples:
            print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
            # rows of the unfiltered frame (all tracks, not just the muon
            # candidates) for the events that passed the full selection
            passed_index = fullsel_SAMPLES_RUN[run][sample].index
            df = SAMPLES_RUN[run][sample].loc[passed_index]
            # containment filter on those events
            fullsel_contained_SAMPLES_RUN[run][sample] = moreFunctions.apply_contained(df, sample, False, LOADDETSYS)

    if CONTAINEDHIGHE:
        # NOTE(review): this "above105" step calls apply_contained again and no
        # energy cut is visible here - confirm that this is intended.
        fullsel_contained_above105_SAMPLES_RUN = {}
        for run in SAMPLES_RUN:
            print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
            fullsel_contained_above105_SAMPLES_RUN[run] = {}
            samples = fullsel_contained_SAMPLES_RUN[run]
            for sample, contained_frame in samples.items():
                print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
                fullsel_contained_above105_SAMPLES_RUN[run][sample] = moreFunctions.apply_contained(contained_frame, sample, False, LOADDETSYS)

In [None]:
# Add a scalar column for each vector column of the full-selection frames
# (first entry of the array, name without the trailing "_v"), and refresh the
# track count from the unfiltered frame.
vars_to_condense = [
    'reco_nu_e_range_v','trk_range_muon_e_v',
    'trk_len_v',
    'trk_cos_theta_v',
]
for run in fullsel_SAMPLES_RUN:
    for sample in fullsel_SAMPLES_RUN[run]:
        df = fullsel_SAMPLES_RUN[run][sample]
        for var in vars_to_condense:
            df[var[:-2]] = df[var].apply(lambda x: x[0])
        # does not depend on `var`, so it is done once per frame rather than once per variable
        df['reco_ntrack'] = SAMPLES_RUN[run][sample].loc[df.index]['reco_ntrack']

In [None]:
# High-energy sideband of the full selection: events with reco_nu_e_range > 1.05.
fullsel_above105_SAMPLES_RUN = {}
for run in SAMPLES_RUN:
    print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
    samples = fullsel_SAMPLES_RUN[run]
    sideband = fullsel_above105_SAMPLES_RUN[run] = {}
    for sample, frame in samples.items():
        print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
        # scalar energy = first entry of the per-event energy array
        Reco_E_NU = frame['reco_nu_e_range_v'].apply(lambda x: x[0])
        frame['reco_nu_e_range'] = Reco_E_NU
        sideband[sample] = frame.query("reco_nu_e_range > 1.05")

In [None]:
########################################
# 1muNp selection
# Second longest track is proton
# Np_presel_samples 
#######################################
importlib.reload(moreFunctions)

# Fiducial-volume limits per axis (alternatives tried: x [10,246], y [-105,105]).
FVx = [5,251]
FVy = [-110,110]
FVz = [20,986]
query,_ = moreFunctions.get_NUMU_sel(False,opfilter=True)

# Cuts are (column, comparison, value) triples.
# Common to every track: start and end inside the fiducial volume,
# track score above 0.5 and pfp generation equal to 2.
track_cuts_common = [
    ('trk_sce_start_x_v', '>', FVx[0]), ('trk_sce_start_x_v', '<', FVx[1]),
    ('trk_sce_start_y_v', '>', FVy[0]), ('trk_sce_start_y_v', '<', FVy[1]),
    ('trk_sce_start_z_v', '>', FVz[0]), ('trk_sce_start_z_v', '<', FVz[1]),
    ('trk_sce_end_x_v', '>', FVx[0]), ('trk_sce_end_x_v', '<', FVx[1]),
    ('trk_sce_end_y_v', '>', FVy[0]), ('trk_sce_end_y_v', '<', FVy[1]),
    ('trk_sce_end_z_v', '>', FVz[0]), ('trk_sce_end_z_v', '<', FVz[1]),
    ('trk_score_v', '>', 0.5),
    ('pfp_generation_v', '==', 2),
]
# Muon-candidate cuts.
track_cuts_muon = [
    ('trk_llr_pid_score_v', '>', 0.2),
    ('trk_p_quality_v', '>', -0.5),
    ('trk_p_quality_v', '<', 0.5),
    ('trk_score_v', '>', 0.8),
    ('trk_len_v', '>', 10),
    ('trk_distance_v', '<', 4),
]
# Proton-candidate cuts.
track_cuts_proton = [
    ('trk_llr_pid_score_v', '<', -0.2),
]
MC_VARS = [
    'isVtxInFiducial', 'truthFiducial', 'proton_e', 'nu_pdg', 'ccnc',
    'nmuon', 'nproton', 'id', 'reco_nu_e_range_v',
    'trk_sce_start_x_v', 'trk_sce_start_y_v', 'trk_sce_start_z_v',
]
SEL_VARS = [cut[0] for cut in track_cuts_muon + track_cuts_proton] + MC_VARS

Np_presel_samples, Np_muon_samples, Np_proton_samples, Np_other_samples  = {}, {}, {}, {}
for run, samples in SAMPLES_RUN.items():
    print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
    Np_presel_samples[run], Np_muon_samples[run], Np_proton_samples[run]  = {}, {}, {}
    for sample in samples:
        print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
        #make all dataframes
        presel = moreFunctions.make_and_apply_mask(samples[sample], query, track_cuts_common, sample, LOADDETSYS, select_longest=False, VARS=[['nslice','trk_len_v','trk_score_v'],SEL_VARS])
        Np_presel_samples[run][sample] = presel
        # number of tracks left after the common cuts
        presel['reco_ntrack_contained'] = presel['trk_len_v'].apply(len)
        print("Presel: {}".format(moreFunctions.get_current_time("%H:%M:%S")))
        muon, proton = moreFunctions.split_muon_proton(presel, 'nslice==1', track_cuts_muon, track_cuts_proton, sample, LOADDETSYS, [False,False], VARS=[['nslice','trk_len_v','trk_score_v'],MC_VARS])
        Np_muon_samples[run][sample], Np_proton_samples[run][sample] = muon, proton
        print("Muon/Proton Split: {}".format(moreFunctions.get_current_time("%H:%M:%S")))

        #sync up the dataframes: keep only events present in both
        intersec = list(set(muon.index) & set(proton.index))
        muon = muon.loc[intersec]
        proton = proton.loc[intersec]
        Np_muon_samples[run][sample] = muon
        Np_proton_samples[run][sample] = proton

        #finish up: scalar neutrino energy and muon-proton start-point separation
        first = lambda x: x[0]
        muon['reco_nu_e_range'] = muon['reco_nu_e_range_v'].apply(first)
        mu_x, mu_y, mu_z = (muon[col].apply(first) for col in ('trk_sce_start_x_v', 'trk_sce_start_y_v', 'trk_sce_start_z_v'))
        p_x, p_y, p_z = (proton[col].apply(first) for col in ('trk_sce_start_x_v', 'trk_sce_start_y_v', 'trk_sce_start_z_v'))
        dist = ( (mu_x-p_x)**2 + (mu_y-p_y)**2 + (mu_z-p_z)**2 )**0.5
        muon['mu_p_dist'] = dist
        proton['mu_p_dist'] = dist
        print('----------------------')
    print('--------------------------')

In [None]:
#make the high energy sideband cut
# Np_muon_above105_samples / Np_proton_above105_samples hold, per run and
# sample, the muon frame restricted to reco_nu_e_range > 1.05 and the proton
# rows for those same events.
Np_muon_above105_samples,Np_proton_above105_samples = {},{}
for run in Np_muon_samples:
    # one fresh dict per container ("a, b = {}" would raise ValueError)
    Np_muon_above105_samples[run],Np_proton_above105_samples[run] = {},{}
    for sample in Np_muon_samples[run]:
        #first make the neutrino energy more accessible
        Reco_E_NU = Np_muon_samples[run][sample]['reco_nu_e_range_v'].apply(lambda x: x[0])
        Np_muon_samples[run][sample]['reco_nu_e_range'] = Reco_E_NU
        Np_muon_above105_samples[run][sample] = Np_muon_samples[run][sample].query('reco_nu_e_range > 1.05')
        # keep the proton rows for exactly the events that survived the cut
        Np_proton_above105_samples[run][sample] = Np_proton_samples[run][sample].loc[Np_muon_above105_samples[run][sample].index]
        

In [None]:
def _first_start_coords(frame):
    """Return the first-entry x, y, z track-start coordinates of `frame` as three Series."""
    return tuple(
        frame[column].apply(lambda x: x[0])
        for column in ('trk_sce_start_x_v', 'trk_sce_start_y_v', 'trk_sce_start_z_v')
    )


# Store the muon-proton start-point separation on both sideband frames.
for run in Np_muon_above105_samples:
    for sample in Np_muon_above105_samples[run]:
        muon = Np_muon_above105_samples[run][sample]
        proton = Np_proton_above105_samples[run][sample]
        mu_x, mu_y, mu_z = _first_start_coords(muon)
        p_x, p_y, p_z = _first_start_coords(proton)
        dist = ( (mu_x-p_x)**2 + (mu_y-p_y)**2 + (mu_z-p_z)**2 )**0.5
        muon['mu_p_dist'] = dist
        proton['mu_p_dist'] = dist

In [None]:
#################################
# CCQE STUFF
# This takes a while, but you get a bunch of CCQE dataframes
importlib.reload(moreFunctions)
CCQE_presel_samples, CCQE_muon_samples = {}, {}
CCQE_proton_samples, CCQE_samples = {}, {}
verbose = False

for run, samples in SAMPLES_RUN.items():
    print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
    presel_by_sample = CCQE_presel_samples[run] = {}
    muon_by_sample = CCQE_muon_samples[run] = {}
    proton_by_sample = CCQE_proton_samples[run] = {}
    ccqe_by_sample = CCQE_samples[run] = {}
    for sample in samples:
        print("{}: {}".format(sample, moreFunctions.get_current_time("%H:%M:%S")))
        #events that have atleast one muon candidate and exactly 2 reco tracks
        if verbose:
            print("presel...")
        presel = moreFunctions.apply_CCQE_presel(samples[sample], sample, False, LOADDETSYS)
        presel_by_sample[sample] = presel
        if verbose:
            print("muons...")
        muons = moreFunctions.select_muons(presel, sample, False, LOADDETSYS)
        muon_by_sample[sample] = muons
        if verbose:
            print("protons...")
        protons = moreFunctions.select_protons(presel, sample, False, LOADDETSYS)
        proton_by_sample[sample] = protons
        if verbose:
            print("done")
        #intersection of muon & proton dataframes are the 1mu1p events
        intersec = list(set(muons.index) & set(protons.index))
        #update everything so all three frames hold the same events
        ccqe_by_sample[sample] = presel.loc[intersec]
        muon_by_sample[sample] = muons.loc[intersec]
        proton_by_sample[sample] = protons.loc[intersec]
        #make sure this is fixed
        
########################################
#make the CCQE calculated variables
# Adds momentum components to the CCQE muon and proton frames and the
# event-level kinematic variables to CCQE_samples, all in place.
M_mu = 0.105 #muon mass, GeV/c^2
M_p = 0.938 #proton mass, GeV/c^2
M_n = 0.939 #neutron mass, GeV/c^2
B = 0.04 #binding energy of argon used in simulation

for run in CCQE_samples:
    for sample in CCQE_samples[run]:
        df = CCQE_samples[run][sample]
        df_muon = CCQE_muon_samples[run][sample]
        df_proton = CCQE_proton_samples[run][sample]
        ######## muon: momentum magnitude, energy and components along the track direction
        df_muon['p'] = df_muon['trk_range_muon_mom_v']
        df_muon['E'] = df_muon['trk_range_muon_e_v']
        df_muon['px'] = df_muon['p']*df_muon['trk_dx_v']
        df_muon['py'] = df_muon['p']*df_muon['trk_dy_v']
        df_muon['pz'] = df_muon['p']*df_muon['trk_dz_v']
        df_muon['pT'] = (df_muon['px']**2 + df_muon['py']**2)**0.5
        df_muon['pTransverseRatio'] = df_muon['pT'] / df_muon['p']
        df_muon['trk_len'] = df_muon['trk_len_v'].apply(lambda x: x[0])
        ######## proton: same quantities
        df_proton['p'] = df_proton['trk_range_proton_mom_v']
        df_proton['E'] = df_proton['trk_energy_proton_v']
        df_proton['px'] = df_proton['p']*df_proton['trk_dx_v']
        df_proton['py'] = df_proton['p']*df_proton['trk_dy_v']
        df_proton['pz'] = df_proton['p']*df_proton['trk_dz_v']
        df_proton['pT'] = (df_proton['px']**2 + df_proton['py']**2)**0.5
        df_proton['pTransverseRatio'] = df_proton['pT'] / df_proton['p']
        df_proton['trk_len'] = df_proton['trk_len_v'].apply(lambda x: x[0])
        ######### event level
        df['E'] = df_muon['reco_nu_e_range_v'].apply(lambda x: x[0])
        # Q^2 = 2 E_nu (E_mu - p_mu cos(theta_mu)) - m_mu^2
        # (the momentum enters multiplied by cos(theta), not squared)
        df['Q2'] = 2 * df_muon['reco_nu_e_range_v'] * (df_muon['trk_range_muon_e_v'] - df_muon['p']*df_muon['trk_cos_theta_v']) - M_mu**2
        df['Q3'] = (df_muon['reco_nu_e_range_v']**2 + df_muon['p']**2 - 2*df_muon['reco_nu_e_range_v']*df_muon['p']*df_muon['trk_cos_theta_v'])**0.5 #magnitude of 3-mom vector
        # 'Mhad' is computed as E_nu - E_mu, i.e. the energy transferred away from the muon
        df['Mhad'] = df_muon['reco_nu_e_range_v'] - df_muon['trk_range_muon_e_v']
        df['Xbj'] = df['Q2'] / (2 * M_n * df['Mhad'])
        df['Ybj'] = df['Mhad'] / df_muon['reco_nu_e_range_v']
        df['p'] = ((df_muon['px'] + df_proton['px'])**2 + (df_muon['py'] + df_proton['py'])**2 + (df_muon['pz'] + df_proton['pz'])**2)**0.5
        # longitudinal momentum of the muon+proton system: summed z components,
        # consistent with px / py below and with the pz term inside 'p'
        df['pL'] = df_proton['pz'] + df_muon['pz']
        #opening angle
        # cos(opening angle) = dot(muon unit vector, proton unit vector)
        df['cos_alpha'] = df_muon['trk_dx_v']*df_proton['trk_dx_v'] + df_muon['trk_dy_v']*df_proton['trk_dy_v'] + df_muon['trk_dz_v']*df_proton['trk_dz_v']
        df['opening_angle'] = df['cos_alpha'].apply(lambda x: np.arccos(x))
        df['phi_diff'] = df_muon['trk_phi_v'] - df_proton['trk_phi_v']
        df['theta_tot'] = df_muon['trk_theta_v'] + df_proton['trk_theta_v']
        longest_mask = df['trk_len_v'].apply(lambda x: x == x[list(x).index(max(x))])#identify longest
        df['trk_len'] = (df['trk_len_v']*longest_mask).apply(lambda x: x[x!=False]) #longest available track        
        #muon-proton separation (distance between the first-entry track start points)
        mu_x,mu_y,mu_z = df_muon['trk_sce_start_x_v'].apply(lambda x: x[0]),df_muon['trk_sce_start_y_v'].apply(lambda x: x[0]),df_muon['trk_sce_start_z_v'].apply(lambda x: x[0])
        p_x,p_y,p_z = df_proton['trk_sce_start_x_v'].apply(lambda x: x[0]),df_proton['trk_sce_start_y_v'].apply(lambda x: x[0]),df_proton['trk_sce_start_z_v'].apply(lambda x: x[0])
        dist = ( (mu_x-p_x)**2 + (mu_y-p_y)**2 + (mu_z-p_z)**2 )**0.5
        df['mu_p_dist'] = dist
        #single-transverse variable alpha
        df['px'] = df_muon['px'] + df_proton['px']
        df['py'] = df_muon['py'] + df_proton['py']
        df['pT'] = ((df['px'])**2 + (df['py'])**2)**0.5
        df['pTransverseRatio'] = df['pT'] / df['p']
        # cos(alpha_T) = -(pT_mu . pT_total) / (|pT_mu| |pT_total|)
        df['cos_alpha_t'] = -1*(df_muon['px']*df['px'] + df_muon['py']*df['py']) / (df_muon['pT']*df['pT'])
        df['alpha_t'] = df['cos_alpha_t'].apply(lambda x: np.arccos(x))
        

In [None]:
#make relative efficiency plot as a function of proton energy
# Candidate frames: fullsel_SAMPLES_RUN, Np_proton_samples, SAMPLES_RUN

RUNS = ['combined']

# What to plot and how to label it.
xvar = 'proton_e'
xlab = 'Leading Proton Energy [GeV]'
title = r'Relative Efficiency of 1$\mu$Np Sel vs Leading Proton E'
bin_edges = np.linspace(0.8,1.3,20)
# Signal definition handed to moreFunctions.Eff.
signal = 'abs(nu_pdg) == 14 and ccnc == 0 and nmuon==1 and nproton >= 1 and truthFiducial == 1 and isVtxInFiducial == 1'

# Efficiency of the 1muNp proton frame relative to the loaded combined MC frame.
BINS,EFFS,ERRS = moreFunctions.Eff(
    SAMPLES_RUN['combined']['mc'],
    Np_proton_samples['combined']['mc'],
    xvar, signal, bin_edges,
)

# Horizontal error bars span half the spacing of the first two bin positions.
_half_bin = (BINS[1]-BINS[0])/2.
plt.errorbar(BINS, EFFS, xerr=[_half_bin]*len(BINS), yerr=ERRS, fmt='k')
plt.xlabel(xlab)
plt.ylabel("Efficiency (w.r.t presel)")
plt.title(title)
#plt.ylim([0,0.2])
plt.tight_layout()
plt.savefig("plots\\All-Open\\combined\\1muNp\\RelEfficiencyvsLeadingProtonE_1.3GeV.pdf")

In [None]:
#################################################################
# Promote vector-valued columns to scalar-valued ones:
# for each column '<name>_v' listed in VARS, a new column '<name>'
# is written that holds element 0 of every entry.
#################################################################
DFs = fullsel_SAMPLES_RUN
VARS = [
    'reco_nu_e_range_v',
    'trk_range_muon_e_v',
    'trk_len_v',
    'trk_cos_theta_v',
]
'''
VARS = [
    'trk_sce_start_x_v','trk_sce_start_y_v','trk_sce_start_z_v',
    'trk_sce_end_x_v','trk_sce_end_y_v','trk_sce_end_z_v',
    'trk_score_v','trk_llr_pid_score_v','trk_p_quality_v',
    'trk_len_v','trk_distance_v','pfp_generation_v'
]
'''
# which run keys of DFs to process
RUNS = ['combined'] if COMBINEDONLY else [1,2,3,'combined']

for run in RUNS:
    for sample in ('mc','nue','ext','data','dirt'):
        df = DFs[run][sample]
        for var in VARS:
            # var[:-2] drops the last two characters (the '_v' suffix)
            scalar_column = var[:-2]
            first_entries = df[var].apply(lambda x: x[0])
            df[scalar_column] = first_entries
        


## PLOTTING

In [None]:
############################################
## Time-Dependent Studies
## WORKHORSE NOTEBOOK
## Make a plot for each run, then combined
## need to upgrade to make one figure with all plots on it
############################################
importlib.reload(moreFunctions)

#active volume
AVx = [-1.55,254.8]
AVy = [-115.53, 117.47]
AVz = [0.1, 1036.9]

# Single-variable configuration: the LAST uncommented assignment wins.
VARIABLE, BINS, RANGE, XTIT = 'reco_ntrack',5,(0.5,5.5),r'Reco Track Multiplicity'
VARIABLE, BINS, RANGE, XTIT = 'reco_ntrack_contained',7,(-0.5,6.5),r'Reco Contained Track Multiplicity'
VARIABLE, BINS, RANGE, XTIT = 'topological_score',25,(0,1),r'Topological Score'
#VARIABLE, BINS, RANGE, XTIT = 'mu_p_dist',25,(0,10),r'$\mu$-p separation [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'Q3',25,(0,1.25),r'Q3'
#VARIABLE, BINS, RANGE, XTIT = 'trk_cos_theta_v',24,(-1,1),r'Cos($\theta$)'
#VARIABLE, BINS, RANGE, XTIT = 'topological_score',25,(0,1),'Topological Score'
#VARIABLE, BINS, RANGE, XTIT = 'run',50,(5000,17000),'Run'
#VARIABLE, BINS, RANGE, XTIT = 'nu_e',25,(0,2.5),r'True $\nu$ Energy [GeV]'

# Batch configuration: the single-variable lists are built first and then
# replaced by a predefined plot group; again the last uncommented line wins.
VARIABLES, BIN, RANGES, XTITS = [VARIABLE], [BINS], [RANGE], [XTIT]
VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('presel input noCRT')
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('muon input')
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('fullselKinematics')
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE') #look in NUMUhelper.py
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE_proton_kinematics') 
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE_muon_kinematics')

tag = "fullsel_samples" #this will show up in the name of saved figure
SAMPLES = fullsel_SAMPLES_RUN #or SAMPLES_RUN, presel_SAMPLES_RUN or fullsel_SAMPLES_RUN, CCQE_muon_samples, CCQE_proton_samples, CCQE_samples
# NOTE(review): title_base is not used anywhere in this cell.
title_base = r"1$\mu$Np, E$_{\nu}$ > 1.05 GeV"
title_base = r"FAKE DATA #3, $\nu_{\mu}$ CC INC"
title_base = ''

KIND = 'event_category' # interaction, backtracked_pdg, event_category

SAVEFIG = True
DRAWDATA = True
DRAWRATIO = True
SELECTLONGEST = True
DRAWSYS = True #need to load extra files
DETSYS = False
TIMESTUDY = True

QUERY, track_cuts = 'topological_score >= 0', None

SAVEDIR_BASE = 'C:\\Users\\Ryan\\python-workspace\\PELEE-newmcfilteredsamples\\plots\\{}-{}\\'.format(BNB_SAMPLE_TYPE,MC_SAMPLE_TYPE)

# encode the plotting options in the tag so they end up in the file names
if SELECTLONGEST: tag += '_longest'
else: tag += '_alltracks'
if not DRAWDATA: tag += "_nodata"
if not DRAWRATIO: tag += "_noratio"
if not USECRT: tag += '_noCRT'
tag_lc = tag.lower()

# Which runs to plot. This does not depend on the variable, so it is decided
# once. The branches are mutually exclusive: previously a second, independent
# `if ... else` always overwrote the [1,2,3,'combined'] choice with
# ['combined'], so the per-run plots were never produced.
if TIMESTUDY and NOCOMBINED:
    RUNS = [1,2,3]
elif TIMESTUDY and not COMBINEDONLY:
    RUNS = [1,2,3,'combined']
else:
    RUNS = ['combined']

if SAVEFIG and len(VARIABLES) > 1:
    pdf_large = matplotlib.backends.backend_pdf.PdfPages(SAVEDIR_BASE+f"VARS_combined_{date_time}_{tag}_{KIND}.pdf") #pdf of everything made this batch
for (VARIABLE, BINS, RANGE, XTIT) in zip(VARIABLES, BIN, RANGES, XTITS):
#####################################################################
#get specific cuts based on what the tag is
#QUERY, track_cuts = moreFunctions.get_Cuts(tag, ISRUN3)
    if SAVEFIG and TIMESTUDY:
        pdf = matplotlib.backends.backend_pdf.PdfPages(SAVEDIR_BASE+"timeDep_combined_{}_{}_{}_{}.pdf".format(VARIABLE,date_time,tag,KIND)) #pdf of variable groupings

    for run in RUNS:
        # output directory: <base>\Run<run>\<systematics flavour>\<selection stage>\
        SAVEDIR = SAVEDIR_BASE + 'Run{}\\'.format(run)
        if DRAWSYS and DETSYS:
            SAVEDIR += 'fullSystematics\\'
        elif DRAWSYS:
            SAVEDIR += 'StatsandFluxGenieReint\\'
        elif DETSYS:
            SAVEDIR += 'StatsandDetsys\\'
        else:
            SAVEDIR += 'justStats\\'
        if 'fullsel' in tag_lc:
            SAVEDIR += 'fullsel\\'
        elif 'presel' in tag_lc:
            SAVEDIR += 'presel\\'
        print(SAVEDIR)
        os.makedirs(SAVEDIR, exist_ok=True)
            
        # NOTE(review): detsys is computed here but never passed to the
        # plotter call below -- confirm whether it should be.
        if DETSYS:
            if 'fullsel' in tag_lc:
                detsys = moreFunctions.get_Detsys('fullsel',VARIABLE,RANGE,BINS)
            elif 'presel' in tag_lc:
                detsys = moreFunctions.get_Detsys('presel',VARIABLE,RANGE,BINS)
                
        print("Run {}: {}".format(run, moreFunctions.get_current_time("%H:%M:%S")))
        plot_sample = SAMPLES[run]
        weights = WEIGHTS_RUN[run]
        pot = weights['pot']
        plot_title = f"Run {run}"
        #######################################################
        # plotting
        my_plotter = plotter.Plotter(plot_sample, weights, pot=pot)
        # only the first three returned objects (figure + two axes) are used
        fig, ax1, ax2 = my_plotter.plot_variable(
            VARIABLE,   
            query=QUERY,
            kind=KIND, #sample, interaction, backtracked_pdg
            track_cuts = track_cuts,
            select_longest = SELECTLONGEST, #this is true by default in self._selection
            title=XTIT,
            #bins=asymm_bins,
            bins=BINS,
            stacksort=4, #0-numerical, 1-weights, 2-eLee on top, 3-eLee+nue on top, 4-numu on top
            range=RANGE,
            ratio=DRAWRATIO,
            draw_sys=DRAWSYS,
            #purity=False,
            draw_data=DRAWDATA,
        )[0:3]
        print("Profile likelihood: {} sigma @ {} POT".format(my_plotter.significance_likelihood,pot))
        print("s/sqrt(b): {} sigma @ {} POT".format(my_plotter.significance, pot))

        #ax1.set_ylim(0,40)
        #ax1.set_yscale("log")
        #ax1.set_ylim(0,3000)
        # leave 50% headroom above the tallest bin
        ax1.set_ylim(0,ax1.get_ylim()[1]*1.5)
        if DRAWRATIO: ax2.set_ylim(0.5,1.5)
        ax1.set_title(plot_title, fontsize=15)

        if SAVEFIG:
            fn = VARIABLE+"_"+date_time+"_"+tag+'_'+KIND
            fn += ".pdf"
            print("saving to {}...".format("{}\\{}".format(SAVEDIR,fn)))
            fig.tight_layout()
            # NOTE(review): this creates <SAVEDIR><run>, which the savefig call
            # below does not write into -- kept to preserve the directory layout.
            os.makedirs(SAVEDIR+str(run), exist_ok=True)
            fig.savefig("{}\\{}".format(SAVEDIR,fn))
            if TIMESTUDY:
                pdf.savefig(fig)
            if len(VARIABLES) > 1:
                pdf_large.savefig(fig)
        plt.show()
    if SAVEFIG and TIMESTUDY:
        pdf.close()

if SAVEFIG and len(VARIABLES) > 1: 
    pdf_large.close()

In [None]:
############################################
## Generic plot maker with all the fixins
############################################
importlib.reload(moreFunctions)

#active volume
AVx = [-1.55,254.8]
AVy = [-115.53, 117.47]
AVz = [0.1, 1036.9]

# Single-variable configuration: the LAST uncommented assignment wins.
VARIABLE, BINS, RANGE, XTIT = 'nu_e',14,(0.15,1.55),r'True $\nu$ Energy [GeV]'
VARIABLE, BINS, RANGE, XTIT = 'reco_nproton',5,(-0.5,4.5),r'Reco Proton Multiplicity'
VARIABLE, BINS, RANGE, XTIT = 'reco_ntracks',5,(0.5,5.5),r'Reco Track Multiplicity'
#VARIABLE, BINS, RANGE, XTIT = 'reco_contained_ntrack',5,(0.5,5.5),r'Reco Contained Track Multiplicity'
#VARIABLE, BINS, RANGE, XTIT = 'nproton',5,(-0.5,4.5),r'True Proton Multiplicity'
#VARIABLE, BINS, RANGE, XTIT = 'reco_nu_e_range_v',14,(0.15,1.55),r'Reco Range-Based $\nu$ Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_len_v',24,(0,100),r'Track Length [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_score_v',24,(0.5,1),r'Track Score'
#VARIABLE, BINS, RANGE, XTIT = 'trk_distance_v',24,(0,10),r'Track Distance [cm]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_p_quality_v',24,(-1,2.5),r'MCS Consistency $(\frac{P_{MCS}-P_{Range}}{P_{Range}})$'
#VARIABLE, BINS, RANGE, XTIT = 'crtveto',2,(-0.5,1.5),r'CRT Veto'
#VARIABLE, BINS, RANGE, XTIT = 'crthitpe', 25, (0.1,600), "CRT hit PE"
#VARIABLE, BINS, RANGE, XTIT = 'trk_llr_pid_score_v', 25, (-1, 1), "LLR PID Score"
#VARIABLE, BINS, RANGE, XTIT = 'Q2_v', 25, (0,2), "Q2"
#VARIABLE, BINS, RANGE, XTIT = 'Mhad_v', 25, (0,2.5), r"M$_{hadron}$"
#VARIABLE, BINS, RANGE, XTIT = 'Xbj_v', 25, (0,3), r"Bjorken x"
#VARIABLE, BINS, RANGE, XTIT = 'Ybj_v', 25, (0,2), r"Bjorken y"
#VARIABLE, BINS, RANGE, XTIT = 'trk_cos_theta_v',24,(-1,1),r'Cos($\theta$)'
#VARIABLE, BINS, RANGE, XTIT = 'trk_energy_proton_v', 25, (0,1), r'Reco Range-Based Proton Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'trk_range_muon_e_v', 25, (0,1.5), r'Reco Range-Based Muon Energy [GeV]'
#VARIABLE, BINS, RANGE, XTIT = 'NeutrinoEnergy2', 25, (0,2000), 'Visible Energy on Plane 2 [MeV]'

# Batch configuration: the single-variable lists are built first and then
# replaced by a predefined plot group; the last uncommented line wins.
VARIABLES, BIN, RANGES, XTITS = [VARIABLE], [BINS], [RANGE], [XTIT]
VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE')
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE_proton_kinematics')
#VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots('CCQE_muon_kinematics')

#tag gets added to end of .pdf name and changes some parameters
#for example if "presel" is in the tag, no track_cuts will be applied
#include category, sample, interaction, or backtracked_pdg in tag to change category (default "category")
# "above105" or "below105" in tag make cuts on reco neutrino energy (1.05 GeV is muon mass E)
#tag = "presel_contained_samples_pdg_alltracks"
tag = "fullsel_contained_above105_samples_allopen"
SAVEFIG = True
DRAWDATA = True
DRAWRATIO = True
SELECTLONGEST = True
DRAWSYS = False #need to load extra files
#If plot_sample is fullsel_samples, change tag to "fullsel_samples to save time ;)
#SAVEDIR = ls.plots_path+"Run3\\"+ls.SAMPLE+"\\aprilcm-response\\"
SAVEDIR = ls.plots_path+"All-Open\\"
os.makedirs(SAVEDIR, exist_ok=True)
QUERY, track_cuts = 'nslice == 1', None
KINDS = ['event_category','interaction'] #interaction, backtracked_pdg, event_category
#KINDS = ['event_category']

# encode the plotting options in the tag so they end up in the file names
if SELECTLONGEST:
    tag += '_longest'
else:
    tag += '_alltracks'
if not DRAWDATA:
    tag += "_nodata"
if DRAWSYS:
    tag += '_detsys'
tag_lc = tag.lower()

#if 23 in np.array(plot_sample['mc']['category']): 
#    tag += '_nproton'

# NOTE(review): `weights`, `pot` and the various *_samples dictionaries used
# below are not defined in this cell; they must already exist in the notebook
# namespace.
for KIND in KINDS:
    # The multi-page PDF is only opened when figures are actually saved, so
    # that every opened file is also closed at the end of this iteration.
    if SAVEFIG and len(VARIABLES) > 1:
        pdf = matplotlib.backends.backend_pdf.PdfPages(SAVEDIR+"combined_{}_{}_{}.pdf".format(date_time,tag,KIND))
    for (VARIABLE, BINS, RANGE, XTIT) in zip(VARIABLES, BIN, RANGES, XTITS):
    #####################################################################
        # Pick the sample dictionary and plot title from the tag.
        # Order matters: the first matching substring wins. Only the matching
        # branch is evaluated, so sample dictionaries that are not loaded do
        # not raise unless the tag selects them.
        if "fullsel_contained_above105_samples" in tag_lc:
            print("using fullsel_contained_above105_samples")
            plot_sample = fullsel_contained_above105_samples
            plot_title = r"Fullsel INC (E$^{reco}_{\nu}$ > 1.05 GeV), contained,"
        elif "fullsel_above105_samples" in tag_lc:
            print("using fullsel_above105_samples")
            plot_sample = fullsel_above105_samples
            plot_title = r"Fullsel INC (E$^{reco}_{\nu}$ > 1.05 GeV)"
        elif "fullsel_contained_samples" in tag_lc:
            print("using fullsel_contained_samples")
            plot_sample = fullsel_contained_samples
            plot_title = "Fullsel, Contained Tracks, "
        elif "fullsel_sample" in tag_lc:
            print("using fullsel_samples")
            plot_sample = fullsel_samples
            plot_title = "Fullsel INC"
        elif "ccqe_muon" in tag_lc:
            print("using CCQE_muon_samples")
            plot_sample = CCQE_muon_samples 
            plot_title = "CCQE muon"
        elif "ccqe_proton" in tag_lc:
            print("using CCQE_proton_samples")
            plot_sample = CCQE_proton_samples 
            plot_title = "CCQE proton"
        elif "ccqe_contained" in tag_lc:
            plot_sample = CCQE_contained_samples 
            plot_title = "CCQE, contained tracks"
        elif "ccqe_sample" in tag_lc:
            plot_sample = CCQE_samples 
            plot_title = "CCQE"
        elif "fullsel_nomcs_sample" in tag_lc:
            print("using fullsel_noMCS_samples")
            plot_sample = fullsel_noMCS_samples 
            plot_title = "Fullsel, No MCS Cut, "
        elif "ccqe_tracktester_contained" in tag_lc:
            plot_sample = CCQE_tracktester_contained_samples
            plot_title = "CCQE, trk_score > 0.5, contained"
        elif "ccqe_tracktester" in tag_lc:
            plot_sample = CCQE_tracktester_samples
            plot_title = "CCQE, trk_score > 0.5"
        elif "presel_sample" in tag_lc:
            plot_sample = presel_samples
            plot_title = "Presel"
        elif "samples" in tag_lc:
            plot_sample = samples
            if "presel" in ls.SAMPLE:
                plot_title = "Presel"
            else:
                plot_title = "NoSel"
        else:
            print("using default samples")
            plot_sample = samples
            if "presel" in ls.SAMPLE:
                plot_title = "Presel"
            else:
                plot_title = "NoSel"

        if not SELECTLONGEST:
            plot_title += ', all tracks'
        if "noopfilter" in tag_lc:
            plot_title += ', no opfilter cuts'

        # Give the data frame a placeholder column for variables it lacks.
        # NOTE(review): this touches samples['data'], not plot_sample['data'];
        # confirm that is intended when plot_sample is a different dictionary.
        if VARIABLE not in samples['data'].keys(): samples['data'][VARIABLE] = -999

        # axis-title decorations driven by the tag
        if "above105" in tag_lc:
            XTIT += " (reco_nu_e_range > 1.05 GeV)"
        elif "below105" in tag_lc:
            XTIT += " (reco_nu_e_range <= 1.05 GeV)"
            if VARIABLE == 'reco_nu_e_range_v':
                BINS,  RANGE = 11, (-0.05, 1.05)

        if "fullsel_samples" in tag_lc:
            plot_sample = fullsel_samples
        elif "fullsel_notopo_samples" in tag_lc:
            plot_sample = fullsel_notopo_samples

        if "nomcs" in tag_lc:
            XTIT += " no MCS cut"
        elif "invertmcs" in tag_lc:
            XTIT += ' inverted MCS cut'

        if "true2212" in tag_lc:
            XTIT += ' (true leading proton) '

        if 'crtgt100' in tag_lc:
            XTIT += ' (crthitpe > 100)'
        elif 'crtlt100' in tag_lc:
            XTIT += ' (crthitpe < 100)'
        elif 'invertcrt' in tag_lc:
            XTIT += ' (crthitpe > 100 and crtveto == 0)'


        #get specific cuts based on what the tag is
        #QUERY, track_cuts = moreFunctions.get_Cuts(tag, ISRUN3)
        #######################################################
        # plotting
        print(tag)
        my_plotter = plotter.Plotter(plot_sample, weights, pot=pot)
        # only the first three returned objects (figure + two axes) are used
        fig, ax1, ax2 = my_plotter.plot_variable(
            VARIABLE,   
            query=QUERY,
            kind=KIND, #sample, interaction, backtracked_pdg
            track_cuts = track_cuts,
            select_longest = SELECTLONGEST, #this is true by default in self._selection
            title=XTIT,
            #bins=asymm_bins,
            bins=BINS,
            stacksort=4, #0-numerical, 1-weights, 2-eLee on top, 3-eLee+nue on top, 4-numu on top
            range=RANGE,
            ratio=DRAWRATIO,
            draw_sys=DRAWSYS,
            purity=False,
            draw_data=DRAWDATA,
        )[0:3]
        print("Profile likelihood: {} sigma @ {} POT".format(my_plotter.significance_likelihood,pot))
        print("s/sqrt(b): {} sigma @ {} POT".format(my_plotter.significance, pot))

        #ax1.set_ylim(0,40)
        #ax1.set_yscale("log")
        #ax1.set_ylim(0,12000)
        # leave 50% headroom above the tallest bin
        ax1.set_ylim(0,ax1.get_ylim()[1]*1.5)
        # the ratio axis is only configured when a ratio panel was requested
        if DRAWRATIO: ax2.set_ylim(0.5,1.5)
        ax1.set_title(plot_title, fontsize=20)

        if SAVEFIG:
            fn = VARIABLE+"_"+date_time+"_"+tag+'_'+KIND
            fn += ".pdf"
            print("saving to {}...".format(SAVEDIR+fn))
            fig.tight_layout()
            fig.savefig(SAVEDIR+fn)
            if len(VARIABLES) > 1:
                pdf.savefig(fig)
        plt.show()
        
    if SAVEFIG and len(VARIABLES) > 1:
        pdf.close()