In [None]:
#import management
import autoreload
%load_ext autoreload
%autoreload 2  # Autoreload all modules

import importlib

#standard imports
import sys
import os
from datetime import datetime
import pickle

#custom modules
import localSettings as ls
import plotter
import NUMUhelper as moreFunctions
#import xgboost as xgb
#import nue_booster 

#scientific imports
import uproot
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
import matplotlib.pylab as pylab
import matplotlib.backends.backend_pdf
import numpy as np
#import awkward
import math
#from sklearn.model_selection import train_test_split

# Re-import the project modules so on-disk edits are picked up; repeat these
# calls further down wherever a fresh reload is needed.
importlib.reload(ls)
importlib.reload(plotter)
importlib.reload(moreFunctions)

# Make the analysis directory from the local settings importable.
main_path = ls.main_path
sys.path.append(main_path)

# Date stamp (MMDDYYYY) that is appended to the output file names.
now = datetime.now()
date_time = now.strftime("%m%d%Y")
print("date and time:",date_time)

# Extra-large fonts for axis labels, axis titles, and tick labels.
params = dict.fromkeys(
    ['axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize'],
    'x-large',
)
pylab.rcParams.update(params)

In [None]:
# Switches read by the processing loop below.
# PRESEL: read the slimmed (preselected) input files instead of the unslimmed
# ones, and re-apply the preselection query to every sample.
PRESEL = True
# MAKEPLOTS: produce the plots and save them as PDF files.
MAKEPLOTS = False
# EXPORTTXTFILES: write the passing-event text files (for SBNFit).
EXPORTTXTFILES = True

In [None]:
for NOMINAL_RUN in [3]:
    for FAKEDATASET in ['set1','set2','set3','set4','set5']:
        # Fake-data set5 is processed with the Run 1 samples; every other set
        # uses the nominal run. RUN is derived afresh on every pass so the
        # set5 override cannot leak into a later fake-data set, and the
        # progress line reports the run that is actually processed.
        RUN = 1 if FAKEDATASET == 'set5' else NOMINAL_RUN
        print("starting {} {}...".format(FAKEDATASET, RUN))
        importlib.reload(ls)
        # Name of the TTree read from every input file.
        tree = "NeutrinoSelectionFilter"
        #####################################################
        # Setting datapaths here instead of localsettings.py
        # Makes it easier to mix and match data, IMO
        # Should probably revert back to localsettings.py method in future

        # PRESEL selects the slimmed fake-data directory and the "_numupresel"
        # MC directory/suffix; otherwise the unslimmed inputs are used and no
        # suffix is added to the MC file names.
        if PRESEL:
            BNB_PATH = "E:\\HEPPA\\Data\\PeLEE\\fake-data\\Slim\\"
            MC_PATH = "E:\\HEPPA\\Data\\PeLEE\\0304_numupresel\\Run{}\\".format(RUN)
            APPEND = "_numupresel"
        else:
            BNB_PATH = "E:\\HEPPA\\Data\\PeLEE\\fake-data\\Unslim\\"
            MC_PATH = "E:\\HEPPA\\Data\\PeLEE\\0304\\Run{}\\".format(RUN)
            APPEND = ''

        # NOTE(review): the sample names below already end in ls.APPEND and the
        # local APPEND is added again when the files are opened; confirm that
        # ls.APPEND is empty (or that the doubled suffix is intended).
        if RUN == 3:
            USECRT = True
            ############## SETUP DATASAMPLE PATHS ##############
            BNB = 'prod_uboone_nu2020_fakedata_{}_run3b_reco2_v08_00_00_41_reco2'.format(FAKEDATASET)
            ################ SETUP MC SAMPLE PATHS ###################
            EXT = 'data_extbnb_mcc9.1_v08_00_00_25_reco2_G1_all_reco2'+ls.APPEND
            #EXT = 'ext'+ls.APPEND
            NU  = 'prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run3_reco2_G_reco2'+ls.APPEND
            DRT = 'prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run3_reco2_reco2'+ls.APPEND

        elif RUN == 1:
            USECRT = False
            BNB = 'prod_uboone_nu2020_fakedata_{}_run1_reco2_v08_00_00_41_reco2'.format(FAKEDATASET)
            EXT = 'data_extbnb_mcc9.1_v08_00_00_25_reco2_C1_all_reco2'+ls.APPEND
            #EXT = 'data_extbnb_mcc9.1_v08_00_00_25_reco2_all_reco2'+ls.APPEND #Run1 + Run2
            NU  = 'prodgenie_bnb_nu_uboone_overlay_mcc9.1_v08_00_00_26_filter_run1_reco2_reco2'+ls.APPEND
            DRT = 'prodgenie_bnb_dirt_overlay_mcc9.1_v08_00_00_26_run1_reco2_reco2'+ls.APPEND

        else:
            # Without this guard an unconfigured run would silently reuse the
            # sample names and CRT flag left over from the previous iteration
            # (or fail later with a NameError on the first one).
            raise ValueError("no sample names configured for RUN = {}".format(RUN))

        # Open the selection tree of each sample: neutrino overlay (mc),
        # beam-off data (ext), dirt overlay (dirt), and the fake data (data).
        mc = uproot.open(MC_PATH+NU+APPEND+".root")[ls.fold][tree]
        ext = uproot.open(MC_PATH+EXT+APPEND+".root")[ls.fold][tree]
        dirt = uproot.open(MC_PATH+DRT+APPEND+".root")[ls.fold][tree]

        data = uproot.open(BNB_PATH+BNB+".root")[ls.fold][tree]

        uproot_v = [mc,ext,data,dirt]

        # Branches read into the MC, EXT, and dirt dataframes. Some names
        # appear more than once (e.g. "nproton", "trk_energy_tot"); the list
        # is de-duplicated further down before it is used.
        variables = [
            #"shr_dedx_Y", "shr_bkt_pdg", "p", "pt", "shr_theta",
            "selected", "nu_pdg",
            "slpdg", "trk_score_v", "backtracked_pdg", # modified from shr_score_v
            "shr_pfp_id_v", "category","shr_theta", 'theta',
            "topological_score",
            #"shr_energy_tot", 
            "trk_energy_tot", "shr_hits_tot", "ccnc", "trk_chipr",
            "trk_bkt_pdg", "hits_ratio", "n_tracks_contained", 
            "NeutrinoEnergy2",
            #"run","sub","evt",
            "CosmicIP","CosmicDirAll3D","CosmicIPAll3D",
            "nu_flashmatch_score","best_cosmic_flashmatch_score","best_obviouscosmic_flashmatch_score",
            #"trk_pfp_id",
            "trk_llr_pid_score_v", # trk-PID score
            "trk_energy_proton_v", # track energy under proton hyp
            "trk_energy_muon_v", # track energy under muon hyp
            "trk_calo_energy_y_v", # track calo energy
            #"pi0_energy2_Y", # pi0 tagger variables
            'true_nu_vtx_x','true_nu_vtx_y','true_nu_vtx_z',
            "reco_nu_vtx_sce_x","reco_nu_vtx_sce_y","reco_nu_vtx_sce_z",
            "nproton", "nmuon",
            "nu_e", "n_showers_contained", "shr_distance", "trk_distance",
            "hits_y", "shr_pz", "shr_energy", "shr_dedx_U", "shr_dedx_V", "shr_phi", "trk_phi", "trk_theta",
            "shr_tkfit_dedx_U", "shr_tkfit_dedx_V", "run", "sub", "evt", "nproton", "trk_pid_chipr_v",
            "trk_len", "mc_pdg", "slnunhits", "slnhits", "shr_score", "trk_score", 
            #"trk_hits_tot",
            #"matched_E", "shr_bkt_E", "trk_bkt_E", "trk_energy", "tksh_distance", "tksh_angle",
            "npi0","npion","pion_e","muon_e","pi0truth_elec_etot","true_e_visible",
            "pi0_e", "shr_energy_tot_cali", "shr_dedx_Y_cali", "evnunhits", "nslice", "interaction",
            "slclustfrac", "reco_nu_vtx_x", "reco_nu_vtx_y", "reco_nu_vtx_z","contained_fraction",
            "trk_sce_start_x_v","trk_sce_start_y_v","trk_sce_start_z_v",
            "trk_sce_end_x_v","trk_sce_end_y_v","trk_sce_end_z_v",
            "trk_mcs_muon_mom_v","trk_range_muon_mom_v", "trk_len_v",
            "pfp_generation_v","trk_distance_v","trk_theta_v","trk_phi_v",
            "trk_energy_muon","trk_energy_tot","trk_energy",
            "pfnhits","pfnunhits",
        ]
        # Reduced branch list for the numu selection; it is the list used to
        # build the fake-data dataframe.
        #the big G1 file is a picky eater
        slimmed_variables = [
            "nslice", "selected", "nu_pdg",
            "slpdg", "trk_score_v","slclustfrac",
            #"contained_fraction",
            "backtracked_pdg","category",
            "topological_score",
            "run", "sub", "evt",
            "reco_nu_vtx_sce_x","reco_nu_vtx_sce_y","reco_nu_vtx_sce_z",
            "trk_sce_start_x_v","trk_sce_start_y_v","trk_sce_start_z_v",
            "trk_sce_end_x_v","trk_sce_end_y_v","trk_sce_end_z_v",
            "trk_mcs_muon_mom_v","trk_range_muon_mom_v", "trk_len_v",
            'trk_llr_pid_score_v',
            "pfp_generation_v","trk_distance_v","trk_theta_v","trk_phi_v",
            #"trk_energy_muon","trk_energy_tot","trk_energy",
            'trk_energy_muon_v','trk_energy_proton_v',
            "pfnhits","pfnunhits",
            'slnunhits','slnhits',
            'NeutrinoEnergy2',
        ]

        # Run 3 with the CRT in use: both branch lists also need the CRT and
        # cosmic-distance branches.
        if USECRT and RUN == 3:
            crt_branches = ["_closestNuCosmicDist", "crtveto", "crthitpe", "CosmicIP"]
            variables.extend(crt_branches)
            slimmed_variables.extend(crt_branches)

        # Drop repeated branch names (the resulting order is arbitrary).
        variables = list(set(variables))
        BNB_variables = list(set(slimmed_variables))
        print(BNB_variables)


        # Event-weight branches read for the simulated samples
        # ("leeweight" and the systematic weight branches are not loaded here).
        WEIGHTS = ["weightSpline","weightTune","weightSplineTimesTune"]
        # Common optical filter quantities, available for MC only.
        MCVARS = ["_opfilter_pe_beam", "_opfilter_pe_veto"]
        # Systematic weight branches; empty, so none are read
        # (candidates: 'weightsFlux', 'weightsGenie').
        SYSTEMATICS = []

        # The fake data gets the reduced branch list, beam-off data the full
        # list, and the two simulated samples the full list plus weights and
        # optical-filter branches. Vector branches stay un-flattened, i.e. one
        # row per event.
        simulated_branches = variables + WEIGHTS + MCVARS

        data = data.pandas.df(BNB_variables, flatten=False)
        print("Data dataframe built...")

        mc = mc.pandas.df(simulated_branches, flatten=False)
        print("MC dataframe built...")

        ext = ext.pandas.df(variables, flatten=False)
        print("EXT dataframe built...")

        dirt = dirt.pandas.df(simulated_branches, flatten=False)
        print("Dirt dataframe built...")

        #############################################################################################
        # Scaling-related clean-up of the simulated samples: a tune weight that
        # is non-positive, infinite, above 100, or NaN is replaced by 1.
        df_v = [mc,dirt]

        for df in df_v:
            for weight_name in ('weightTune', 'weightSplineTimesTune'):
                weight = df[weight_name]
                # "> 100" also catches +inf and "<= 0" also catches -inf.
                unusable = (weight <= 0) | (weight > 100) | np.isnan(weight)
                df.loc[unusable, weight_name] = 1.

        # Per-event totals of the per-particle hit counts, for every sample.
        df_v = [mc,ext,data,dirt]

        for df in df_v:
            for total_name, per_particle_name in (("slclnhits", "pfnhits"), ("slclnunhits", "pfnunhits")):
                df[total_name] = df[per_particle_name].apply(lambda counts: sum(counts))
        #
        #Ryan's calculated columns and various necessities
        # Sample order fixes the meaning of the loop index below:
        # 0 = mc, 1 = ext, 2 = data, 3 = dirt.
        df_v = [mc,ext,data,dirt]

        M_mu = 0.105 #GeV/c^2
        M_p = 0.938 #GeV/c^2
        M_n = 0.939 #GeV/c^2
        B = 0.04 #binding energy of argon used in simulation
        # Tracks with an LLR PID score below this value are counted as protons.
        proton_pidscore = -0.2


        for i,df in enumerate(df_v):
            print(i)
            #useful variables
            # Fractional difference between the MCS and the range-based muon momentum.
            df['trk_p_quality_v'] = (df['trk_mcs_muon_mom_v']-df['trk_range_muon_mom_v'])/df['trk_range_muon_mom_v']
            df['trk_cos_theta_v'] = df['trk_theta_v'].apply(lambda x: np.cos(x))
            df['trk_sin_theta_v'] = df['trk_theta_v'].apply(lambda x: np.sin(x))
            df['trk_cos_phi_v'] = df['trk_phi_v'].apply(lambda x: np.cos(x))
            df['trk_sin_phi_v'] = df['trk_phi_v'].apply(lambda x: np.sin(x))
            # sqrt(2*M_p*E) applied to the proton-hypothesis track energy.
            df['trk_range_proton_mom_v'] = df['trk_energy_proton_v'].apply(lambda x: np.sqrt(2*M_p*x))
            df['trk_range_muon_e_v'] = (df['trk_range_muon_mom_v']**2 + M_mu**2)**.5 # E
            df['trk_range_muon_ke_v'] = df['trk_range_muon_e_v'] - M_mu #KE
            # Recomputed for every sample as the sum of the proton-hypothesis
            # energies of all tracks; this replaces any value read from file.
            df['trk_energy_tot'] = df["trk_energy_proton_v"].apply(lambda x: sum(x)) #is missing from G1 sample

            # Per-track neutrino energy estimate: that track's muon energy plus
            # the proton-hypothesis energy of all the other tracks.
            df['reco_nu_e_range_v'] = df["trk_range_muon_e_v"] + (df["trk_energy_tot"] - df["trk_energy_proton_v"])     
            #protons have trk_score cut and llr_pid_score cut
            proton_mask = df['trk_score_v'].apply(lambda x: x>0.5) * df['trk_llr_pid_score_v'].apply(lambda x: x<proton_pidscore)
            # Count the tracks passing the mask: the masked scores are non-zero
            # only where the mask is True.
            df['reco_nproton'] = (df['trk_llr_pid_score_v']*proton_mask).apply(lambda x: len(x[x!=False]))
            df['reco_ntrack'] = df['trk_score_v'].apply(lambda x: len(x))
            # break momentum vector apart
            df['trk_dx_v'] = df['trk_sin_theta_v']*df['trk_cos_phi_v']
            df['trk_dy_v'] = df['trk_sin_theta_v']*df['trk_sin_phi_v']
            df['trk_dz_v'] = df['trk_cos_theta_v']

            #definitions related to neutrino energy
            #df['nu_e_QE_proton_v'] = 0.5 * (2 * (M_n - B) * df['trk_energy_proton_v'] - ((M_n - B)**2 + M_p**2 - M_mu**2)) / ((M_n-B)-df['trk_energy_proton_v']+np.sqrt(df['trk_energy_proton_v']**2-M_p**2)*df['trk_cos_v'])
            #df['nu_e_QE_muon_v'] = 0.5 * (2 * (M_n - B) * df['trk_energy_muon_v'] - ((M_n - B)**2 + M_p**2 - M_mu**2)) / ((M_n-B)-df['trk_energy_muon_v']+np.sqrt(df['trk_energy_muon_v']**2-M_p**2)*df['trk_cos_v'])
            #df['2body_E_cons_v'] = np.sqrt((df['reco_nu_e_range_v']-df['nu_e_QE_proton_v'])**2+(df['reco_nu_e_range_v']-df['nu_e_QE_muon_v'])**2+(df['nu_e_QE_muon_v']-df['nu_e_QE_proton_v'])**2)


            #just MC stuff (truth level)
            # Indices 0 and 3 are the mc and dirt samples.
            if i in [0,3]:
                df['backtracked_pdg_v'] = df['backtracked_pdg']

            #This information is useful for applying corrections and such
            # Index 4 never occurs with the four-element df_v above, so only
            # the data sample (index 2) is flagged as bnbdata.
            if i == 2 or i == 4: df['bnbdata'] = 1
            else: df['bnbdata'] = 0
            if i == 1: df['extdata'] = 1
            else: df['extdata'] = 0
            if i in [1,2,4]:
                #column needs to exist, even if not cut on 
                df['_opfilter_pe_beam'] = 999
                df['_opfilter_pe_veto'] = -999
            if i not in [0,3] and USECRT and RUN == 3:
                #only apply to data and ext
                # Scale crthitpe by 1.09 for events with run number above 16300.
                df.loc[(df['run'] > 16300),'crthitpe'] = df['crthitpe']*1.09 #hitpe correction
        # Restore the cosmic category (4) on the neutrino MC and, optionally,
        # split category 2 by true proton multiplicity.
        NPROTON_CAT = True
        df = mc

        # Events outside categories 1, 10, 11, and 111 whose ratio
        # slnunhits/slnhits is below 0.2 are relabelled as category 4.
        keeps_category = df['category'].isin([1, 10, 11, 111])
        low_hit_ratio = df['slnunhits']/df['slnhits'] < 0.2
        df.loc[~keeps_category & low_hit_ratio, 'category'] = 4

        if NPROTON_CAT:
            # 0 / 1 / 2 / >=3 protons map to categories 22 / 23 / 24 / 25.
            is_category_2 = df['category'] == 2
            n_proton = df['nproton']
            proton_split = (
                (22, n_proton == 0),
                (23, n_proton == 1),
                (24, n_proton == 2),
                (25, n_proton >= 3),
            )
            for new_category, has_multiplicity in proton_split:
                df.loc[is_category_2 & has_multiplicity, 'category'] = new_category

        # Separate the overlay by neutrino flavour: nu_pdg = +-12 goes to
        # "nue", nu_pdg = +-14 stays in "mc".
        nue = mc.query('nu_pdg == 12 or nu_pdg == -12')
        mc  = mc.query('nu_pdg == 14 or nu_pdg == -14')

        #####################################################################################
        # Collect the samples in a form that is convenient to access later.
        scaling = 1
        bnb_type = 'FD' + FAKEDATASET[-1]

        weights, pot = moreFunctions.get_scaling(RUN, bnb_type, scaling)

        samples = dict(mc=mc, nue=nue, data=data, ext=ext, dirt=dirt)

        # Ensure preselection consistency: re-apply the preselection query to
        # every sample, with the optical-filter option switched on only for
        # the simulated samples.
        if PRESEL:
            for sample in samples:
                OPFIL = sample in ['mc','nue','dirt']
                presel,_ = moreFunctions.get_NUMU_sel(USECRT, opfilter=OPFIL)
                samples[sample] = samples[sample].query(presel)

        my_plotter = plotter.Plotter(samples, weights, pot=pot)

        # Report the weights and the POT that were obtained.
        print("weights:")
        for weight in weights:
            weight_line = "{}: {}".format(weight,weights[weight])
            print(weight_line)
        print("POT: {}".format(pot))

        ###############################################################
        ## APPLY SELECTIONS
        fullsel_samples = {}
        presel_samples = {}
        query,_ = moreFunctions.get_NUMU_sel(USECRT)

        # Preselection: always stored in presel_samples; it also replaces the
        # entry in samples when the local settings name a "presel" sample.
        replace_samples = "presel" in ls.SAMPLE
        for sample in samples:
            preselected = samples[sample].query(query)
            presel_samples[sample] = preselected
            if replace_samples:
                samples[sample] = preselected

        # Full muon selection on top of the preselection, printing a time stamp
        # per sample; reco_ntrack is copied over from the matching
        # preselected rows.
        for sample in samples:
            time_stamp = moreFunctions.get_current_time("%H:%M:%S")
            print("{}: {}".format(sample, time_stamp))
            passing = moreFunctions.apply_muon_fullsel(presel_samples[sample], sample, USECRT, False)
            fullsel_samples[sample] = passing
            passing['reco_ntrack'] = presel_samples[sample].loc[passing.index]['reco_ntrack']

            

            
        ###############################################################
        ## Export TXT files for SBNFit 
        if EXPORTTXTFILES:
            # Reduce the vector-valued columns of interest to one scalar per
            # event: the entry that belongs to the longest track. The scalar
            # column is named like the vector column without its "_v" suffix.
            variables = ['trk_theta_v','trk_range_muon_e_v','reco_nu_e_range_v']
            for sample in fullsel_samples.keys():
                df = fullsel_samples[sample]
                # Position of the longest track in each event (first one on a
                # tie); -1 marks an event without any track.
                longest_index = [
                    int(np.argmax(lengths)) if len(lengths) > 0 else -1
                    for lengths in df['trk_len_v']
                ]
                for variable in variables:
                    new_variable = variable[:-2]
                    # Index directly instead of multiplying by a boolean mask
                    # and filtering on "!= False": that approach discarded a
                    # legitimate value of exactly 0 and broke when NaN removal
                    # changed the array length. A missing entry becomes NaN
                    # and is dropped by the ">= 0" query before writing.
                    df[new_variable] = np.array(
                        [
                            values[index] if 0 <= index < len(values) else np.nan
                            for values, index in zip(df[variable], longest_index)
                        ],
                        dtype=float,
                    )
            # just spit out some diagnostics    
            binedges = np.linspace(0.15,1.55,15)
            print("binedges:{}".format(binedges))

            for sample in fullsel_samples:
                print(sample)
                # NaN entries carry no energy estimate and are left out.
                binvals,_ = np.histogram(fullsel_samples[sample]['reco_nu_e_range'].dropna(), bins=binedges)
                print(binvals," -> ",sum(binvals))
            #do the txt file exportation
            # The trailing "" keeps a trailing separator on SAVEPATH, so the
            # file name can be appended directly on any platform.
            SAVEPATH = os.path.join("passing_events", "fake-data-{}".format(FAKEDATASET[-1]), "")
            if not os.path.exists(SAVEPATH):
                os.makedirs(SAVEPATH)

            for sample in ['mc','data']:
                print(sample)
                df = fullsel_samples[sample].query('reco_nu_e_range >= 0')
                print ('file %s_final has %i selected entries'%(sample,df.shape[0]))

                out_name = SAVEPATH+"{}_Passingevents_{}_{}.txt".format(sample,FAKEDATASET,date_time)
                # The context manager closes the file even if a write fails.
                with open(out_name,'w') as f_out:
                    f_out.write('%s\t%s\t%s\t%s\t%s\t%s\n'%('run','sub','evt','angle','Emuon','Erange'))
                    rows = zip(df['run'], df['sub'], df['evt'],
                               df['trk_theta'], df['trk_range_muon_e'], df['reco_nu_e_range'])
                    for run, sub, evt, angle, Emuon, Erange in rows:
                        f_out.write('%i\t%i\t%i\t%.4f\t%.4f\t%.4f\n'%(run,sub,evt,angle,Emuon,Erange))
        
        
        ############################################################### 
        ## MAKE ALL THE PLOTS
        if MAKEPLOTS:
            # Vector columns for which a scalar companion column is created
            # from the first entry (index 0) of each vector; the scalar column
            # is named like the vector column without its "_v" suffix.
            # NOTE(review): this overwrites a same-named scalar column that is
            # already present (e.g. one written by the text-file export above,
            # which picks the longest track rather than entry 0) -- confirm
            # that this is intended.
            VECS = [
                'reco_nu_e_range_v', 'trk_range_muon_e_v',
                'trk_len_v','trk_cos_theta_v'
            ]
            for sample in fullsel_samples:
                for vec in VECS:
                    fullsel_samples[sample][vec[:-2]] = fullsel_samples[sample][vec].apply(lambda x: x[0])
            #####################################################################################################
            #active volume
            # Lower/upper bounds per axis; not referenced by the code visible
            # in this cell.
            AVx = [-1.55,254.8]
            AVy = [-115.53, 117.47]
            AVz = [0.1, 1036.9]

            # One pass per selection stage: plot_type picks the set of plots
            # returned by get_plots, tag names the sample collection to draw
            # (the tag is matched by substring further down).
            for (plot_type,tag) in zip(['presel input noCRT', 'muon input', 'fullselKinematics'],['samples','presel_samples','fullsel_samples']):

                VARIABLES, BIN, RANGES, XTITS = moreFunctions.get_plots(plot_type)
                tag = tag + '_fakedata{}'.format(FAKEDATASET[-1])
                # Plot options for this pass.
                SAVEFIG = True
                DRAWDATA = True
                DRAWRATIO = True
                SELECTLONGEST = True
                DRAWSYS = False #need to load extra files
                #If plot_sample is fullsel_samples, change tag to "fullsel_samples to save time ;)
                # NOTE(review): absolute, user-specific output directory.
                SAVEDIR = 'C:\\Users\\Ryan\\python-workspace\\PELEE-newmcfilteredsamples\\plots\\Fake-Data-{}\\Run{}\\'.format(FAKEDATASET[-1],RUN)
                if not os.path.exists(SAVEDIR): os.makedirs(SAVEDIR)
                QUERY, track_cuts = 'nslice == 1', None
                # The second assignment wins: only 'event_category' is drawn.
                KINDS = ['event_category','interaction'] #interaction, backtracked_pdg, event_category
                KINDS = ['event_category']

                # Encode the plot options in the tag, which ends up in the
                # output file names.
                if SELECTLONGEST:
                    tag += '_longest'
                else:
                    tag += '_alltracks'
                if not DRAWDATA:
                    tag += "_nodata"
                if DRAWSYS:
                    tag += '_detsys'
                #if 23 in np.array(plot_sample['mc']['category']): 
                #    tag += '_nproton'

                for KIND in KINDS:
                    # With more than one variable, all figures of this
                    # tag/kind are also collected into one combined PDF.
                    if SAVEFIG and len(VARIABLES) > 1:
                        pdf = matplotlib.backends.backend_pdf.PdfPages(SAVEDIR+"combined_{}_{}.pdf".format(tag,KIND))
                    for (VARIABLE, BINS, RANGE, XTIT) in zip(VARIABLES, BIN, RANGES, XTITS):
                    #####################################################################
                        # lots of formatting things based on the tag
                        # The tag is matched by substring, so the order of the
                        # branches matters: the more specific names are tested
                        # before the generic "samples" fallback.
                        # NOTE(review): several branches reference sample
                        # collections (fullsel_above105_samples, CCQE_*_samples,
                        # fullsel_noMCS_samples, presel_contained_samples, ...)
                        # that are not defined in the code visible here; with the
                        # three tags produced by the enclosing loop only the
                        # "fullsel_sample", "presel_sample", and "samples"
                        # branches are taken.
                        if "fullsel_above105_samples" in tag.lower():
                            print("using fullsel_above105_samples")
                            plot_sample = fullsel_above105_samples #or samples, or presel_sapmles, 
                            plot_title = r"Fullsel INC (above 1.05 GeV reco E$_{\nu}$)"
                        elif "fullsel_sample" in tag.lower():
                            print("using fullsel_muon_samples")
                            plot_sample = fullsel_samples #or samples, or presel_sapmles, 
                            plot_title = r"Fullsel $\nu_{\mu}$ CC INC"
                        elif "ccqe_muon" in tag.lower():
                            print("using CCQE_muon_samples")
                            plot_sample = CCQE_muon_samples 
                            plot_title = "CCQE muon"
                        elif "ccqe_proton" in tag.lower():
                            print("using CCQE_proton_samples")
                            plot_sample = CCQE_proton_samples 
                            plot_title = "CCQE proton"
                        elif "ccqe_contained" in tag.lower():
                            plot_sample = CCQE_contained_samples 
                            plot_title = "CCQE, contained tracks"
                        elif "ccqe_sample" in tag.lower():
                            plot_sample = CCQE_samples 
                            plot_title = "CCQE"
                        elif "fullsel_nomcs_sample" in tag.lower():
                            print("using fullsel_noMCS_muon_samples")
                            plot_sample = fullsel_noMCS_samples 
                            plot_title = "Fullsel, No MCS Cut, "
                        elif "presel_contained_sample" in tag.lower():
                            print("using presel_contained_samples")
                            plot_sample = presel_contained_samples
                            plot_title = "Presel, Contained Tracks, "
                        elif "ccqe_tracktester_contained" in tag.lower():
                            plot_sample = CCQE_tracktester_contained_samples
                            plot_title = "CCQE, trk_score > 0.5, contained"
                        elif "ccqe_tracktester" in tag.lower():
                            plot_sample = CCQE_tracktester_samples
                            plot_title = "CCQE, trk_score > 0.5"
                        elif "presel_sample" in tag.lower():
                            plot_sample = presel_samples
                            plot_title = r"Presel $\nu_{\mu}$ CC INC"
                        elif "samples" in tag.lower():
                            plot_sample = samples
                            # Both outcomes currently give the same title.
                            if "presel" in ls.SAMPLE or PRESEL == True:
                                plot_title = "Presel"
                            else:
                                plot_title = r"Presel"
                        else:
                            print("using default samples")
                            plot_sample = samples
                            if "presel" in ls.SAMPLE:
                                plot_title = "Presel"
                            else:
                                plot_title = "NoSel"

                        if not SELECTLONGEST:
                            plot_title += ', all tracks'
                        if "noopfilter" in tag.lower():
                            plot_title += ', no opfilter cuts'

                        # The variable must exist as a column of the data
                        # sample; a placeholder of -999 is written when it is
                        # missing.
                        if VARIABLE not in samples['data'].keys(): samples['data'][VARIABLE] = -999

                        # Annotate the axis title according to the tag.
                        if "above105" in tag.lower():
                            XTIT += " (reco_nu_e_range > 1.05 GeV)"
                        elif "below105" in tag.lower():
                            XTIT += " (reco_nu_e_range <= 1.05 GeV)"
                            if VARIABLE == 'reco_nu_e_range_v':
                                BINS,  RANGE = 11, (-0.05, 1.05)

                        if "fullsel_samples" in tag.lower():
                            plot_sample = fullsel_samples
                        elif "fullsel_notopo_samples" in tag.lower():
                            plot_sample = fullsel_notopo_samples

                        if "nomcs" in tag.lower():
                            XTIT += " no MCS cut"
                        elif "invertmcs" in tag.lower():
                            XTIT += ' inverted MCS cut'

                        if "true2212" in tag.lower():
                            XTIT += ' (true leading proton) '

                        if 'crtgt100' in tag.lower():
                            XTIT += ' (crthitpe > 100)'
                        elif 'crtlt100' in tag.lower():
                            XTIT += ' (crthitpe < 100)'
                        elif 'invertcrt' in tag.lower():
                            XTIT += ' (crthitpe > 100 and crtveto == 0)'

                        if 'nocrt' in tag.lower():
                            plot_title += ', no CRT'
                        # Append the fake-data set name and the run number.
                        plot_title += ', FD{}R{}'.format(FAKEDATASET,RUN)

                        #get specific cuts based on what the tag is
                        #QUERY, track_cuts = moreFunctions.get_Cuts(tag, ISRUN3)
                        #######################################################
                        # plotting
                        my_plotter = moreFunctions.get_plotter(tag, plot_sample, pot, RUN, USECRT_temp, False)
                        fig, ax1, ax2 = my_plotter.plot_variable(
                            VARIABLE,   
                            query=QUERY,
                            kind=KIND, #sample, interaction, backtracked_pdg
                            track_cuts = track_cuts,
                            select_longest = SELECTLONGEST, #this is true by default in self._selection
                            title=XTIT,
                            #bins=asymm_bins,
                            bins=BINS,
                            stacksort=4, #0-numerical, 1-weights, 2-eLee on top, 3-eLee+nue on top, 4-numu on top
                            range=RANGE,
                            ratio=DRAWRATIO,
                            draw_sys=DRAWSYS,
                            #draw_data=DRAWDATA,
                        )[0:3]
                        print("Profile likelihood: {} sigma @ {} POT".format(my_plotter.significance_likelihood,pot))
                        print("s/sqrt(b): {} sigma @ {} POT".format(my_plotter.significance, pot))

                        #ax1.set_ylim(0,40)
                        #ax1.set_yscale("log")
                        #ax1.set_ylim(0,12000)
                        ax1.set_ylim(0,ax1.get_ylim()[1]*1.5)
                        #ax2.set_ylim(0.5,1.5)
                        ax1.set_title(plot_title, fontsize=15)

                        if SAVEFIG:
                            fn = VARIABLE+"_"+date_time+"_"+tag+'_'+KIND
                            fn += ".pdf"
                            print("saving to {}...".format(SAVEDIR+fn))
                            fig.tight_layout()
                            fig.savefig(SAVEDIR+fn)
                            if len(VARIABLES) > 1:
                                pdf.savefig(fig)
                        plt.show()

                    if SAVEFIG and len(VARIABLES) > 1:
                        pdf.close()