# Unified Plotting - version for nue Xsec measurement

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# pi0 scaling configuration used throughout the notebook: "val" is passed to
# load_data_run123 as its pi0scaling argument and "title" is appended to the
# plot title string. "suffix" is not used in the part of the notebook shown here.
# Exactly one of the three alternatives below should be left uncommented.
pi0scaling = {"val": 0, "suffix": "", "title": "No pi0 Scaling"} #no scaling
#pi0scaling = {"val": 1, "suffix": "_pi0flat", "title": "0.759 pi0 Scaling"} #flat scaling
#pi0scaling = {"val": 2, "suffix": "_pi0e040", "title": r"$\pi^0$ scaling: [$1 - 0.40 \times E_\pi$]"} #energy-dependent

In [None]:
from matplotlib import pyplot as plt

# Enlarge the axis-label, title and tick-label fonts for every figure drawn below.
params = {
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large'
}
plt.rcParams.update(params)

from load_data_run123 import *
from unblinding_far_sideband import *

In [None]:
import sys
import localSettings as ls
# Show the locally configured paths, then put main_path on the import search path.
print(ls.main_path)
print(ls.ntuple_path)
main_path = ls.main_path
sys.path.append(main_path)

In [None]:
import scipy.stats

In [None]:
# Dataset selector. Element [0] is passed to load_data_run123(which_sideband=...)
# and element [1] to get_weights(dataset=...). Leave exactly one line uncommented.
#which_sideband = ['np_far'       ,"farsideband"] # far-sideband 1eNp datasets
#which_sideband = ['0p_far'       ,"farsideband"] # far-sideband datasets for the '0p_far' key (NOTE(review): comment used to say 1eNp, presumably 1e0p - confirm)
#which_sideband = ['numu',"numu"] # numu NTuples [v41]
#which_sideband = ['2plus_showers',"farsideband"] # far-sideband 2+ shower datasets
#which_sideband = ['np_sb_comb',"farsideband"] # combined far-sideband 1eNp and 2+ shower datasets
#which_sideband = ['opendata'     ,'opendata']    # open datasets (5e19 / 1e19)
#which_sideband = ['opendata'     ,'fakeset1']    # set1 fake dataset
#which_sideband = ['opendata'     ,'fakeset9']    # set9=NuWro fake dataset
which_sideband = ['fulldata'     ,'fulldata']    # full data

In [None]:
# Load the samples for the chosen dataset and pi0 scaling mode. The result is
# indexed by sample name further down (samples['nue'], samples['ext']).
# return_plotter=False because the Plotter objects are built explicitly in a
# later cell. The meaning of the remaining flags is defined inside
# load_data_run123, which is not visible from this notebook.
samples = load_data_run123(which_sideband=which_sideband[0],
                           return_plotter=False,
                           pi0scaling=pi0scaling["val"],
                           loadpi0variables=True,
                           loadtruthfilters=True,
                           loadshowervariables=True,
                           loadnumuntuples=False,
                           loadnumuvariables=False,
                           loadfakedata=0,#9,
                           USEBDT=True,
                           loadsystematics=True,
                           loadrecoveryvars=True,
                           updatedProtThresh=0.05)


In [None]:
import plotter
import unblinding_far_sideband
import importlib
# Re-execute the two local modules so that edits made on disk are picked up
# without restarting the kernel. Names previously pulled in with
# `from unblinding_far_sideband import *` are not rebound by reload(); the
# star-import has to be run again for those to refresh.
importlib.reload(plotter)
importlib.reload(unblinding_far_sideband)

In [None]:
# One Plotter per run period, keyed by the run number. The per-sample weights
# and the POT of each period come from get_weights for the selected dataset.
plotter_runbyrun = {}
for run in (1, 2, 3, 123, 12):
    this_weights, this_pot = get_weights(run, dataset=which_sideband[1])
    plotter_runbyrun[run] = plotter.Plotter(samples, this_weights, pot=this_pot)
    if run == 123:
        # Print the weights of the combined period as a sanity check.
        print(this_weights)

# Xsec analysis

In [None]:
from matplotlib.lines import Line2D

def Pur(my_plotter,VARIABLE,QUERY,ACCEPTANCE,BINS,RANGE):
    """Return the per-bin ratio used as signal purity for a selection.

    plot_variable is called twice with identical settings, once with QUERY
    and once with QUERY restricted by ACCEPTANCE. Both figures are closed
    immediately; only the returned histogram contents are used.

    Parameters:
        my_plotter: object exposing plot_variable (a Plotter in this notebook).
        VARIABLE:   name of the variable to histogram.
        QUERY:      selection query string.
        ACCEPTANCE: additional query string appended with ' and ' for the numerator.
        BINS, RANGE: forwarded to plot_variable as bins/range.

    Returns:
        stack[0][-1] of the QUERY+ACCEPTANCE call divided by
        (stack[0][-1] + exty) of the QUERY-only call.
        NOTE(review): stack[0][-1] is presumably the top of the stacked MC
        histogram and exty the EXT contribution - confirm against Plotter.

    Relies on the notebook-level names DETSYSPATH and plt. The denominator
    is printed as a side effect.
    """
    categories = ('mc','nue','ccpi0','ncpi0','dirt','ncnopi','nccpi')

    def _top_and_ext(selection):
        # Draw the stacked histogram for `selection`, discard the figure and
        # hand back the last stack level together with the exty array.
        fig, _axis, stack, _labels, exty = my_plotter.plot_variable(
            VARIABLE,
            query=selection,
            kind="event_category",
            draw_sys=False,
            stacksort=3,
            title="my_xaxis",
            bins=BINS,
            range=RANGE,
            # a fresh all-False dictionary per call: detector systematics are off
            detsysdict=dict.fromkeys(categories, False),
            DETSYSPATH=DETSYSPATH,
            COVMATRIX="",#'cov_matrix_Enu_reco_1eNp.txt',
            ratio=False,
            chisq=False,
        )
        plt.close(fig)
        return stack[0][-1], exty

    selected_top, selected_ext = _top_and_ext(QUERY)
    den = selected_top+selected_ext
    print(den)
    num, _unused_ext = _top_and_ext(QUERY+' and '+ACCEPTANCE)
    return num/den

def Eff(df,var,query,acceptance,bin_edges,absval=False):
    """Compute the binned selection efficiency of `query` as a function of `var`.

    For every bin the denominator is the number of rows of `df` that fall
    strictly inside the bin (and satisfy `acceptance`, when it is not empty);
    the numerator is the subset of those rows that also passes `query`.
    Bins with an empty denominator are skipped, so the returned arrays can be
    shorter than the number of bins.

    Parameters:
        df:         pandas DataFrame providing the columns used in the cuts.
        var:        name of the column that is binned.
        query:      DataFrame.query string defining the selection.
        acceptance: DataFrame.query string for the denominator ('' for none).
        bin_edges:  increasing sequence of bin edges.
        absval:     when true, bin in |var|: rows with var in (-binmax, -binmin)
                    are counted together with those in (binmin, binmax).

    Returns:
        (bin centers, efficiencies, binomial errors) as numpy arrays, where
        the error is sqrt(eff*(1-eff)/N_denominator).
    """
    bin_edges = np.asarray(bin_edges, dtype=float)
    bin_centers = 0.5*(bin_edges[1:]+bin_edges[:-1])
    bins = []
    bin_eff = []
    bin_err = []
    for center, binmin, binmax in zip(bin_centers, bin_edges[:-1], bin_edges[1:]):
        if absval:
            bincut = '(%s > %f and %s < %f) or (%s > -%f and %s < -%f)'%(var,binmin,var,binmax,var,binmax,var,binmin)
        else:
            bincut = '%s > %f and %s < %f'%(var,binmin,var,binmax)
        # Parenthesise both pieces: 'and' binds tighter than 'or', so without
        # the brackets the acceptance would only apply to the negative-side
        # range of an absval cut (and to the last term of an acceptance that
        # itself contains a top-level 'or').
        bincut = '(%s)'%bincut
        if acceptance != '':
            bincut += ' and (%s)'%acceptance
        df_tmp = df.query(bincut) # denominator: rows in this bin within acceptance
        n_den = df_tmp.shape[0]
        if n_den == 0:
            continue
        df_sub = df_tmp.query(query) # numerator: rows that also pass the selection
        eff = df_sub.shape[0] / float(n_den)
        err = np.sqrt( eff*(1-eff)/n_den )
        bin_eff.append( eff )
        bin_err.append( err )
        bins.append(center)
    return np.array(bins),np.array(bin_eff),np.array(bin_err)

from scipy.optimize import curve_fit
def gauss(x, *p):
    """Evaluate an unnormalised Gaussian at x.

    The three extra positional arguments are, in order, the amplitude, the
    mean and the width; this calling convention is the one curve_fit uses
    for the model function.
    """
    amplitude, mean, width = p
    two_variance = 2.*width**2
    return amplitude*np.exp(-(x-mean)**2/two_variance)

## Define acceptance and variable

In [None]:
from unblinding_far_sideband import *

# Truth-level signal definition, written as a DataFrame.query string. It is
# extended further down with a variable-specific phase-space requirement.
ACCEPTANCE = 'isVtxInFiducial == 1 and ccnc==0 and (nu_pdg==12 or nu_pdg==-12) and npi0==0 and npion==0 and elec_e>0.03051'

# When False, most figures are shown but not written to disk.
savefigs = False

# Run period (key of plotter_runbyrun) and sideband key used for the analysis.
run = 123
sdb_key = 'Blind'

# Each group below configures one measurement: the preselection/selection
# keys, the reco variable (VARR), the matching truth variable (VART), the axis
# label, the bin edges and the extra acceptance cut. Enable exactly one group.

#pre_key = 'NP'
#sel_key = 'NPBDT'
#sel_key = 'NPXSBDT'
#VARR, VART, labl, bin_edges = 'reco_e', 'nu_e', 'Neutrino Energy [GeV]',np.linspace(0.01,2.53,10)
#VARR, VART, labl, bin_edges = 'reco_e', 'nu_e', 'Neutrino Energy [GeV]',np.linspace(0.15,1.55,11)
#ACCEPTANCE += ' and proton_ke>0.05'

#pre_key = 'NP'
#sel_key = 'NPXSBDT'
#VARR, VART, labl, bin_edges = 'cos_shr_theta', 'elec_pz', 'Electron CosTheta',np.array([-1.,0.2,0.7,0.9,1])
#ACCEPTANCE += ' and proton_ke>0.05'

#pre_key = 'NP'
#sel_key = 'NPXSBDT'
#VARR, VART, labl, bin_edges = 'cos_trk_theta', 'proton_pz', 'Proton CosTheta',np.array([-1,0,0.4,0.7,1])
#ACCEPTANCE += ' and proton_ke>0.05'

pre_key = 'NUE'
sel_key = 'XPXSBDT'
VARR, VART, labl, bin_edges = 'protonenergy_corr','proton_ke', 'Leading Proton Energy [GeV]',np.array([0.,0.05,0.1,0.2,0.3,0.8])
ACCEPTANCE += ' and (proton_ke>0.05 or (elec_e>0.5 and elec_pz>0.6))'

# Signal sample and the weight the run-123 plotter assigns to it.
sname = 'nue'
nue = samples[sname]
potw = plotter_runbyrun[run].weights[sname]

bincenters = 0.5*(bin_edges[1:]+bin_edges[:-1])

print("ACCEPTANCE=",ACCEPTANCE)
print("VARIABLE=",VARR)

In [None]:
# Look up the sideband / preselection / selection definitions chosen above.
# Each entry is used below through its 'query', 'dir' and 'title' fields.
sideband = sideband_categories[sdb_key]
preselection = preselection_categories[pre_key]
sel =  selection_categories[sel_key]

# Directory holding the detector-systematics inputs for this combination.
DETSYSPATH = ls.ntuple_path+'/detsys/{}_{}_{}/'.format(sideband['dir'], preselection['dir'], sel['dir'])
print(DETSYSPATH)

# Join the non-None queries with ' and '.
# NOTE(review): the run requirement is appended inside the loop, i.e. once per
# sub-query; the repeats are logically redundant. It is skipped for run 123.
QUERY = ""
for query in [sideband['query'],preselection['query'],sel['query']]:
    if query is None: continue
    if QUERY: QUERY += ' and '
    QUERY += query
    if run in [1, 2, 3, 12]:
        QUERY += f' and run{run}==True'

print(QUERY)

#formatting can be improved..
# Concatenate the title pieces; None entries are dropped, but the ' - '
# separators around them are still added.
TITLE = ""
for text in [f'Run {run}',' - ',sideband['title'],' - ',preselection['title'],'\n',sel['title'],' - ',pi0scaling["title"]]:
    if text is None: continue
    TITLE += text
    
save_path = ls.plots_path

## Compute systematic uncertainties

In [None]:
#let's make a plot in reco space, so that the plotter is internally updated
# (the cells below read .prediction, .cov_mc_stat and .cov_mc_detsys from the
# same plotter object after this call)

# Detector systematics switched on for every listed category. 'ncpi0' and
# 'ccpi0' appear twice in the literal; the repeats have no effect.
DETSYS = {'mc':True,'nue':True,'ccpi0':True,'ncpi0':True,'dirt':True,\
          'ncnopi':True,'nccpi':True,'ncpi0':True,'ccpi0':True}
plotter_runbyrun[run].plot_variable(
    VARR,   
    kind="event_category",
    query=QUERY+" and paper_category!=111",
    draw_sys=True,
    detsysdict=DETSYS,
    DETSYSPATH=DETSYSPATH,
    COVMATRIX="",
    ratio=False,
    stacksort=3,
    title=labl,
    bins=bin_edges,
    range=None,
    chisq=True,
)
pred = plotter_runbyrun[run].prediction
print('prediction:',pred)

In [None]:
# Covariance matrices from the plotter, one per systematic source. The first
# two calls use the "with_resp_func" variants and take the signal sample name,
# the truth variable and the acceptance; the flux and reinteraction calls use
# plain sys_err on the reco variable only.
gcov = plotter_runbyrun[run].sys_err_with_resp_func("weightsGenie", sname, VARR, VART, QUERY, ACCEPTANCE, None, bin_edges, "weightSplineTimesTune")
print('genie\n',gcov)
ucov = plotter_runbyrun[run].sys_err_unisim_with_resp_func(sname, VARR, VART, QUERY, ACCEPTANCE, None, bin_edges)
print('unsim\n',ucov)
fcov = plotter_runbyrun[run].sys_err("weightsFlux", VARR, QUERY, None, bin_edges, "weightSplineTimesTune")
print('flux\n',fcov)
rcov = plotter_runbyrun[run].sys_err("weightsReint", VARR, QUERY, None, bin_edges, "weightSplineTimesTune")
print('reint\n',rcov)

In [None]:
# MC-statistics and detector-systematics covariances are read from plotter
# attributes rather than computed here.
# NOTE(review): presumably these are filled by the plot_variable call in the
# previous section - confirm in Plotter.
scov = plotter_runbyrun[run].cov_mc_stat
print('mcstat\n',scov)
dcov = plotter_runbyrun[run].cov_mc_detsys
print('detector\n',dcov)

In [None]:
print('pred=',pred)

# Total covariance: sum of GENIE, unisim, flux, reinteraction, MC-stat and
# detector-systematics contributions.
totcov = gcov+ucov+fcov+rcov+scov+dcov
print('tot cov\n',totcov)

# Tick positions at the pixel boundaries of imshow, labelled with the bin edges.
edge_ticks = np.linspace(-0.5,len(bin_edges)-1.5,len(bin_edges))

fig = plt.figure(figsize=(6,6))
plt.imshow(totcov,origin='lower')
for i in range(len(totcov[0])):
    for j in range(len(totcov[0])):
        # imshow draws element [i, j] at x=j, y=i
        text = plt.text(j, i, "%.2f"%totcov[i, j],ha="center", va="center", color="w")
plt.title('Covariance Matrix\n'+labl)
plt.ylabel(labl)
plt.xlabel(labl)
plt.xticks(ticks=edge_ticks,labels=bin_edges)
plt.yticks(ticks=edge_ticks,labels=bin_edges)
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_cov.pdf', dpi=250)

print(pred*pred)
fig = plt.figure(figsize=(6,6))
# Fractional covariance F_ij = C_ij / (pred_i * pred_j). Dividing by the 1-D
# vector pred*pred would broadcast along the last axis and scale column j by
# pred_j**2, which is only right on the diagonal and breaks the symmetry.
totfcov = totcov/np.outer(pred,pred)
plt.imshow(totfcov,origin='lower')
for i in range(len(totfcov[0])):
    for j in range(len(totfcov[0])):
        text = plt.text(j, i, "%.3f"%totfcov[i, j],ha="center", va="center", color="w")
plt.title('Fractional Covariance Matrix\n'+labl)
plt.ylabel(labl)
plt.xlabel(labl)
plt.xticks(ticks=edge_ticks,labels=bin_edges)
plt.yticks(ticks=edge_ticks,labels=bin_edges)
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_frac_cov.pdf', dpi=250)

# Per-bin total uncertainty, absolute and relative to the prediction.
tdiag = np.sqrt(np.diag(totcov))
print('diag err=',tdiag)
tfdiag = tdiag/pred
print('frac diag err=',tfdiag)


In [None]:
# Per-bin fractional uncertainty of each source: sqrt of the covariance
# diagonal divided by the prediction. No separate entry is built for the
# unisim matrix (ucov).
gfdiag = np.sqrt(np.diag(gcov))/pred
ffdiag = np.sqrt(np.diag(fcov))/pred
rfdiag = np.sqrt(np.diag(rcov))/pred
sfdiag = np.sqrt(np.diag(scov))/pred
dfdiag = np.sqrt(np.diag(dcov))/pred

#xfdiag = np.sqrt(np.diag(dcov+scov+rcov))/pred

print('GENIE',gfdiag)
print('Flux',ffdiag)
print('Reint',rfdiag)
print('MCStat',sfdiag)
print('DetSyst',dfdiag)

fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111)
# One entry per bin center weighted by the value to display: this draws each
# array as a step histogram over the analysis binning.
ax.hist(bincenters,bins=bin_edges,weights=gfdiag,histtype='step',label='GENIE')
ax.hist(bincenters,bins=bin_edges,weights=ffdiag,histtype='step',label='Flux')
ax.hist(bincenters,bins=bin_edges,weights=rfdiag,histtype='step',label='Reint')
ax.hist(bincenters,bins=bin_edges,weights=sfdiag,histtype='step',label='MCStat')
ax.hist(bincenters,bins=bin_edges,weights=dfdiag,histtype='step',label='DetSyst')
#ax.hist(bincenters,bins=bin_edges,weights=xfdiag,histtype='step',label='DetSyst+MCStat+Reint')
ax.hist(bincenters,bins=bin_edges,weights=tfdiag,histtype='step',label='Total',color='black')
# sqrt(pred)/pred: Poisson fractional error for a count equal to the prediction
ax.hist(bincenters,bins=bin_edges,weights=np.sqrt(pred)/pred,histtype='step',label='Data Stat, exp.',color='black',ls='--')
# Replace the default legend handles by plain lines in the matching colour.
handles, labels = ax.get_legend_handles_labels()
new_handles = [Line2D([], [], c=h.get_edgecolor()) for h in handles]
plt.legend(handles=new_handles, labels=labels,loc=2,ncol=3,frameon=False)
#plt.legend(loc=2,ncol=3,frameon=False)
plt.xlabel(labl)
plt.ylabel('Fractional Uncertainty')
plt.ylim(0,0.4)
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_frac_unc.pdf', dpi=250)


## Compute smearing and response matrices

In [None]:
# Signal events inside the acceptance that pass the selection; the query is
# evaluated once and the three needed columns are taken from the result.
selected_signal = nue.query(ACCEPTANCE+' and '+QUERY)
x = selected_signal[VARR]
y = selected_signal[VART]
w = selected_signal['weightSplineTimesTune']*potw
# First histogram axis is the reco variable, second the truth variable.
h2d = np.histogram2d(x,y,weights=w,bins=[bin_edges,bin_edges])

fig = plt.figure(figsize=(6, 6))
# Normalise each truth column to unit sum.
counts = h2d[0]
sm = counts / counts.sum(axis=0)
plt.imshow(sm,origin='lower')
n_side = len(sm[0])
for i in range(n_side):
    for j in range(n_side):
        # imshow draws element [i, j] at x=j, y=i
        text = plt.text(j, i, "%.2f"%sm[i, j],ha="center", va="center", color="w")
plt.title('Smearing Matrix\n'+labl)
plt.ylabel("reco "+labl)
plt.xlabel("true "+labl)
# ticks on the pixel boundaries, labelled with the bin edges
boundary_ticks = np.linspace(-0.5,len(bin_edges)-1.5,len(bin_edges))
plt.xticks(ticks=boundary_ticks,labels=bin_edges)
plt.yticks(ticks=boundary_ticks,labels=bin_edges)
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_smear_mtrx.pdf', dpi=250)


In [None]:
## response matrix

#todo: update acceptance

rm, xb, yb = plotter_runbyrun[run].ResponseMatrix(nue,ACCEPTANCE,QUERY,\
                                                  VART,VARR,bin_edges,'weightSplineTimesTune',potw)
print('rm\n',rm)
fig = plt.figure(figsize=(6, 6))
# rm is applied as rm.dot(truth) -> reco in the closure test, so its first
# index is the reco bin and its second the true bin. imshow draws element
# [i, j] at x=j, y=i, hence plotting rm directly (not its transpose) puts the
# true bins on x and the reco bins on y, in agreement with the axis labels and
# with the smearing-matrix figure.
plt.imshow(rm,origin='lower')
for i in range(np.shape(rm)[0]):
    for j in range(np.shape(rm)[1]):
        text = plt.text(j, i, "%.3f"%rm[i, j],ha="center", va="center", color="w")
plt.title('Response Matrix\n'+labl)
plt.xlabel('true '+labl)
plt.ylabel('reco '+labl)
# xb / yb are the edge arrays returned by ResponseMatrix for the two axes
plt.xticks(ticks=np.linspace(-0.5,len(xb)-1.5,len(xb)),labels=xb)
plt.yticks(ticks=np.linspace(-0.5,len(yb)-1.5,len(yb)),labels=yb)
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_resp_mtrx.pdf', dpi=250)


In [None]:
#closure test for response matrix
# Truth-level spectrum of all signal events in the acceptance (no selection).
in_acceptance = nue.query(ACCEPTANCE)
tg = in_acceptance[VART]
w1 = in_acceptance['weightSplineTimesTune']*potw
tv = np.histogram(tg,weights=w1,bins=bin_edges)

print("truth-level signal, before selection:",tv[0])
print("response matrix:\n",rm)

# Fold the truth spectrum through the response matrix ...
rp = rm.dot(tv[0])

print("reco signal from response matrix: ",rp)

# ... and compare with the reco spectrum of the selected signal events.
selected = nue.query(ACCEPTANCE+' and '+QUERY)
ro = selected[VARR]
w2 = selected['weightSplineTimesTune']*potw
tr = np.histogram(ro,weights=w2,bins=bin_edges)
print("reco signal straight from ntuples:",tr[0])

## Test fake data

In [None]:
# Fake-data study: only runs when the 'fakeset9' dataset is selected.
# genieuntuned switches the MC weight from weightSplineTimesTune to weightSpline.
genieuntuned = False
if which_sideband[1] == 'fakeset9':

    #replace QUERY to allow for data
    FDQUERY = QUERY.replace("bnbdata == 0","bnbdata >= 0")
    FDQUERY += ' and extdata==0'#remove for real data!

    FDTITLE = TITLE.replace("Blind","NuWro Fake Data")
    genieweight = "weightSplineTimesTune"
    if genieuntuned: genieweight = "weightSpline"

    # Only the first three return values are kept. The third one, called ax2
    # here, is indexed below as ax2[0][pos]; Pur unpacks the same position of
    # the return tuple under the name `stack`.
    fig, ax1, ax2 = plotter_runbyrun[run].plot_variable(
        VARR,   
        kind="event_category",
        query=FDQUERY+" and paper_category!=111",
        genieweight=genieweight,
        draw_sys=True,
        detsysdict=DETSYS,
        DETSYSPATH=DETSYSPATH,
        COVMATRIX="",
        ratio=False,
        stacksort=3,
        title=labl,
        bins=bin_edges,
        range=None,
        chisq=True,
    )[0:3]
    #
    # Some cosmetics by hand to set the position of the chi2
    # (finds the text object containing 'n.d.f.' among the axis children and
    # moves it; the x position depends on the plotted variable)
    for a in ax1.get_children(): 
        if len(a.findobj(match=plt.Text))==0: continue
        txt = a.findobj(match=plt.Text)[0]
        if 'n.d.f.' not in txt.get_text(): continue
        #print(txt.get_text())
        if VARR == 'cos_trk_theta' or VARR == 'cos_shr_theta':
            txt.set_x(0.20)
        if VARR == 'protonenergy_corr':
            txt.set_x(0.80)
        txt.set_y(0.38)
    #
    ax1.set_title(FDTITLE, loc='left')
    # leave 50% headroom above the tallest entry
    ax1.set_ylim(0.0, ax1.get_ylim()[1]*1.5)
    plt.tight_layout()
    fig.show()
    # NOTE(review): saving is forced on here; the savefigs flag is bypassed.
    if True:#savefigs:
        if genieuntuned:
            fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_NuWroFDS_untuned.pdf', dpi=250)
        else:
            fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_NuWroFDS.pdf', dpi=250)

    #print(plotter_runbyrun[run].data)
    # Index of the ax2[0] entry that is subtracted from the data below.
    # NOTE(review): presumably the cumulative stack level holding all
    # backgrounds; the value is hard-coded per variable - confirm it matches
    # the stacking order.
    pos = -3
    if VARR == 'protonenergy_corr': pos = -4
    #print(ax2[0][pos])
    print(VARR)
    print('Background subtracted data:\n',plotter_runbyrun[run].data-ax2[0][pos])
    print('tot covariance\n',totcov)
    print("response matrix\n",rm)
    print("truth-level signal, before selection\n",tv[0])

## Efficiency, Purity, and Resolution

In [None]:
# Query strings for the selection stages compared in the purity/efficiency
# plots: each line appends the next stage's cuts to the previous query.
PRESQ = preselection_categories['NUE']['query']
NPPRESQ = preselection_categories['NP']['query']
NPLCUTQ =  NPPRESQ+' and '+selection_categories['NPL']['query']
NPTCUTQ =  NPPRESQ+' and '+selection_categories['NPT']['query']
NPBDTCQ =  NPLCUTQ+' and '+selection_categories['NPBDT']['query']
# loose selection plus explicit cuts at 0.50 on both BDT scores
NPBDTLQ =  NPLCUTQ+' and pi0_score > 0.50 and nonpi0_score > 0.50'

In [None]:
# Legend labels and the matching selection queries, in the same order.
# Raw string: '\p' is not a valid escape sequence in a normal string literal.
lables = [r'1e$N$p0$\pi$ Loose','BDT relaxed','BDT nominal','xsec sel.']
selecs = [NPLCUTQ,NPBDTLQ,NPBDTCQ,QUERY]

print(bin_edges)

bin_centers = 0.5*(bin_edges[1:]+bin_edges[:-1])
# Horizontal error bars span the bin. Error-bar sizes must be non-negative,
# so the difference is taken as upper edge minus lower edge.
half_widths = (bin_edges[1:]-bin_edges[:-1])/2

# Shape of the selected signal in the reco variable (unit-normalised),
# drawn in grey behind the purity curves.
dist = nue.query(QUERY)[VARR]
n, bins = np.histogram(dist,bins=bin_edges)
n = n/len(dist)

fig, ax = plt.subplots(figsize=(8,6))
for i,q in enumerate(selecs):
    fmt = 'o-'
    vals = Pur(plotter_runbyrun[run],VARR,q,ACCEPTANCE,bin_edges,None)
    ax.errorbar(bin_centers,vals,xerr=half_widths,fmt=fmt,label=lables[i])
ax.set_xlabel('Reconstructed '+labl)
ax.set_ylabel(r'Signal Purity')
ax.set_xticks(bin_edges)
ax.set_ylim(0.,1.2)
ax.set_xlim(bin_edges[0],bin_edges[-1])
ax.hist(bins[:-1], bins, weights=n,color='lightgray',label='signal after sel.')
ax.legend(ncol=2, prop={'size': 15})
ax.grid(True)
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_purity.pdf', dpi=250)

dist = nue[VART]  #true distribution, no selection
n, bins = np.histogram(dist,bins=bin_edges)
n = n/len(dist)

fig, ax = plt.subplots(figsize=(8,6))
for i,q in enumerate(selecs):
    fmt = 'o-'
    centers,vals,errs = Eff(nue,VART,q,ACCEPTANCE,bin_edges)
    # Eff drops bins with an empty denominator; keep only the half-widths of
    # the bins it returned so that xerr has the same length as the points.
    kept = np.isin(bin_centers,centers)
    ax.errorbar(centers,vals,yerr=errs,xerr=half_widths[kept],fmt=fmt,label=lables[i])
ax.set_xlabel('True '+labl)
ax.set_ylabel(r'Signal Efficiency')
ax.set_xticks(bin_edges)
ax.set_ylim(0.,0.5)
ax.set_xlim(bin_edges[0],bin_edges[-1])
ax.hist(bins[:-1], bins, weights=n,color='lightgray',label='signal no sel.')
ax.legend(ncol=2, prop={'size': 15})
ax.grid(True)
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_efficiency.pdf', dpi=250)


In [None]:
# Absolute and fractional resolution of the reco variable for selected signal
# events, each with a Gaussian fit to the histogram contents.
# NOTE: this rebinds `sel` to a query string; the query-building cell uses the
# same name for the selection dictionary, so that cell has to be re-run before
# anything reads sel['dir'] / sel['query'] / sel['title'] again.
sel = ACCEPTANCE+' and '+QUERY

fig = plt.figure(figsize=(6, 6))
var = nue.query(sel)[VARR]-nue.query(sel)[VART]
n, b, p = plt.hist(var, bins=40, range=(-0.1,0.1),#weights=wgh,
                   histtype='step',linestyle='solid',color=('black'), linewidth=1.2,
                   stacked=False, label='signal')
# leave 20% headroom for the fit-result text
plt.ylim(0., plt.gca().get_ylim()[1]*1.2)
#gaussian fit
# p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
bin_centres = (b[:-1] + b[1:])/2
coeff, var_matrix = curve_fit(gauss, bin_centres, n, p0=(3000,0,0.01))
print(coeff)
# parameter uncertainties from the diagonal of the fit covariance
errs = np.sqrt(np.diag(var_matrix))
curvebins = np.linspace(b[0],b[-1],200)
# Get the fitted curve
hist_fit = gauss(curvebins, *coeff)
plt.text(plt.gca().get_xlim()[-1]*0.1,plt.gca().get_ylim()[-1]*0.95,"mean=%.3f+/-%.3f"%(coeff[1],errs[1]))
plt.text(plt.gca().get_xlim()[-1]*0.1,plt.gca().get_ylim()[-1]*0.90,"sigma=%.3f+/-%.3f"%(coeff[2],errs[2]))
plt.plot(curvebins, hist_fit, label='Gaussian Fit')
plt.xlabel('Reco - True '+labl)
plt.legend(loc=2)
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_resolution.pdf', dpi=250)

# Same procedure for the fractional resolution (reco-true)/true.
fig = plt.figure(figsize=(6, 6))
var = (nue.query(sel)[VARR]-nue.query(sel)[VART])/nue.query(sel)[VART]
n, b, p = plt.hist(var, bins=40, range=(-0.25,0.25),
                   histtype='step',linestyle='solid',color=('black'), linewidth=1.2,
                   stacked=False, label='signal')
plt.ylim(0., plt.gca().get_ylim()[1]*1.2)
#gaussian fit
# p0 is the initial guess for the fitting coefficients (A, mu and sigma above)
bin_centres = (b[:-1] + b[1:])/2
coeff, var_matrix = curve_fit(gauss, bin_centres, n, p0=(3000,0,0.01))
print(coeff)
errs = np.sqrt(np.diag(var_matrix))
curvebins = np.linspace(b[0],b[-1],200)
# Get the fitted curve
hist_fit = gauss(curvebins, *coeff)
plt.text(plt.gca().get_xlim()[-1]*0.1,plt.gca().get_ylim()[-1]*0.95,"mean=%.3f+/-%.3f"%(coeff[1],errs[1]))
plt.text(plt.gca().get_xlim()[-1]*0.1,plt.gca().get_ylim()[-1]*0.90,"sigma=%.3f+/-%.3f"%(coeff[2],errs[2]))
plt.plot(curvebins, hist_fit, label='Gaussian Fit')
plt.xlabel('(Reco - True)/True '+labl)
plt.legend(loc=2)
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/'+VARR+'_fracresol.pdf', dpi=250)

## Study proton reconstruction and energy correction

In [None]:
# Signal selection used for the proton studies.
# NOTE(review): NPXSBDTQ is not defined in the visible part of the notebook;
# presumably it comes from `from unblinding_far_sideband import *` - confirm.
sel = ACCEPTANCE+' and '+NPXSBDTQ
# need to make sure cut on proton energy not included in query!
# (the replace is a no-op if the cut is spelled differently in the query)
sel = sel.replace(" and protonenergy_corr > 0.05","")

# "Well reconstructed" proton candidate: track matched to a true proton
# (trk_pdg==2212) with trk_pur and trk_cmp both above 0.5.
goodreco = 'trk_pdg==2212 and trk_pur>0.5 and trk_cmp>0.5'
selgr = sel+' and '+goodreco

In [None]:
# Purity and completeness (trk_pur / trk_cmp) of track candidates matched to a
# true proton, for all selected signal events.
true_protons = nue.query(sel+' and trk_pdg==2212')
fig = plt.figure(figsize=(6, 6))
pur = true_protons['trk_pur']
cmp = true_protons['trk_cmp']
plt.hist([pur,cmp],bins=10,range=(0,1),histtype='step',label=['purity','completeness'])
plt.legend(loc=2)
plt.title('true protons after selection')
plt.tight_layout()
fig.show()
# Honour the savefigs switch here too, like every other optional output of
# this cell (this call used to write the file unconditionally).
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/proton_compur_all.pdf', dpi=250)

# Same distributions restricted to true proton kinetic energy below 0.1 GeV.
low_ke_protons = nue.query(sel+' and trk_pdg==2212 and proton_ke<0.1')
fig = plt.figure(figsize=(6, 6))
pur = low_ke_protons['trk_pur']
cmp = low_ke_protons['trk_cmp']
plt.hist([pur,cmp],bins=10,range=(0,1),histtype='step',label=['purity','completeness'])
plt.legend(loc=2)
plt.title('true protons (KE<0.1 GeV) after selection')
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/proton_compur_lowE.pdf', dpi=250)


In [None]:
# Reco and truth column names for the proton kinetic energy; also read by the
# correction-fit cell that follows.
recov = 'protonenergy'
truev = 'proton_ke'

# values for well reconstructed protons in selected signal events
rval = nue.query(selgr)[recov]
tval = nue.query(selgr)[truev]

# true vs reco kinetic energy
fig = plt.figure(figsize=(7, 6))
plt.hist2d(rval,tval, bins=20, range=[(0,0.5),(0,0.5)])
plt.xlabel('reco KE')
plt.ylabel('true KE')
plt.title('true signal well reco protons')
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/proton_true-vs-reco_KE.pdf', dpi=250)

# fractional difference vs reco kinetic energy
fig = plt.figure(figsize=(7, 6))
h = plt.hist2d(rval,(rval-tval)/tval, bins=20, range=[(0,0.5),(-0.5,0.1)])
plt.xlabel('reco KE')
plt.ylabel('(reco-true)/true KE')
plt.title('true signal well reco protons')
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/proton_fracres-vs-reco_KE.pdf', dpi=250)

# absolute difference vs reco kinetic energy
fig = plt.figure(figsize=(7, 6))
h = plt.hist2d(rval,rval-tval, bins=20, range=[(0,0.5),(-0.05,0.01)])
plt.xlabel('reco KE')
plt.ylabel('reco-true KE')
plt.title('true signal well reco protons')
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs: fig.savefig(save_path + '/xsec-nue-bnb/proton_res-vs-reco_KE.pdf', dpi=250)


In [None]:
def func(x, *p):
    """Evaluate the two-parameter model p1/x + p2 at x.

    The parameters arrive as extra positional arguments (p1 first, then p2),
    which is the calling convention curve_fit uses for the model function.
    """
    inverse_coeff, constant = p
    return inverse_coeff/x + constant

# Median of (true - reco) kinetic energy in slices of reco kinetic energy,
# for well reconstructed protons; the medians are then fitted with func.
xval = []
yval = []
rng = np.linspace(0.01,0.20,20)
for i in range(len(rng)-1):
    low, high = rng[i], rng[i+1]
    mid = low+(high-low)/2
    extracut = ' and %s>%f and %s<%f'%(recov,low,recov,high)
    mysel = selgr+extracut
    in_slice = nue.query(mysel)   # evaluate the slice query once
    ba = in_slice[truev]-in_slice[recov]
    xval.append(mid)
    yval.append(np.median(ba))

# Fit p1/x + p2 to the medians and sample the fitted curve finely for drawing.
coeff, var_matrix = curve_fit(func, xval, yval, p0=(0.0005,-0.000))
curvebins = np.linspace(rng[0],rng[-1],200)
hist_fit = func(curvebins, *coeff)

fig = plt.figure(figsize=(6, 6))
plt.errorbar(x=xval,y=yval,fmt='r*',label='nue MC')
# the label lists the constant term first, then the 1/x coefficient
fit_label = 'Fitted data\n %.5f + %.5f/x'%(coeff[1],coeff[0])
plt.plot(curvebins, hist_fit, label=fit_label)
plt.xlim(rng[0],rng[-1])
plt.ylim(0,0.1)
plt.grid()
plt.xlabel('reco KE [GeV]')
plt.ylabel('true - reco KE [GeV]')
plt.legend()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/proton_KE_correction_fit.pdf', dpi=250)

print('p1=%.6f, p2=%.6f'%(coeff[0],coeff[1]))

In [None]:
# Compare reco and corrected proton kinetic energy with the truth, where the
# corrected value is reco + p1/reco + p2 with the coefficients fitted above.
recov = 'protonenergy'
truev = 'proton_ke'

p1 = coeff[0] # 0.000620
p2 = coeff[1] #-0.001792

# drawing options shared by the four histograms below
overlay_style = dict(histtype='step',linestyle='solid',color=['black','red'],
                     linewidth=1.2,stacked=False, label=['reco','corr'])

# all energy plots
good_protons = nue.query(selgr)
reco_all = good_protons[recov]
true_all = good_protons[truev]
varR = reco_all-true_all
corrvar = reco_all + p1/reco_all + p2
varC = corrvar-true_all

fig = plt.figure(figsize=(6, 6))
plt.hist([varR,varC], bins=41, range=(-0.05,0.05), **overlay_style)
plt.title('true protons after selection')
plt.xlabel('measured - true energy [GeV]')
plt.legend()
plt.grid()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/proton_KE_delta_all.pdf', dpi=250)

fig = plt.figure(figsize=(6, 6))
plt.hist([varR/true_all,varC/true_all], bins=41, range=(-0.5,0.5), **overlay_style)
plt.title('true protons after selection')
plt.xlabel('(measured - true)/true energy')
plt.legend()
plt.grid()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/proton_KE_fracdelta_all.pdf', dpi=250)

# low energy plots
lowecut = ' and protonenergy<0.1'
low_protons = nue.query(selgr+lowecut)
reco_low = low_protons[recov]
true_low = low_protons[truev]
varR = reco_low-true_low
corrvar = reco_low + p1/reco_low + p2
varC = corrvar-true_low

fig = plt.figure(figsize=(6, 6))
plt.hist([varR,varC], bins=41, range=(-0.05,0.05), **overlay_style)
plt.title('true protons (KE<0.1 GeV) after selection')
plt.xlabel('measured - true energy [GeV]')
plt.legend()
plt.grid()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/proton_KE_delta_lowE.pdf', dpi=250)

fig = plt.figure(figsize=(6, 6))
plt.hist([varR/true_low,varC/true_low], bins=41, range=(-1,1), **overlay_style)
plt.title('true protons (KE<0.1 GeV) after selection')
plt.xlabel('(measured - true)/true energy')
plt.legend()
plt.grid()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/proton_KE_fracdelta_lowE.pdf', dpi=250)


## Miscellanea

In [None]:
#def ResponseMatrix(sample,acceptance,fullsel,vart,varr,bin_edges,wlab,potw,univ=-1,wvar=''):
#    truevals = sample.query(acceptance)[[vart]].values.flatten()
#    tweights = sample.query(acceptance)[[wlab]].values.flatten()*potw
#    if univ>=0:
#        vweights = sample.query(acceptance)[[wvar]].values.flatten()
#        tweights = tweights*np.stack(vweights)[:,univ]/1000.
#    n, bins = np.histogram(truevals,weights=tweights,bins=bin_edges)
#    x = sample.query(acceptance+' and '+fullsel)[[vart]].values.flatten()
#    y = sample.query(acceptance+' and '+fullsel)[[varr]].values.flatten()
#    w = sample.query(acceptance+' and '+fullsel)[[wlab]].values.flatten()*potw
#    if univ>=0:
#        vw = sample.query(acceptance+' and '+fullsel)[[wvar]].values.flatten()
#        w = w*np.stack(vw)[:,univ]/1000.
#    H, xb, yb = np.histogram2d(x,y,weights=w,bins=[bin_edges,bin_edges])
#    rm = np.transpose(H)/n
#    return rm, xb, yb

In [None]:
#compare genie model tuned and untuned

print(VARR, bin_edges, labl)

# Settings common to both plot_variable calls; only genieweight differs.
common_plot_args = dict(
    kind="event_category",
    query=QUERY+" and category==11",
    draw_sys=False,#True,
    detsysdict=DETSYS,
    DETSYSPATH=DETSYSPATH,
    COVMATRIX="",#
    ratio=False,
    stacksort=3,
    title=labl,
    bins=bin_edges,
    range=None,
    chisq=False,
)

# tuned prediction
fig1, ax1, s1 = plotter_runbyrun[run].plot_variable(
    VARR,
    genieweight="weightSplineTimesTune",
    **common_plot_args,
)[0:3]

# untuned prediction
fig2, ax2, s2 = plotter_runbyrun[run].plot_variable(
    VARR,
    genieweight="weightSpline",
    **common_plot_args,
)[0:3]
plt.show()

# s1/s2 are the third return value of plot_variable: [1] is used below as the
# bin edges and [0][0] as the per-bin contents of the first stack entry.
print(s1[1])
print(s1[0][0])
print(s2[0][0])

# Overlay the two predictions with sqrt(N) error bars.
bc = 0.5*(s1[1][1:]+s1[1][:-1])
plt.hist(bc, bins=s1[1], weights=s1[0][0],histtype='step',color='blue',label='weightSplineTimesTune')
plt.errorbar(bc, s1[0][0], np.sqrt(s1[0][0]),color='blue',ls='none')
plt.hist(bc, bins=s1[1], weights=s2[0][0],histtype='step',color='red',label='weightSpline')
plt.errorbar(bc, s2[0][0], np.sqrt(s2[0][0]),color='red',ls='none')
plt.legend(loc=2)
# title spelling corrected ("exepected" -> "expected")
plt.title('expected 6.86e20 POT, stat only error')
plt.ylabel('events')
plt.xlabel(labl)
plt.show()

# Bin-by-bin ratio tuned / untuned.
plt.hist(bc, bins=s1[1], weights=(s1[0][0]/s2[0][0]),histtype='step',color='black',label='ratio')
plt.ylabel('ratio')
plt.xlabel(labl)

In [None]:
# Inspect events of the nue sample that satisfy the truth-level acceptance but
# have a category other than 11 or 0. The last expression is the cell output.
print(ACCEPTANCE)
#nue.query(ACCEPTANCE+' and category==11 and elec_pz<-2')[['nu_pdg','nu_e','elec_e','elec_pz']]
nue.query(ACCEPTANCE+' and category!=11 and category!=0')[['run','category','slnunhits','slnhits','ccnc','nu_pdg','nu_e','elec_e','elec_pz','nproton','nslice','mc_pdg']]


In [None]:
# Largest protonenergy value among nue-sample events with nslice==1.
np.amax(nue.query("nslice==1")[["protonenergy"]])

In [None]:
# Covariance of the folded reco prediction when the response matrix is rebuilt
# for each GENIE universe and applied to the nominal truth spectrum.
# Central value: reco spectrum of the selected signal from the closure test.
cv = tr[0]
cov = np.zeros([len(cv), len(cv)])
Nuniverse = 10#0
print("true value: ",tv[0])
print("central value: ",cv)
for univ in range(Nuniverse):
    rmv, xb, yb = plotter_runbyrun[run].ResponseMatrix(nue,ACCEPTANCE+' and category==11',NPXSBDTQ+' and category==11',\
                                                       VART,VARR,bin_edges,'weightSpline',potw,univ,'weightsGenie')
    rp = rmv.dot(tv[0])
    # accumulate (rp_i - cv_i) * (rp_j - cv_j) for all bin pairs at once
    shift = rp - cv
    cov += np.outer(shift, shift)
cov /= Nuniverse
print('cov',cov)

# Fractional covariance as a new array: assigning `fcov = cov` and dividing
# in place would also overwrite the absolute covariance kept in `cov`.
# NOTE: this reuses the name `fcov`, which the systematics section uses for
# the flux covariance; re-run that cell before recomputing the total.
fcov = cov/np.outer(cv, cv)
#fcov = np.sqrt(fcov)
print('fcov',fcov)

plt.figure(figsize=(6, 6))
plt.imshow(fcov,origin='lower')
for i in range(np.shape(fcov)[0]):
    for j in range(np.shape(fcov)[1]):
        # imshow draws element [i, j] at x=j, y=i
        text = plt.text(j, i, "%.3f"%fcov[i, j],ha="center", va="center", color="w")
plt.title('Fractional Covariance Matrix\nGenieAll on Response Function')
plt.xlabel('reco '+labl)
plt.ylabel('reco '+labl)
plt.xticks(ticks=np.linspace(-0.5,len(xb)-1.5,len(xb)),labels=xb)
plt.yticks(ticks=np.linspace(-0.5,len(yb)-1.5,len(yb)),labels=yb)
plt.colorbar()
plt.show()


In [None]:
from matplotlib.colors import LogNorm

# Two-dimensional distributions after the Np preselection. The preselected
# nue frame is built once here and reused by all nue plots below.
nue_presel = samples['nue'].query(NPPRESQ)

# reco energy vs proton candidate energy (nue)
fig = plt.figure(figsize=(6, 6))
x = nue_presel['reco_e']
y = nue_presel['protonenergy']
plt.hist2d(x,y, bins=20, range=[[0,2],[0,1]])
plt.xlabel("reco_e [GeV]")
plt.ylabel("proton candidate energy [GeV]")
plt.title('nue sample after Np preselection')
plt.tight_layout()
fig.show()

# reco energy vs LLR pid (nue)
fig = plt.figure(figsize=(6, 6))
x = nue_presel['reco_e']
y = nue_presel['trkpid']
plt.hist2d(x,y, bins=20, range=[[0,2],[-1,1]])
plt.xlabel("reco_e [GeV]")
plt.ylabel("proton candidate LLR pid")
plt.title('nue sample after Np preselection')
plt.tight_layout()
fig.show()

# proton candidate energy vs LLR pid (nue)
fig = plt.figure(figsize=(6, 6))
x = nue_presel['protonenergy']
y = nue_presel['trkpid']
plt.hist2d(x,y, bins=20, range=[[0,1],[-1,1]])
plt.xlabel("protonenergy [GeV]")
plt.ylabel("proton candidate LLR pid")
plt.title('nue sample after Np preselection')
plt.tight_layout()
fig.show()

# proton candidate energy vs LLR pid (ext); the ext frame is built here, at
# its first use, and reused by the ext track-length plot
ext_presel = samples['ext'].query(NPPRESQ)
fig = plt.figure(figsize=(6, 6))
x = ext_presel['protonenergy']
y = ext_presel['trkpid']
plt.hist2d(x,y, bins=20, range=[[0,1],[-1,1]])
plt.xlabel("protonenergy [GeV]")
plt.ylabel("proton candidate LLR pid")
plt.title('ext sample after Np preselection')
plt.tight_layout()
fig.show()

# track length vs LLR pid (nue), log colour scale, with the straight line
# 0.015*length + 0.02 overlaid in red
fig = plt.figure(figsize=(7, 6))
x = nue_presel['trk_len']
y = nue_presel['trkpid']
xf = np.linspace(0,50,100)
yf = 0.015*xf+0.02
plt.hist2d(x,y, bins=20, range=[[0,50],[-1,1]], norm=LogNorm())
plt.plot(xf,yf,'r')
plt.xlabel("track length [cm]")
plt.ylabel("proton candidate LLR pid")
plt.title('nue sample after Np preselection')
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/trkpid_vs_trklen_nue.pdf', dpi=250)

# track length vs LLR pid (ext), same line overlaid
fig = plt.figure(figsize=(7, 6))
x = ext_presel['trk_len']
y = ext_presel['trkpid']
plt.hist2d(x,y, bins=20, range=[[0,50],[-1,1]], norm=LogNorm())
plt.plot(xf,yf,'r')
plt.xlabel("track length [cm]")
plt.ylabel("proton candidate LLR pid")
plt.title('EXT sample after Np preselection')
plt.colorbar()
plt.tight_layout()
fig.show()
if savefigs:
    fig.savefig(save_path + '/xsec-nue-bnb/trkpid_vs_trklen_ext.pdf', dpi=250)

# LLR pid vs proton candidate energy (nue), finer binning
fig = plt.figure(figsize=(6, 6))
x = nue_presel['trkpid']
y = nue_presel['protonenergy']
plt.hist2d(x,y, bins=100, range=[[-1,1],[0,1]])
plt.ylabel("protonenergy [GeV]")
plt.xlabel("proton candidate LLR pid")
plt.title('nue sample after Np preselection')
plt.tight_layout()
fig.show()

# Single Variable Plotter

In [None]:
# List of variable names.
# NOTE(review): presumably the BDT training inputs, going by the name; it is
# not read anywhere in the visible part of the notebook.
TRAINVAR = ["shr_score","tksh_distance","tksh_angle",
            "shr_tkfit_dedx_max",
            "trkfit","trkpid",
            "subcluster","shrmoliereavg",
            "trkshrhitdist2","hits_ratio",
            "secondshower_Y_nhit","secondshower_Y_vtxdist","secondshower_Y_dot","anglediff_Y",
            "CosmicIPAll3D","CosmicDirAll3D"]

# Names of p-value types; not read in the visible part of the notebook either.
pvaluetypes = ['pvalue','pvaluediag','pvaluestatonly']

In [None]:
# Per-sample switches handed to plot_variable(detsysdict=...).
# NOTE(review): presumably True turns on the detector systematics for that
# sample -- confirm against the plotter implementation.
# Each key is listed exactly once ('ncpi0' and 'ccpi0' used to be repeated,
# where the later entry silently replaced the earlier one).
#DETSYS = {'mc':True,'nue':True,'ccpi0':True,'ncpi0':True,'dirt':True,
#          'ncnopi':True,'nccpi':True}

DETSYS = {'mc':False,'nue':False,'ccpi0':False,'ncpi0':False,'dirt':False,
          'ncnopi':False,'nccpi':False}

In [None]:
from unblinding_far_sideband import *

# Pick the sideband, preselection and selection by key;
# the definitions live in unblinding_far_sideband.py
sdb_key = 'Blind' #'TwoPShr' #'None'
pre_key = 'NP'
sel_key = 'NPXSBDT' #'NPXSL' #'NPXSBDT' #'NPXSBDT' #'NPXSBDTAllShr' #'NPL'
#pre_key = 'ZP'
#sel_key = 'ZPXSBDT' #'ZPXSL' #'ZPBDT' #'ZPLOOSESEL'#'ZPBDT'
#pre_key = 'NUE'
#sel_key = 'XPXSBDT'
#sel_key = 'ZPONEGAMMA'
#sdb_key = 'TwoPShr' #'None'
#pre_key = 'ZPTwoShr'
#sel_key = 'ZPBDTTWOSHR' #'ZPLOOSETWOSHR'
#sdb_key = 'TwoPShr'
#pre_key = 'NP'
#sel_key = 'NPBDTAllShr' #'None' 'NPVLAllShr' 'NPLAllShr' 'NPBDTAllShr' 'NPTAllShr'
run = 123

# Look up the three category dictionaries in one go
sideband, preselection, sel = (
    sideband_categories[sdb_key],
    preselection_categories[pre_key],
    selection_categories[sel_key],
)

# Detector-systematics folder, named after the 'dir' entry of each category
DETSYSPATH = ls.ntuple_path + '/detsys/{}_{}_{}/'.format(
    *(category['dir'] for category in (sideband, preselection, sel))
)

# Combine the sideband, preselection and selection cuts into one query string.
# Missing (None) or empty entries are dropped, so the result never starts or
# ends with a dangling ' and '.
query_parts = [q for q in (sideband['query'], preselection['query'], sel['query']) if q]
# For run values 1, 2, 3 and 12 add the matching run flag -- exactly once.
# (It used to sit inside the loop over the cuts, which appended it after
# every single cut.)
if run in [1, 2, 3, 12]:
    query_parts.append(f'run{run}==True')
QUERY = ' and '.join(query_parts)

#QUERY += ' and (bnbdata==0 and reco_e<0.3)'
#QUERY += ' and (bnbdata==0)'
#QUERY += ' and (extdata==0)'
#QUERY += ' and nu_e>1'
#QUERY += ' abs(nu_pdg)==12'
#QUERY += ' and reco_e>0.15 and reco_e<0.29'
#QUERY += ' and reco_e>0.15 and reco_e<0.43'
#QUERY += ' and reco_e>0.15 and reco_e<0.57'
#QUERY += ' and reco_e>0.43'
#QUERY += ' and reco_e>0.15 and reco_e<0.65'
#QUERY += ' and reco_e>0.15 and reco_e<1.05'
#QUERY += ' and reco_e>0.4 and reco_e<1.2'
#QUERY += ' and tksh_angle<0.70'
#QUERY += ' and pi0_score > 0.67 and nonpi0_score > 0.70'
#QUERY += ' and (pi0_score < 0.67 or nonpi0_score < 0.70)'
#QUERY += ' and pi0_score > 0.67 and nonpi0_score > 0.67'
#QUERY += ' and pi0_score > 0.65 and nonpi0_score > 0.65'
#QUERY += ' and pi0_score > 0.62 and nonpi0_score > 0.65'
#QUERY += ' and pi0_score > 0.6 and nonpi0_score > 0.6'

#QUERY += ' and CosmicIPAll3D > 10. and hits_ratio > 0.50 and shrmoliereavg < 9 and subcluster > 4 and trkfit < 0.65 and tksh_distance < 6.0 and shr_trk_len < 300. and n_showers_contained == 1'
##QUERY += ' and (trkpid < 0.02 or reco_e>1)'
##QUERY += ' and (trkpid < 0.02 or protonenergy>0.2)'
##QUERY += ' and protonenergy<0.0.05'
##QUERY += ' and protonenergy>0.1 and protonenergy<0.2'
##QUERY += ' and protonenergy>0.5'
##QUERY += ' and trkpid < 0.6 and (trkpid < 0.02 or protonenergy>0.1)'
##QUERY += ' and trkpid<(2.18*protonenergy-0.09)'
##QUERY += ' and (trkpid<0.02 or (trkpid<2.45*protonenergy-0.225))'
#QUERY += ' and (trkpid<0.02 or (trkpid<(-2.5*protonenergy*protonenergy+3.3*protonenergy-0.1)))'
###QUERY += ' and (shr_tkfit_nhits_tot > 1 and shr_tkfit_dedx_max > 0.5 and shr_tkfit_dedx_max < 5.5)' 
#QUERY += ' and tksh_angle > -0.9'
#QUERY += ' and pi0_score > 0.45 and nonpi0_score > 0.45'

#QUERY += ' and CosmicIPAll3D>30.'
#QUERY += ' and tksh_angle<0.7'
#QUERY += ' and shr_tkfit_dedx_max > 1.0 and shr_tkfit_dedx_max < 3.8'
#QUERY += ' and shr_tkfit_dedx_max > 1.0 and shr_tkfit_dedx_max < 3.8 and tksh_angle<0.7 and CosmicIPAll3D>30.'
#QUERY += ' and (CosmicIPAll3D <= 30. or CosmicDirAll3D <= -0.98 or CosmicDirAll3D >= 0.98 or hits_ratio <= 0.65 or shr_score >= 0.25 or shrmoliereavg <= 2 or shrmoliereavg >= 10 or subcluster <= 7 or trkfit >= 0.70 or tksh_distance >= 4.0 or trkshrhitdist2 >= 1.5 or shr_tkfit_dedx_max <= 1.0 or shr_tkfit_dedx_max >= 3.8 or (secondshower_Y_nhit>8 and secondshower_Y_dot>0.8 and anglediff_Y>40 and secondshower_Y_vtxdist<100) or tksh_angle <= -0.9 or tksh_angle >= 0.70)'
#QUERY+=' and not (theta1PlusTheta2>1.2 and shr_theta>0.3 and trk_theta>0.3 and tksh_angle>-0.58 and tksh_angle<0.54)'
#QUERY += ' and reco_e>0.65 and n_tracks_contained<=1 and shr_score<0.03 and subcluster>15 and shrmoliereavg<12'#dedx plot
#QUERY += ' and shr_score<0.2 and (shr_phi<-2.1 or shr_phi>-1.1) and CosmicDirAll3D>-0.7 and CosmicDirAll3D<0.7 and shr_tkfit_dedx_max>3 and shr_trk_sce_start_y<60'
#QUERY += ' and shr_tkfit_dedx_max>3.0'
#QUERY += 'and ccnc==1 and npi0==0 and pi0truth_gamma_parent==22 and neta==0'
print(QUERY)

# Plot title: run, sideband and preselection on the first line, selection and
# pi0 scaling on the second. Entries that are None are left out.
#formatting can be improved..
title_pieces = [f'Run {run}', ' - ', sideband['title'], ' - ', preselection['title'],
                '\n', sel['title'], ' - ', pi0scaling["title"]]
TITLE = "".join(piece for piece in title_pieces if piece is not None)

#TITLE+=' - First Bin'
#TITLE+=' - tksh_angle<0.70'
#TITLE+=' + BDT cuts'
#TITLE+=' + NOT BDT cuts'
#TITLE+=' + NOT Tight cuts'
#TITLE+=' + looser BDT cuts'
#TITLE+=' + BDT>0.65'
#TITLE+=' + BDT>0.6'
#TITLE+=' + select Tight cuts'
#TITLE+=' + Tight dEdx'
#TITLE+=' - 0.15<E<0.43 GeV'
#TITLE+=' - 0.4<Ereco<1.2 GeV'
#TITLE+=' - E_nu>1 GeV'
#TITLE+=' - dEdx > 3.0'

#TITLE = r''
#TITLE = r'1eNp $\nu_e$ selection'
#TITLE = r'1eNp loose selection, low BDT sideband'
#TITLE = r'1e0p loose selection, two shower sideband'

#VARIABLE, BINS, RANGE, XTIT = 'reco_e',7,(0.85, 2.55),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',21,(0.15, 2.25),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',15,(0.15, 2.25),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',10,(0.15, 1.55),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',2,(0.15, 0.43),r"Reconstructed Energy [ GeV ]"
# NOTE: this is not the variable that ends up being plotted -- VARIABLE, BINS,
# RANGE and XTIT are assigned again further down in this cell, before
# plot_variable is called, and the last active assignment wins.
VARIABLE, BINS, RANGE, XTIT = 'reco_e',17,(0.01, 2.39),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e_qe_l',17,(0.01, 2.39),r"Reconstructed QE Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',13,(0.05, 1.35),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',10,(0., 2.5),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',5,(0.15, 0.65),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',8,(0.15, 2.55),r"Reconstructed $\nu$ Energy [ GeV ]"#proposed for paper
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',8,(0.15, 2.39),r"Reconstructed $\nu$ Energy [ GeV ]"#proposed for paper
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',1,(0,0.15),r"Reconstructed $\nu$ Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',1,(0.65,3.15),r"Reconstructed $\nu$ Energy [ GeV ]"

#VARIABLE, BINS, RANGE, XTIT = 'theta1PlusTheta2',8,(0,4),"shr theta + trk theta"

#VARIABLE, BINS, RANGE, XTIT = 'nu_e',20,(0., 4),r"True Energy [ GeV ]"

#VARIABLE, BINS, RANGE, XTIT = 'reco_e',12,(0.1, 0.7),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_energy_tot_cali',12,(0.1, 0.7),r"Shower Energy [ GeV ]"

#VARIABLE, BINS, RANGE, XTIT = 'pi0_score',10,(0.,1),r"pi0 score"
#VARIABLE, BINS, RANGE, XTIT = 'nonpi0_score',10,(0.,1),r"non-pi0 score"
#VARIABLE, BINS, RANGE, XTIT = 'bkg_score',5,(0.5,1),r"BDT score"
#VARIABLE, BINS, RANGE, XTIT = 'bkg_score',6,(0.7,1),r"BDT score"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_gap10_dedx_Y',6,(0.5,6.5),"shr tkfit dE/dx (Y, 1-5 cm) [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_2cm_dedx_max',6,(0.5,6.5),"shr tkfit dE/dx (max, 0-2 cm) [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_gap10_dedx_max',6,(0.5,6.5),"shr tkfit dE/dx (max, 1-5 cm) [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',12,(0.5,6.5),"shr tkfit dE/dx (max, 0-4 cm) [MeV/cm]"

#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',10,(-1,1),r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',3,(-1,1),r"Electron cos($\theta$)"

#VARIABLE, BINS, RANGE, XTIT = 'shr_phi',30,(-3.25, 3.25),r"Shower $\phi$"
#VARIABLE, BINS, RANGE, XTIT = 'shr_theta',10,(0,3.14),r"Shower $\theta$"
#VARIABLE, BINS, RANGE, XTIT = 'n_tracks_contained',4,(0.5, 4.5),"n tracks contained"
#VARIABLE, BINS, RANGE, XTIT = 'n_showers_contained',1,(0.5, 1.5),"n showers contained"
#VARIABLE, BINS, RANGE, XTIT = 'n_tracks_tot',4,(-0.5, 3.5),"n tracks tot"
#VARIABLE, BINS, RANGE, XTIT = 'n_traks_attach',4,(0.5, 4.5),"n tracks attached"
#VARIABLE, BINS, RANGE, XTIT = 'n_tracks_cont_attach',3,(0.5, 3.5),"n tracks (contained, attached)"
#VARIABLE, BINS, RANGE, XTIT = 'tk2sh1_distance',10,(0, 10),"tk2sh1 distance"
#VARIABLE, BINS, RANGE, XTIT = 'nproton',3,(0.5, 3.5),"n protons"

#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',5,(0.5,5.5),"shr tkfit dE/dx (max, 0-4 cm) [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',16,(0.,8),"Shower dE/dx [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',12,(0.,12),"Shower dE/dx [MeV/cm]"

#VARIABLE, BINS, RANGE, XTIT = 'shr_trk_sce_start_y',40,(-120,120),"shr_trk_sce_start Y"
#VARIABLE, BINS, RANGE, XTIT = 'shr_trk_sce_end_y',40,(-120,120),"shr_trk_sce_end Y"

#test of tight variables
#VARIABLE, BINS, RANGE, XTIT = 'CosmicIPAll3D',10,(10,210),"CosmicIPAll3D [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'CosmicDirAll3D',20,(-1,1),"cos(CosmicDirAll3D)"
#VARIABLE, BINS, RANGE, XTIT = 'hits_ratio',10,(0.5,1.0),"shower hits/all hits"
#VARIABLE, BINS, RANGE, XTIT = 'shr_score',10,(0,0.5),r"shr score"
#VARIABLE, BINS, RANGE, XTIT = 'shrmoliereavg',30,(0,30),"Shower Transverse Development [degrees]"
#VARIABLE, BINS, RANGE, XTIT = 'subcluster',50,(0,50),"N sub-clusters in shower"
#VARIABLE, BINS, RANGE, XTIT = 'trkfit',14,(0.,0.7),"fraction of hits in shower trunk"
#VARIABLE, BINS, RANGE, XTIT = 'tksh_distance',12,(0,6),"Conversion Distance [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'trkshrhitdist2',10,(0,10),"2D trk-shr distance (Y)"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',10,(0.5,5.5),"shr tkfit dE/dx (max, 0-4 cm) [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'tksh_angle',19,(-0.9,1),"cos(trk-shr angle)"
#VARIABLE, BINS, RANGE, XTIT = 'pi0truth_gamma_parent',300,(1000,1300),"pi0truth gamma parent"
#VARIABLE, BINS, RANGE, XTIT = 'neta',3,(0,3),"N eta"

#QUERY += ' and CosmicIPAll3D > 30.'
#QUERY += ' and CosmicDirAll3D > -0.98 and CosmicDirAll3D < 0.98'
#QUERY += ' and hits_ratio > 0.65'
#QUERY += ' and shr_score < 0.25'
#QUERY += ' and shrmoliereavg > 2 and shrmoliereavg < 10'
#QUERY += ' and subcluster > 7'
#QUERY += ' and trkfit < 0.70'
#QUERY += ' and trkfit < 0.50'
#QUERY += ' and tksh_distance < 4.0'
#QUERY += ' and trkshrhitdist2 < 1.5'
#QUERY += ' and (shr_tkfit_nhits_tot > 1 and shr_tkfit_dedx_max > 1.0 and shr_tkfit_dedx_max < 3.8)'
#QUERY += ' and (secondshower_Y_nhit<=8 or secondshower_Y_dot<=0.8 or anglediff_Y<=40 or secondshower_Y_vtxdist>=100)'
#QUERY += ' and tksh_angle > -0.9 and tksh_angle < 0.70'

#QUERY += ' and (trkpid<0.02 or (trkpid<(3.3*protonenergy-0.1) or protonenergy>0.3))'
#QUERY += ' and (trkpid<0.02 or (trkpid<(-2.5*protonenergy*protonenergy+3.3*protonenergy-0.1)))'
#QUERY += ' and (trkpid<0.02 or trkpid<(3.3*protonenergy-0.10) or protonenergy>0.25)'
#QUERY += ' and (trkpid<0.02 or trkpid<(3.3*protonenergy-0.15) or protonenergy>0.25)'
#QUERY += ' and (trkpid<0.02 or trkpid<(3.3*protonenergy-0.20) or protonenergy>0.25)'
#QUERY += ' and (trkpid<0.02 or trkpid<(protonenergy) or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or trkpid<(3.3*protonenergy-0.1) or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or trkpid<(2.9*protonenergy-0.1) or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or trkpid<(2.7*protonenergy-0.08) or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or trkpid<(2.6*protonenergy-0.07) or protonenergy>0.30)'
#QUERY += ' and (trkpid<0.02 or trkpid<(2.7*protonenergy-0.08))'#THIS
#QUERY += ' and (trkpid<0.02 or (trkpid<(-2.5*protonenergy*protonenergy+3.3*protonenergy-0.1)) or protonenergy>0.25)'

#QUERY += ' and pi0_score > 0.45 and nonpi0_score > 0.45'
#QUERY += ' and pi0_score > 0.50 and nonpi0_score > 0.50'
#QUERY += ' and pi0_score > 0.55 and nonpi0_score > 0.55'

#QUERY += ' and protonenergy>0.07 and protonenergy<0.10'
#VARIABLE, BINS, RANGE, XTIT = 'trkpid',20,(-1, 1),"trk PID"
#VARIABLE, BINS, RANGE, XTIT = 'trk_len',20,(0, 100),"trk len"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',8,(0,0.48),"Leading proton kinetic energy [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',9,(0,0.45),"Leading proton kinetic energy [GeV]"

#VARIABLE, BINS, RANGE, XTIT = 'slclustfrac',10,(0, 1),r"slice clustered hit fraction"

#VARIABLE, BINS, RANGE, XTIT = 'reco_e',5,(0.01, 2.81),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',6,(-1,1),r"Electron cos($\theta$)"
# Only one choice should be active here. The variable-binning cos_shr_theta
# option used to be assigned and then immediately overwritten by the
# tksh_angle line below, so it is parked as a comment like the other
# alternatives; the values that reach the plot are unchanged.
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',np.array([-1.,0.2,0.7,0.9,1]),None,r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'electron_e',6,(0.,3.0),"Electron Energy [GeV]"#proposed for paper
VARIABLE, BINS, RANGE, XTIT = 'tksh_angle',5,(-1,1),"cos(opening angle)"
#VARIABLE, BINS, RANGE, XTIT = 'cos_trk_theta',6,(-1,1),r"Proton cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',4,(0.,0.8),"Leading proton kinetic energy [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',np.array([0.,0.05,0.10,0.2,0.3,0.8]),None,"Leading proton kinetic energy [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy_corr',np.array([0.,0.05,0.10,0.2,0.3,0.8]),None,"Leading proton kinetic energy [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',12,(0,0.6),"Leading proton kinetic energy [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'n_protons_attach',4,(0.5, 4.5),"n attached protons"
#VARIABLE, BINS, RANGE, XTIT = 'cos_trk_theta',np.array([-1.,0.,0.4,0.7,1]),None,r"Proton cos($\theta$)"

#VARIABLE, BINS, RANGE, XTIT = 'pi0_score',10,(0.,1),r"pi0 score"
#VARIABLE, BINS, RANGE, XTIT = 'nonpi0_score',10,(0.,1),r"non-pi0 score"

#VARIABLE, BINS, RANGE, XTIT = 'category',10,(0,100),"weigth"

#QUERY += ' and cos_shr_theta>0.6'
#QUERY += ' and shr_tkfit_gap10_dedx_max<6'
#QUERY += ' and shr_tkfit_gap10_dedx_max<4'
#QUERY += ' and reco_e>0.43'
#QUERY += ' and bkg_score>0.55'
#QUERY += ' and reco_e>0.43'
#QUERY += ' and bkg_score>0.35'
#QUERY += ' and bkg_score>0.6'
#QUERY += ' and (category==10 or category==11)'
#QUERY += ' and shr_tkfit_dedx_max>1'

#QUERY += ' and reco_e>0.51'
#QUERY += ' and bkg_score>0.4'
#QUERY += ' and cos_shr_theta>0.6'
#QUERY += ' and shr_tkfit_dedx_max<4'

#QUERY += ' and bkg_score>0.45'

#VARIABLE, BINS, RANGE, XTIT = 'bkg_score',10,(0,1),r"BDT score"
#VARIABLE, BINS, RANGE, XTIT = 'bkg_score',14,(0.3,1),r"BDT score"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',17,(0.01, 2.39),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',14,(0., 3.50),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'electron_e',20,(0., 2.),r"Electron Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',40,(0.2, 1.2),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_e',20,(0.4, 0.6),r"Reconstructed Energy [ GeV ]"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',20,(-1,1),r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',10,(-1,1),r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',2,(0.5,1),r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'cos_shr_theta',16,(0.60,1.0),r"Electron cos($\theta$)"
#VARIABLE, BINS, RANGE, XTIT = 'n_showers_contained',1,(0.5, 1.5),"normalization"
#VARIABLE, BINS, RANGE, XTIT = 'tksh_distance',7,(2, 30),"Conversion Distance [cm]"

#QUERY += ' and isVtxInFiducial==1'
#VARIABLE, BINS, RANGE, XTIT = 'true_nu_vtx_sce_x',50,(0, 250),"true vtx x [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'true_nu_vtx_y',50,(-125, 125),"true vtx y [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'true_nu_vtx_z',50,(0, 1000),"true vtx z [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'isVtxInFiducial',2,(-0.5, 1.5),"is Vtx In Fiducial"

#VARIABLE, BINS, RANGE, XTIT = 'reco_nu_vtx_sce_x',50,(0, 250),"reco vtx x [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_nu_vtx_sce_y',50,(-125, 125),"reco vtx y [cm]"
#VARIABLE, BINS, RANGE, XTIT = 'reco_nu_vtx_sce_z',50,(0, 1000),"reco vtx z [cm]"

#VARIABLE, BINS, RANGE, XTIT = 'shr_score',10,(0,0.5),r"shr score"
#VARIABLE, BINS, RANGE, XTIT = 'CosmicIPAll3D',10,(0,50),"CosmicIPAll3D [cm]" #remove
#VARIABLE, BINS, RANGE, XTIT = 'CosmicDirAll3D',20,(-1,1),"cos(CosmicDirAll3D)" #remove
#VARIABLE, BINS, RANGE, XTIT = 'shrmoliereavg',30,(0,30),"Shower Transverse Development [degrees]"
#VARIABLE, BINS, RANGE, XTIT = 'subcluster',10,(0,10),"N sub-clusters in shower"
#VARIABLE, BINS, RANGE, XTIT = 'trkfit',12,(0.4,1),"fraction of hits in shower trunk"
#VARIABLE, BINS, RANGE, XTIT = 'secondshower_Y_nhit',10,(0.,100),"secondshower_Y_nhit var"
#VARIABLE, BINS, RANGE, XTIT = 'shr_trk_sce_start_y',24,(-120.,120),"shr_trk_sce_start_y var"
#VARIABLE, BINS, RANGE, XTIT = 'shr_trk_sce_end_y',26,(-130,130),"shr_trk_sce_end_y var"
#VARIABLE, BINS, RANGE, XTIT = 'shr_trk_len',10,(0.,500),"shr_trk_len var"

#VARIABLE, BINS, RANGE, XTIT = 'proton_ke',10,(0,0.10),"proton KE [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'proton_ke',50,(0.,0.50),"proton KE [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'proton_ke',50,(0.,1.0),"proton KE [GeV]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_dedx_max',24,(0.,12),"Shower dE/dx [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_gap10_dedx_max',24,(0.,12),"Shower dE/dx gap10 [MeV/cm]"
#VARIABLE, BINS, RANGE, XTIT = 'shr_tkfit_gap10_dedx_max',12,(2,8),"Shower dE/dx gap10 [MeV/cm]"

#VARIABLE, BINS, RANGE, XTIT = 'n_tracks_contained',5,(-0.5, 4.5),"n tracks contained"
#VARIABLE, BINS, RANGE, XTIT = 'n_tracks_tot',3,(-0.5, 2.5),"n tracks tot"

#VARIABLE, BINS, RANGE, XTIT = 'protonenergy',2,(0,0.1),"Leading proton kinetic energy [GeV]"


print(VARIABLE, BINS, RANGE, XTIT)

# Draw VARIABLE with the plotter of the chosen run. The options are collected
# in a dict first; only the first three returned items are kept.
this_plotter = plotter_runbyrun[run]
plot_options = dict(
    kind="event_category",  # alternatives: "paper_category", "sample", "interaction"
    query=QUERY + " and paper_category!=111",  # alternative: query=QUERY
    #genieweight="weightSpline",
    draw_sys=True,
    detsysdict=DETSYS,
    DETSYSPATH=DETSYSPATH,
    COVMATRIX="",  #'cov_matrix_Enu_reco_1eNp.txt',
    ratio=False,
    stacksort=3,
    title=XTIT,
    bins=BINS,
    range=RANGE,
    chisq=True,
    #asymErrs=True
)
fig, ax1, ax2 = this_plotter.plot_variable(VARIABLE, **plot_options)[0:3]

# Print the three p-value entries the plotter stored in its stats dict
single_stats = this_plotter.stats
print('%s %.4f %.4f %.4f' % (VARIABLE, single_stats['pvaluestatonly'], single_stats['pvaluediag'], single_stats['pvalue']))

#ax1.set_ylim(0, ax1.get_ylim()[1]*1.8)

ax1.set_title(TITLE, loc='left')
# Start the y axis at zero and raise its upper limit by 50%
upper_ylim = ax1.get_ylim()[1]
ax1.set_ylim(0.0, upper_ylim*1.5)
#ax1.set_yscale('log')

#ax1.step(bincenters-(BINSIZE)/2.,n_lee,color='red',lw=2,linestyle='--',where='post',label=r'eLEE model ($x = 1$)')

plt.tight_layout()
fig.show()
save_path = ls.plots_path
#fig.savefig(save_path + '_test_run3_trkpidcut.png', dpi=250)    
#fig.savefig(save_path + VARIABLE + '_1enp_bdt_np1.pdf', dpi=250)    
#fig.savefig(save_path + VARIABLE + '_1enp_tight+bdt06.pdf', dpi=250)    
#fig.savefig(save_path + VARIABLE + '_1enp_coarse.pdf', dpi=250)    
#fig.savefig(save_path + VARIABLE + '_1e0p_2shr.pdf', dpi=250)    
#plt.close()
 

# Multiple variable plotting

In [None]:
from unblinding_far_sideband import *

# Suffix added to the output folder name when the dataset is neither
# "opendata" nor "fulldata" (see the folder selection in the loop below).
dirappend = "_xsec"

#plot_variables = [('reco_e',7,(0.05,2.85),r"Reconstructed Energy [GeV]","coarse")]

# One tuple per plot, read by the loop below as
#   (variable, bins, range, x-axis title[, file-name suffix[, log-y flag]])
# - bins is either a number of bins or an array of bin edges (then range is None)
# - the optional 5th entry is appended to the output file name as '_<suffix>'
# - a 6-entry tuple whose last entry is True gets a logarithmic y axis
# Only 4-entry tuples contribute to the p-value summary.
plot_variables = [
#                  ('shrmoliereavg',9,(0,9),"average Moliere angle [degrees]","zoomed"),
#                  ('shrmoliereavg',10,(0,10),"average Moliere angle [degrees]","zoomed"),
#                  ('subcluster',20,(0,80),"N sub-clusters in shower","extended"),
#                  ('tksh_distance',12,(0,6),"trk-shr distance [cm]","zoomed"),
#                  ('theta1PlusTheta2',8,(0,4),"shr theta + trk theta","zoomed"),
#                  ('tksh_angle',10,(-0.9,1),"cos(trk-shr angle)","zoomed"),
#                   ('tksh_angle',19,(-0.9,1),"cos(trk-shr angle)","fine")
#                  ('trkshrhitdist2',10,(0,10),"2D trk-shr distance (Y)","zoomed"),
#                  ('tksh_distance',6,(0,6),"trk-shr distance [cm]","zoomed2"),
#                  ('trkpid',5,(-1,0.02),"track LLR PID","zoomed"),
#                  ('hits_ratio',10,(0.5,1.0),"shower hits/all hits","zoomed"),
#                  ('trkfit',14,(0.,0.7),"fraction of hits in shower trunk","zoomed"),
#                  ('shr_tkfit_dedx_max',5,(0.5,5.5),"shr tkfit dE/dx (max, 0-4 cm) [MeV/cm]","zoomed"),
#                  ('nonpi0_score',6,(0,1.0),"BDT non-$\pi^0$ score","coarse"),
#                  ('pi0_score',6,(0,1.0),"BDT $\pi^0$ score","coarse"),
#                  ('bkg_score',10,(0,1.0),"1e0p BDT score"),
#                  ('reco_e',7,(0.05,2.85),r"Reconstructed Energy [GeV]","coarse"),
#                  ('reco_e',21,(0.05,2.15),r"Reconstructed Energy [GeV]"),
#                  ('reco_e',20,(0.05,3.05),r"Reconstructed Energy [GeV]","extended"),
#                  ('reco_e',10,(0.15,1.55),r"Reconstructed Energy [GeV]","nominal"),
#                  ('cos_shr_theta',10,(-1,1),"cos(shr theta)"),
#                  ('cos_trk_theta',10,(-1,1),"cos(trk theta)"),
#                  ('cos_shr_theta',6,(-1,1),"cos(shr theta)","coarse"),
#                  ('cos_trk_theta',6,(-1,1),"cos(trk theta)","coarse"),
#                  ('shr_energy_tot_cali',6,(0,1.8),"shr energy (calibrated) [GeV]","coarse"),
#                  ('protonenergy',6,(0,0.48),"proton kinetic energy [GeV]","coarse"),
#                  ('shr_energy_tot_cali',3,(0,0.6),"shr energy (calibrated) [GeV]","zoom"),
#                  ('protonenergy',4,(0,0.32),"proton kinetic energy [GeV]","zoom"),
#                  ('trk_phi',5,(-3.25, 3.25),r"Track $\phi$","coarse"),
#                  ('shr_phi',5,(-3.25, 3.25),r"Shower $\phi$","coarse"),
#                  ('n_protons_attach',4,(0.5, 4.5),"N protons at vertex"),
#                  ('subcluster',9,(5,50),"N sub-clusters in shower","zoomed"),
#                  ('CosmicIPAll3D',10,(10,210),"CosmicIPAll3D [cm]","zoomed"),
#                  ('reco_e',5,(0.15, 0.65),r"Reconstructed Energy [ GeV ]","zoomed"),
                  ('reco_e',5,(0.01, 2.81),r"Reconstructed Energy [ GeV ]"),
                  ('cos_shr_theta',6,(-1,1),r"Electron cos($\theta$)"),
                  ('cos_shr_theta',np.array([-1.,0.2,0.7,0.9,1]),None,r"Electron cos($\theta$)","var"),
                  ('electron_e',6,(0.,3.0),"Electron Energy [GeV]"),
                  ('tksh_angle',5,(-1,1),"cos(opening angle)"),
                  ('cos_trk_theta',6,(-1,1),r"Proton cos($\theta$)"),
                  ('protonenergy',4,(0,0.8),"Leading proton kinetic energy [GeV]"),
                  ('protonenergy',np.array([0,0.1,0.2,0.3,0.8]),None,"Leading proton kinetic energy [GeV]","var"),
                  ('n_protons_attach',4,(0.5, 4.5),"n attached protons"),
                  ('pi0_score',10,(0.,1),r"pi0 score"),
                  ('nonpi0_score',10,(0.,1),r"non-pi0 score")
]
#plot_variables += kinematic_variables
#plot_variables += energy_variables
#plot_variables += bdtscore_variables
#plot_variables += loosesel_variables_1eNp
#plot_variables += basic_variables
#plot_variables += bdt_common_variables_1eNp

# Sideband, preselection and the list of selections / runs to loop over;
# the definitions live in unblinding_far_sideband.py
#sdb_key = 'None'
#pre_key = 'NP'
#sel_keys = ['NPXSBDT','NPXSL'] #['NPL','NPT'] #['None','NPVL','NPL','NPBDT','NPT']
sdb_key = 'TwoPShr'
pre_key = 'NP'
sel_keys = ['NPXSBDTAllShr','NPXSLAllShr']
#sel_keys = ['None','NPVLAllShr','NPLAllShr','NPBDTAllShr','NPTAllShr'] #['None','NPVLAllShr','NPLAllShr','NPBDTAllShr','NPTAllShr']
#sdb_key = 'LPID'
#pre_key = 'NP'
#sel_keys = ['None','NPVL','NPL']#
runs = [123] #[1,2,3,12,123]

# The sideband and preselection are the same for every selection in the loop
sideband, preselection = (
    sideband_categories[sdb_key],
    preselection_categories[pre_key],
)

import os  # local to this cell: used to create the output folders

# For every run and every selection: build the output folder, the query and the
# title, draw all entries of plot_variables, save each plot as a .png, and
# finally histogram the collected p-values.
for run in runs:
    run_plotter = plotter_runbyrun[run]
    for sel_key in sel_keys:
        sel =  selection_categories[sel_key]
        print('\nRun %i, %s, %s, %s'%(run, sideband['dir'], preselection['dir'], sel['dir']))

        # Output folder: <plots>/<dataset folder>/run_<run>/<sideband>_<presel>_<sel><pi0 suffix>/
        if (which_sideband[1] == "opendata"):
            this_folder = ls.plots_path+'PELEE_opendata/'
        elif (which_sideband[1] == "fulldata"):
            this_folder = ls.plots_path+'PELEE_fulldata/'
        else:
            this_folder = ls.plots_path+'PELEE_far_sideband_runbyrun'+dirappend+'/'
        this_folder += 'run_{}/'.format(run)
        this_folder += '{}_{}_{}'.format(sideband['dir'], preselection['dir'], sel['dir'])
        this_folder += (pi0scaling["suffix"]+'/')
        # Same effect as the `!mkdir -p` shell escape, without spawning a shell
        os.makedirs(this_folder, exist_ok=True)

        # Detector-systematics folder for THIS sideband/preselection/selection.
        # The DETSYSPATH global is set by the single-variable cell for whatever
        # selection was chosen there, so it must not be reused here.
        this_detsyspath = ls.ntuple_path+'/detsys/{}_{}_{}/'.format(sideband['dir'], preselection['dir'], sel['dir'])

        # Join the non-empty cuts; the run flag is added once for runs 1, 2, 3, 12.
        # Building from a list avoids a leading ' and ' when every cut is empty.
        query_parts = [q for q in (sideband['query'], preselection['query'], sel['query']) if q]
        if run in [1, 2, 3, 12]:
            query_parts.append(f'run{run}==True')
        this_query = ' and '.join(query_parts)

        #formatting can be improved..
        title_pieces = [f'Run {run}',' - ',sideband['title'],' - ',preselection['title'],'\n',sel['title'],' - ',pi0scaling["title"]]
        this_title = "".join(piece for piece in title_pieces if piece is not None)

        # one list of p-values per entry of pvaluetypes
        pvalues = [[] for _ in pvaluetypes]

        for plot_variable in plot_variables:
            VARIABLE, BINS, RANGE, XTIT = plot_variable[0:4]
            #if VARIABLE != 'tksh_angle': continue
            #if len(plot_variable)>4 or VARIABLE not in TRAINVAR: continue
            print(VARIABLE, BINS, RANGE, XTIT)
            fig, ax1, ax2 = run_plotter.plot_variable(
                VARIABLE,
                query=this_query,
                kind="event_category",
                #kind="paper_category",
                draw_sys=True,
                detsysdict=DETSYS,
                DETSYSPATH=this_detsyspath,
                COVMATRIX="",#'cov_matrix_Enu_reco_1eNp.txt',
                ratio=True,
                stacksort=3,
                title=XTIT,
                bins=BINS,
                range=RANGE,
                chisq=True,
            )[0:3]

            # A 6-entry tuple requests a log y axis via its last entry;
            # every other tuple length gets a linear axis with 50% extra room on top
            if len(plot_variable) == 6:
                if plot_variable[5] is True:
                    ax1.set_yscale('log')
            else:
                ax1.set_ylim(0, ax1.get_ylim()[1]*1.5)

            # fix position of chi2 wrt legend: the figure is drawn first so that
            # the legend has a window extent; if the legend sits on the left, the
            # text containing 'n.d.f.' is moved to x=0.80
            plt.draw()
            legleft = (ax1.get_legend().get_window_extent().x0 < 100)
            if legleft:
                for a in ax1.get_children():
                    texts = a.findobj(match=plt.Text)
                    if len(texts)==0: continue
                    txt = texts[0]
                    if 'n.d.f.' not in txt.get_text(): continue
                    #print(txt.get_text())
                    txt.set_x(0.80)
            # END - fix position of chi2 wrt legend

            ax1.set_title(this_title, loc='left')

            plt.tight_layout()
            # Loop-local name on purpose: the notebook-level `save_path`
            # (the base plots folder used by other cells) is left untouched
            out_path = this_folder+VARIABLE
            if len(plot_variable) >= 5:
                out_path += ('_' + plot_variable[4])
            #fig.savefig(out_path + '.pdf', dpi=250)
            fig.savefig(out_path + '.png', dpi=250)
            plt.close()

            # store pvalues (only for 4-entry tuples whose variable is in TRAINVAR)
            var_stats = run_plotter.stats
            if len(plot_variable)==4 and VARIABLE in TRAINVAR:
                for pvt, values in zip(pvaluetypes, pvalues):
                    values.append(var_stats[pvt])
            #print ('%s %.04f'%(VARIABLE,var_stats['chisqstatonly']))
            print('%s %.4f %.4f %.4f' % (VARIABLE, var_stats['pvaluestatonly'], var_stats['pvaluediag'], var_stats['pvalue']))

        # make pvalue plots -- only when every TRAINVAR variable contributed one value
        #print(pvalues)
        for pvt, values in zip(pvaluetypes, pvalues):
            if len(values)!=len(TRAINVAR): continue
            plt.figure(figsize=(9, 7))
            plt.hist(values, bins=10, range=(0,1))
            plt.title(this_title, loc='left')
            plt.xlabel(pvt)
            plt.ylabel("number of BDT input variables")
            plt.savefig(this_folder + pvt + '.pdf', dpi=250)
            plt.close()

In [None]:
# Table of the data rows passing QUERY, restricted to the columns listed below
# (shown as the output of this cell).
event_columns = [
    "run", "sub", "evt",
    "reco_e",
    "pi0_score", "nonpi0_score",
    "n_showers_contained", "n_tracks_contained",
    "reco_nu_vtx_sce_x", "reco_nu_vtx_sce_y", "reco_nu_vtx_sce_z",
]
samples['data'].query(QUERY)[event_columns]