In [1]:
import os, sys
sys.path.append("../../common/")
from python_tools import *

python tools loaded.


In [2]:
%matplotlib notebook

In [17]:
# some useful functions
# 
# you may need to comment out the 'numba' bits if your system can't install numba (like the gpvms...)
#

@numba.jit
def costheta_numba(p1x,p1y,p1z,p1mag,
                   p2x,p2y,p2z,p2mag):
    return np.clip(np.where((p1mag>0.0)&(p2mag>0.0),
                            (p1x*p2x+p1y*p2y+p1z*p2z)/p1mag/p2mag,
                            np.nan),
                   -1.0,1.0)

def eval_costheta(df,suffix1="",suffix2=""):
    return costheta_numba(df.loc[:,"px"+suffix1].values,df.loc[:,"py"+suffix1].values,df.loc[:,"pz"+suffix1].values,df.loc[:,"p"+suffix1].values,
                          df.loc[:,"px"+suffix2].values,df.loc[:,"py"+suffix2].values,df.loc[:,"pz"+suffix2].values,df.loc[:,"p"+suffix2].values)

    
@numba.jit(nopython=True)
def q3_numba(p1x,p1y,p1z,p2x,p2y,p2z):
    return np.sqrt((p1x-p2x)**2+(p1y-p2y)**2+(p1z-p2z)**2)

def eval_q3(df,suffix1="",suffix2="_mu"):
    return q3_numba(df.loc[:,"px"+suffix1].values,df.loc[:,"py"+suffix1].values,df.loc[:,"pz"+suffix1].values,
                    df.loc[:,"px"+suffix2].values,df.loc[:,"py"+suffix2].values,df.loc[:,"pz"+suffix2].values)

In [4]:
root_filenames = glob.glob("/Users/wketchum/Data/MicroBooNE/FakeData2020/Set2/Run3/*/*.root")

In [6]:
#Read from files (ROOT or HDF5)

t_df = []
p_df = []
pot_df = []

file_count = 0
event_count = 0
print("Processing %d files" % len(root_filenames))

for root_filename in root_filenames:
    
    try:
        p_df.append(uproot.open(root_filename)['mcana/particle_tree'].pandas.df())
        t_df.append(uproot.open(root_filename)['mcana/mctruth_tree'].pandas.df())
        pot_df.append(uproot.open(root_filename)['potana/pot_tree'].pandas.df())
    except:
        print("File %s, trees not found."%root_filename)
    
    event_count += len(t_df[-1])
    file_count += 1
    if file_count%500==0:
        print("\tProcessed %d files. %d events processed." % (file_count,event_count))

p_df = pd.concat(p_df)
t_df = pd.concat(t_df)
pot_df = pd.concat(pot_df)

p_df.set_index(["run","subrun","event","truth_index","p_index"],inplace=True)
t_df.set_index(["run","subrun","event","truth_index"],inplace=True)
pot_df.set_index(["run","subrun"],inplace=True)
        
print("Have dataframe objects. Total events is %d." % len(t_df))

Processing 19513 files
	Processed 500 files. 8961 events processed.
	Processed 1000 files. 18131 events processed.
	Processed 1500 files. 27069 events processed.
File /Users/wketchum/Data/MicroBooNE/FakeData2020/Set2/Run3/32044093_1706/sampler_hist_1706.root, trees not found.
	Processed 2000 files. 36214 events processed.
	Processed 2500 files. 45385 events processed.
	Processed 3000 files. 54525 events processed.
	Processed 3500 files. 63836 events processed.
File /Users/wketchum/Data/MicroBooNE/FakeData2020/Set2/Run3/32044121_1183/sampler_hist_1183.root, trees not found.
	Processed 4000 files. 73192 events processed.
	Processed 4500 files. 82578 events processed.
	Processed 5000 files. 91642 events processed.
	Processed 5500 files. 100922 events processed.
	Processed 6000 files. 110031 events processed.
	Processed 6500 files. 119490 events processed.
	Processed 7000 files. 128757 events processed.
	Processed 7500 files. 137751 events processed.
	Processed 8000 files. 146855 events pr

In [7]:
# calculate integrated POT for all the events we have

TOTAL_POT = pot_df["totpot"].sum()
TOTAL_EVENTS = len(t_df)
print("\n\n")
print("Total events of %d in POT of %E. Events per 1e20 POT is %f" % (TOTAL_EVENTS,
                                                                      TOTAL_POT,
                                                                      TOTAL_EVENTS/(TOTAL_POT/1e20)))
print("\n\n")




Total events of 357262 in POT of 3.454393E+20. Events per 1e20 POT is 103422.517476





In [8]:
df_n = pd.DataFrame()
df_n["is_mu"] = ((p_df["status"]==1)&(p_df["pdgcode"]==13)).replace(False,np.nan)
df_n["is_p_30MeV"] = ((p_df["status"]==1)&(p_df["pdgcode"]==2212)&((p_df["e"]-p_df["mass"])>0.03)).replace(False,np.nan)
df_n["is_pi"] = ((p_df["status"]==1)&((p_df["pdgcode"]==211)^(p_df["pdgcode"]==-211)^(p_df["pdgcode"]==111))).replace(False,np.nan)
df_n = df_n.groupby(["run","subrun","event","truth_index"]).agg("sum").rename(columns={"is_mu":"n_mu","is_p_30MeV":"n_p_30MeV","is_pi":"n_pi"})

In [9]:
p_df_mu = p_df.query("status==1 and pdgcode==13").groupby(["run","subrun","event","truth_index"]).first()
p_df_nu = p_df.query("status==0 and pdgcode==14").groupby(["run","subrun","event","truth_index"]).first()

In [10]:
df_ev_t = t_df.copy()
df_ev_t = df_ev_t.merge(p_df_nu,how="left",on=["run","subrun","event","truth_index"],suffixes=["","_nu"])
df_ev_t = df_ev_t.merge(p_df_mu,how="left",on=["run","subrun","event","truth_index"],suffixes=["","_mu"])
df_ev_t = df_ev_t.merge(df_n,how="left",on=["run","subrun","event","truth_index"])

In [11]:
df_ev_t

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,n_particles,origin,mode,interaction_type,ccnc,target,hit_nucl,hit_quark,hadronic_mass,interaction_x,...,pt_mu,e_mu,mass_mu,end_px_mu,end_py_mu,end_pz_mu,end_e_mu,n_mu,n_p_30MeV,n_pi
run,subrun,event,truth_index,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
32044093,5963,1,0,12,1,1,1007,1,1000180400,2212,0,1.254441,0.012864,...,,,,,,,,0.0,2.0,0.0
32044093,5963,2,0,11,1,0,1002,1,1000180400,2112,0,1.050697,0.758080,...,,,,,,,,0.0,1.0,0.0
32044093,5963,3,0,12,1,1,1003,0,1000180400,2212,0,1.397657,0.236101,...,0.288972,0.386180,0.105658,0.268631,-0.106499,0.233380,0.386180,1.0,2.0,1.0
32044093,5963,4,0,14,1,0,1001,0,1000180400,2112,0,0.933446,1.014912,...,0.490337,0.514387,0.105658,-0.340144,0.353174,0.114018,0.514387,1.0,2.0,0.0
32044093,5963,5,0,12,1,1,1003,0,1000180400,2212,0,1.297618,0.330562,...,0.101340,0.168629,0.105658,-0.009646,-0.100879,-0.083679,0.168629,1.0,2.0,0.0
32044093,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32044093,9983,6,0,8,1,0,1001,0,1000180400,2112,0,1.005090,0.652943,...,0.202885,0.235770,0.105658,0.182219,0.089210,-0.057108,0.235770,1.0,1.0,0.0
32044093,9983,7,0,32,1,0,1002,1,1000180400,2212,0,1.059362,0.543713,...,,,,,,,,0.0,2.0,0.0
32044093,9983,8,0,12,1,10,1000,0,1000180400,2000000200,0,0.949174,0.964324,...,0.523672,0.643029,0.105658,-0.469348,0.232259,0.357898,0.643029,1.0,1.0,0.0
32044093,9983,9,0,18,1,10,1000,0,1000180400,2000000201,0,1.239038,0.070226,...,0.133959,0.377712,0.105658,0.119718,-0.060105,0.336983,0.377712,1.0,0.0,0.0


In [12]:
max_x = 250.
min_x = 5.
max_y = 100.
min_y = -100.
max_z = 1000.
min_z = 10.

fid_vol = (max_x-min_x)*(max_y-min_y)*(max_z-min_z)
print("Active volume: %f cm^3" % fid_vol)

ar_density=1.3836
ar_mmol = 39.95
av_k=6.02214e23
n_nucl=40

target_nucl=fid_vol*ar_density/ar_mmol*av_k*n_nucl
print("Target Nucleons: %.3e cm^3" % target_nucl)

Active volume: 48510000.000000 cm^3
Target Nucleons: 4.047e+31 cm^3


In [13]:
flux_per_pot = 7.379e-10
flux_tot = TOTAL_POT*flux_per_pot

print("Total Flux: %.3e cm^-2"%flux_tot)

Total Flux: 2.549e+11 cm^-2


In [19]:
query_mucc = ("n_mu==1 and ccnc==0 and start_x_mu>%f and start_x_mu<%f and start_y_mu>%f and start_y_mu<%f and start_z_mu>%f and start_z_mu<%f"%(min_x,max_x,min_y,max_y,min_z,max_z))
df_mucc = df_ev_t.query(query_mucc).copy()
EVENTS_FID=len(df_mucc)
print("total events = %d"%EVENTS_FID)

total events = 94313


In [20]:
total_xsec = EVENTS_FID/(flux_tot*target_nucl)
print(total_xsec)

9.142531089541006e-39


In [21]:
df_mucc["costheta_mu"] = eval_costheta(df=df_mucc,suffix1="",suffix2="_mu")
df_mucc["q0"] = df_mucc["e"] - df_mucc["e_mu"]
df_mucc["q3"] = eval_q3(df=df_mucc,suffix1="",suffix2="_mu")

In [22]:
df_mucc

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,n_particles,origin,mode,interaction_type,ccnc,target,hit_nucl,hit_quark,hadronic_mass,interaction_x,...,end_px_mu,end_py_mu,end_pz_mu,end_e_mu,n_mu,n_p_30MeV,n_pi,costheta_mu,q0,q3
run,subrun,event,truth_index,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
32044093,5963,3,0,12,1,1,1003,0,1000180400,2212,0,1.397657,0.236101,...,0.268631,-0.106499,0.233380,0.386180,1.0,2.0,1.0,0.632953,0.747223,0.943203
32044093,5963,5,0,12,1,1,1003,0,1000180400,2212,0,1.297618,0.330562,...,-0.009646,-0.100879,-0.083679,0.168629,1.0,2.0,0.0,-0.637564,0.638173,0.896329
32044093,5963,7,0,15,1,1,1003,0,1000180400,2212,0,1.470114,0.292676,...,-0.025663,0.312165,0.170332,0.371863,1.0,3.0,0.0,0.475160,0.963432,1.207354
32044093,5963,20,0,12,1,10,1000,0,1000180400,2000000201,0,1.299231,0.034050,...,0.013344,-0.124551,0.862827,0.878251,1.0,3.0,0.0,0.990153,0.444586,0.475482
32044041,4696,1,0,9,1,0,1001,0,1000180400,2112,0,0.906341,1.060150,...,0.149750,-0.440916,-0.049330,0.480031,1.0,1.0,0.0,-0.104306,0.532223,1.158795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32044093,9983,2,0,9,1,0,1001,0,1000180400,2112,0,1.102137,0.430368,...,0.107251,0.028731,-0.090591,0.178041,1.0,1.0,0.0,-0.631293,0.311437,0.590495
32044093,9983,5,0,8,1,0,1001,0,1000180400,2112,0,1.024969,0.370398,...,-0.104858,-0.234826,0.397841,0.485365,1.0,1.0,0.0,0.838805,0.142938,0.346195
32044093,9983,6,0,8,1,0,1001,0,1000180400,2112,0,1.005090,0.652943,...,0.182219,0.089210,-0.057108,0.235770,1.0,1.0,0.0,-0.267976,0.197383,0.530070
32044093,9983,8,0,12,1,10,1000,0,1000180400,2000000200,0,0.949174,0.964324,...,-0.469348,0.232259,0.357898,0.643029,1.0,1.0,0.0,0.561691,0.289024,0.779046


In [47]:
bins_theta = [ -1.00, -0.50, 0.00, 0.28, 0.47, 0.63, 0.765, 0.865, 0.935, 1.00 ]

fig,ax = plt.subplots()

plt.grid(axis='y', linewidth=0.5)
evs_cth,ar_cth,patches = plt.hist(df_mucc["costheta_mu"], color='red', label='$\mu$', bins=bins_theta,**pltops_hist)
#plt.legend(loc=2)
plt.title("BNB $\\nu_\mu$ CC (GENIE Truth Study, Set2)")
plt.xlabel("Muon $cos(\\theta)$")
plt.ylabel("Events")
plt.show()
#plt.savefig("plots/genie_1mu1p_costheta.pdf")

<IPython.core.display.Javascript object>

In [48]:
print(evs_cth)
print(ar_cth)
dxsec_dcth = [ evs_cth[i]/(ar_cth[i+1]-ar_cth[i])/(flux_tot*target_nucl) for i in range(len(evs_cth))]
print(dxsec_dcth)
xsec_tot_dcth_check=0
for i in range(len(dxsec_dcth)):
    xsec_tot_dcth_check += dxsec_dcth[i]*(ar_cth[i+1]-ar_cth[i])
print(xsec_tot_dcth_check)

[ 5891.  9296.  8704.  8601. 10361. 12576. 13205. 12239. 13440.]
[-1.    -0.5    0.     0.28   0.47   0.63   0.765  0.865  0.935  1.   ]
[1.142125701620902e-39, 1.8022747449105254e-39, 3.013392737986617e-39, 4.3882385975222604e-39, 6.277353375113607e-39, 9.03033052527362e-39, 1.280068739594637e-38, 1.6948949319953206e-38, 2.004383404905127e-38]
9.142531089541006e-39


In [63]:
dxsec_dcth_plt=dxsec_dcth
dxsec_dcth_plt.append(dxsec_dcth[-1])

fig,ax = plt.subplots()
plt.plot(ar_cth,dxsec_dcth_plt,color='blue',linewidth=2.0,ds="steps-post")
plt.grid()
#plt.legend()
#plt.title("LEE $\\nu_e$ signal model weights",fontsize=24)
#plt.xlabel("$E_\\nu$ (GeV)",fontsize=18)
#plt.ylabel("weight",fontsize=18)
plt.show()

<IPython.core.display.Javascript object>

In [45]:
bins_p = [ 0.00, 0.18, 0.30, 0.45, 0.77, 1.28, 2.50 ]

fig,ax = plt.subplots()

plt.grid(axis='y', linewidth=0.5)
evs_p,ar_p,patches = plt.hist(df_mucc["p_mu"], color='red', label='$\mu$', bins=bins_p,**pltops_hist)
#plt.legend()
plt.title("BNB $\\nu_\mu$ CC (GENIE Truth Study, Set2)")
plt.xlabel("Muon $p$")
plt.ylabel("Events")
plt.show()
#plt.savefig("plots/genie_1mu1p_costheta.pdf")

<IPython.core.display.Javascript object>

In [50]:
print(evs_p)
print(ar_p)
dxsec_dp = [ evs_p[i]/(ar_p[i+1]-ar_p[i])/(flux_tot*target_nucl) for i in range(len(evs_p))]
print(dxsec_dp)
xsec_tot_dp_check=0
for i in range(len(dxsec_dp)):
    print(dxsec_dp[i]*(ar_p[i+1]-ar_p[i]))
    xsec_tot_dp_check += dxsec_dp[i]*(ar_p[i+1]-ar_p[i])
print(xsec_tot_dp_check)

[ 5128. 12080. 18261. 29392. 21702.  6645.]
[0.   0.18 0.3  0.45 0.77 1.28 2.5 ]
[2.7616611959448432e-39, 9.758444007511809e-39, 1.1801254703388948e-38, 8.903772338641983e-39, 4.125004905551642e-39, 5.279952808862988e-40]
4.970990152700718e-40
1.1710132809014171e-39
1.7701882055083425e-39
2.8492071483654346e-39
2.1037525018313372e-39
6.441542426812845e-40
9.035414394557888e-39


In [64]:
dxsec_dp_plt=dxsec_dp
dxsec_dp_plt.append(dxsec_dp[-1])

fig,ax = plt.subplots()
plt.plot(ar_p,dxsec_dp_plt,color='blue',linewidth=2.0,ds="steps-post")
plt.grid()
#plt.legend()
#plt.title("LEE $\\nu_e$ signal model weights",fontsize=24)
#plt.xlabel("$E_\\nu$ (GeV)",fontsize=18)
#plt.ylabel("weight",fontsize=18)
plt.show()

<IPython.core.display.Javascript object>

In [None]:
bins_x = np.arange(-100,300,1)
bins_y = np.arange(-200,200,1)
bins_z = np.arange(-200,1500,1)

fig,axes = plt.subplots(3,1)

axes[0].grid(axis='y', linewidth=0.5)
axes[0].hist(df_mucc["start_x_mu"], color='red', label='$\mu$', bins=bins_x,**pltops_hist)
axes[1].grid(axis='y', linewidth=0.5)
axes[1].hist(df_mucc["start_y_mu"], color='red', label='$\mu$', bins=bins_y,**pltops_hist)
axes[2].grid(axis='y', linewidth=0.5)
axes[2].hist(df_mucc["start_z_mu"], color='red', label='$\mu$', bins=bins_z,**pltops_hist)
#plt.legend()
#plt.title("BNB $\\nu_\mu$ CC (GENIE Truth Study, Set2)")
#plt.xlabel("Muon $p$")
#plt.ylabel("Events")
plt.show()
#plt.savefig("plots/genie_1mu1p_costheta.pdf")