# 1. Import Ntuple and DecayHash

In [3]:
import glob
from enum import Enum

import matplotlib.pyplot as plt
import numpy as np
import pandas
from tqdm.notebook import tqdm

# define DecayModes from DecayHash
class DecayMode(Enum):
    bkg = 0
    sig_D_tau_nu = 1
    sig_D_e_nu = 2
    sig_Dst_tau_nu = 3
    sig_Dst_e_nu = 4
    all_Dstst_tau_nu = 5
    all_Dstst_e_nu = 6
    sig_D_mu_nu = 7
    sig_Dst_mu_nu = 8
    all_Dstst_mu_nu = 9
    
# functional syntax
DecayMode = Enum('DecayMode', ['bkg', 'sig_D_tau_nu', 'sig_D_e_nu', 'sig_Dst_tau_nu',
                               'sig_Dst_e_nu', 'all_Dstst_tau_nu', 'all_Dstst_e_nu',
                               'sig_D_mu_nu', 'sig_Dst_mu_nu', 'all_Dstst_mu_nu'],
                 start=0)

# Make the colours of the `tab20` colormap the default colour cycle for all axes
# created after this line.
plt.rcParams["axes.prop_cycle"] = plt.cycler("color", plt.cm.tab20.colors)

In [48]:
# Selection strings in `DataFrame.query` syntax, used to split the dataframe into components.
# Note: the backslash continuations keep the next line's leading spaces inside the string.
#
# D** e nu: DecayMode equals the `all_Dstst_e_nu` enum value, D_mcPDG*e_mcPDG==411*11,
# e_genMotherPDG equals B0_mcPDG, and either
#   (B0_mcErrors<64 and B0_mcPDG*D_mcPDG==-511*411) or (B0_mcErrors<512 and |B0_mcPDG|==521).
# NOTE(review): the PDG products presumably fix the relative charge/flavour of the
# truth-matched particles - confirm against the reconstruction.
Dstst_e_nu_selection = f'DecayMode=={DecayMode["all_Dstst_e_nu"].value} and \
                        D_mcPDG*e_mcPDG==411*11 and e_genMotherPDG==B0_mcPDG and \
        ((B0_mcErrors<64 and B0_mcPDG*D_mcPDG==-511*411) or (B0_mcErrors<512 and abs(B0_mcPDG)==521))'
    
# D** tau nu: same as above, except that the lepton-mother condition is
# e_mcPDG*e_genMotherPDG==11*15 instead of e_genMotherPDG==B0_mcPDG.
Dstst_tau_nu_selection = f'DecayMode=={DecayMode["all_Dstst_tau_nu"].value} and \
                        D_mcPDG*e_mcPDG==411*11 and e_mcPDG*e_genMotherPDG==11*15 and \
        ((B0_mcErrors<64 and B0_mcPDG*D_mcPDG==-511*411) or (B0_mcErrors<512 and abs(B0_mcPDG)==521))'
    
# Shared truth requirements for the tau modes (`signals_selection`) and the e modes
# (`norms_selection`); they differ only in the last condition on e_genMotherPDG.
signals_selection = 'B0_mcPDG*D_mcPDG==-511*411 and D_mcPDG*e_mcPDG==411*11 and e_mcPDG*e_genMotherPDG==11*15'
norms_selection = 'B0_mcPDG*D_mcPDG==-511*411 and D_mcPDG*e_mcPDG==411*11 and e_genMotherPDG==B0_mcPDG'

# Input ntuple stored as parquet; glob returns the list of matching files.
folder = '/home/belle/zhangboy/R_D/Generic_MC14ri/MC14ri_bbbar_bengal_e_1'
pfs = glob.glob(f"{folder}/sub02_0.parquet")
if not pfs:
    # Fail early with a readable message instead of an opaque reader error.
    raise FileNotFoundError(f"no parquet file matches {folder}/sub02_0.parquet")

samples = {}

# Load the matched file(s) into a single pandas dataframe.
df_bestSelected = pandas.read_parquet(pfs, engine="pyarrow")

# Signal components
# Every component is cut from df_bestSelected, so this cell only depends on the
# frame it has just loaded and not on frames left over in the kernel.
sig_D_tau_nu=df_bestSelected.query(f'DecayMode=={DecayMode["sig_D_tau_nu"].value} and \
                                        B0_mcErrors<32 and {signals_selection}').copy()

sig_Dst_tau_nu=df_bestSelected.query(f'DecayMode=={DecayMode["sig_Dst_tau_nu"].value} and \
                                        B0_mcErrors<64 and {signals_selection}').copy()
sig_D_e_nu=df_bestSelected.query(f'DecayMode=={DecayMode["sig_D_e_nu"].value} and \
                                B0_mcErrors<16 and {norms_selection}').copy()
sig_Dst_e_nu=df_bestSelected.query(f'DecayMode=={DecayMode["sig_Dst_e_nu"].value} and \
                                    B0_mcErrors<64 and {norms_selection}').copy()
all_Dstst_tau_nu=df_bestSelected.query(Dstst_tau_nu_selection).copy()
all_Dstst_e_nu=df_bestSelected.query(Dstst_e_nu_selection).copy()

# Apply one common cut to every component before storing it. The e-channel
# frames are stored under the generic `_l_nu` keys.
cut = 'e_p>0'
components = {
    'sig_D_tau_nu': sig_D_tau_nu,
    'sig_Dst_tau_nu': sig_Dst_tau_nu,
    'sig_D_l_nu': sig_D_e_nu,
    'sig_Dst_l_nu': sig_Dst_e_nu,
    'all_Dstst_tau_nu': all_Dstst_tau_nu,
    'all_Dstst_l_nu': all_Dstst_e_nu,
}
for label, component in components.items():
    samples[label] = component.query(cut)

In [49]:
# Count how often each combination of e / D / B0 truth values occurs among the
# candidates with e_mcErrors==2048, then display the combinations seen more than 120 times.
truth_columns = ['e_mcPDG','e_genMotherPDG','D_mcErrors','D_mcPDG','B0_mcErrors','B0_mcPDG']
flagged = df_bestSelected.query('e_mcErrors==2048')
a = flagged[truth_columns].value_counts()
a[a.gt(120)]

e_mcPDG  e_genMotherPDG  D_mcErrors  D_mcPDG    B0_mcErrors  B0_mcPDG 
-11.0     511.0          0.0         -411.0     24.0          511.0       4357
                         312.0        300553.0  312.0         300553.0    4313
 11.0    -511.0          0.0          411.0     24.0         -511.0       4199
-11.0     511.0          0.0         -411.0     8.0           511.0       4180
 11.0    -511.0          312.0        300553.0  312.0         300553.0    4164
                         0.0          411.0     8.0          -511.0       4022
-11.0     511.0          376.0        300553.0  376.0         300553.0    2434
 11.0    -511.0          376.0        300553.0  376.0         300553.0    2355
-11.0     511.0          440.0        300553.0  440.0         300553.0    1894
 11.0    -511.0          440.0        300553.0  440.0         300553.0    1783
-11.0     511.0          504.0        300553.0  504.0         300553.0    1263
 11.0    -511.0          504.0        300553.0  504.0       

In [10]:
# Report the number of candidates held in each sample.
for name, sample in samples.items():
    print(f'{name}: {len(sample)}')

sig_D_tau_nu: 119066
sig_Dst_tau_nu: 73953
sig_D_l_nu: 54021
sig_Dst_l_nu: 40994
all_Dstst_tau_nu: 538
all_Dstst_l_nu: 18402


In [8]:
# Report the number of candidates held in each sample.
for name, frame in samples.items():
    print(f'{name}: {len(frame)}')

sig_D_tau_nu: 96237
sig_Dst_tau_nu: 61287
sig_D_l_nu: 52388
sig_Dst_l_nu: 40312
all_Dstst_tau_nu: 438
all_Dstst_l_nu: 17105


In [41]:
# df3: every candidate labelled all_Dstst_e_nu.
df3 = df_bestSelected.query('DecayMode=="all_Dstst_e_nu"')

# df2: the labelled candidates that also pass the PDG / mcErrors requirements.
truth_matched_query = 'DecayMode=="all_Dstst_e_nu" and D_mcPDG*e_mcPDG==411*11 and e_genMotherPDG==B0_mcPDG and \
    ((B0_mcErrors<64 and B0_mcPDG*D_mcPDG==-511*411) or (B0_mcErrors<512 and abs(B0_mcPDG)==521))'
df2 = df_bestSelected.query(truth_matched_query)

# df4: rows that occur exactly once in the stacked frame (keep=False drops every
# row that has a duplicate); its length is the cell output.
stacked = pandas.concat([df2, df3])
df4 = stacked.drop_duplicates(keep=False)
len(df4)

5099

In [23]:
# Number of candidates per DecayMode label.
df_bestSelected['DecayMode'].value_counts()

bkg                 579118
sig_D_e_nu           48330
sig_Dst_e_nu         36679
all_Dstst_e_nu       32161
sig_D_tau_nu          1928
sig_Dst_tau_nu        1205
all_Dstst_tau_nu      1137
all_Dstst_mu_nu        140
sig_D_mu_nu             14
sig_Dst_mu_nu            9
Name: DecayMode, dtype: int64

In [14]:
# Number of candidates that have no B0_DecayHashEx value.
data["B0_DecayHashEx"].isna().sum()

5290

In [None]:
# Print the number of missing values in every column of `data`.
# The row limit is lifted only inside this block, so later cells keep the default
# truncated display instead of dumping whole frames. `pandas` comes from the
# import cell at the top of the notebook.
with pandas.option_context('display.max_rows', None):
    print(data.isna().sum())

In [None]:
# Selection of all_Dstst_e_nu candidates whose D/e PDG product is NOT 411*11.
# Alternative selection kept for reference: 'DecayMode=="bkg" and B0_isContinuumEvent!=1'
cut='DecayMode=="all_Dstst_e_nu" and D_mcPDG*e_mcPDG!=411*11 and e_genMotherPDG==B0_mcPDG and \
    ((B0_mcErrors<64 and B0_mcPDG*D_mcPDG==-511*411) or (B0_mcErrors<512 and abs(B0_mcPDG)==521))'

# Take the third selected candidate (position 2) and read its two decay-hash values.
selected = df_cut.query(cut)
third_candidate = selected.iloc[2]
candidate12 = third_candidate[['B0_DecayHash', "B0_DecayHashEx"]].values

# print the original decay as simulated in MC with removed Bremsstrahlung gammas
print("Monte Carlo Decay with removed Bremsstrahlung gammas: ")
decay_hash, decay_hash_ex = candidate12
org2 = hashmap2.get_original_decay(decay_hash, decay_hash_ex)
print(org2.to_string())

In [None]:
# Build two samples from `data`: 'BC' uses every candidate, 'AC' only those passing `cut`.
# Each is reduced to one candidate per event and then merged with `MC`.
samples = {}
names = ['BC','AC']
cut = 'D_vtxReChi2<13 and B0_vtxReChi2<14 and -3.2<B0_deltaE<0 and e_CMS_p>0.2 and \
    5<B0_roeMbc_my_mask and 4.3<B0_CMS2_weMbc and \
    -5<B0_roeDeltae_my_mask<2 and -3<B0_CMS0_weDeltae<2 and \
    abs(B0_roeCharge_my_mask)<3 and nElectrons90+nMuons90==1'
# Columns that together identify one event.
event_keys = ['__experiment__','__run__','__event__','__production__']
for name in names:
    df = data.copy() if name == 'BC' else data.query(cut).copy()

    print(f'{name} before BCS')
    print(df.DecayMode.value_counts())

    # Per event, keep the row with the smallest B_D_ReChi2
    # (BCS presumably stands for best-candidate selection - confirm).
    best_rows = df.groupby(event_keys).B_D_ReChi2.idxmin()
    df_bestSelected = df.loc[best_rows]

    print(f'{name} after BCS')
    print(df_bestSelected.DecayMode.value_counts())

    # NOTE(review): the merge key is `__event__` only, while events are grouped by four
    # columns above; validate='1:1' raises if `__event__` repeats on either side - confirm
    # that the event number alone is unique here.
    df_merged = pandas.merge(df_bestSelected, MC, on=['__event__'], validate='1:1')
    samples[name] = df_merged