# Process Dataframe with Awkward Arrays
For each event, organize the particles' Ef, pxf, pyf, and pzf values in descending order of Ef. Extract columns for easy plotting: leading particle, sums of particle energies. Because of the type of interaction, it can be relevant to subtract the mass of the strange baryons, protons, and neutrons from the given energy. The calculated energy (Cal) columns represent this calculation. 


In [1]:
# Import statements
import numpy as np
import pandas as pd
import time
import awkward as ak
from particle import Particle

In [2]:
def find_max_pars(df, col):
    '''Return the largest per-event particle count found in column col of df.'''

    # ak.num gives the length of each event's list; the maximum of those is returned
    per_event = ak.num(ak.Array(df[col].to_numpy()))

    return np.max(per_event)

In [3]:
def pad_value(arr, num, val):
    '''Pad every row of a 2D awkward-compatible array to length num with val and
    return the result as a numpy array'''

    jagged = ak.Array(arr)

    # pad_none inserts None placeholders along axis 1; fill_none swaps them for val
    filled = ak.fill_none(ak.pad_none(jagged, num, axis = 1), val)

    return ak.to_numpy(filled)

In [6]:
def make_energy_df(df, pars, is_all, is_leading, is_count, isMom = False, num = 52, fill = 0):
    '''Build a per-event dataframe of energy-sorted quantities for each particle code in pars.

    df: dataframe whose 'pdgf', 'Ef', 'kef' and 'cthf' columns (plus 'pxf', 'pyf', 'pzf'
        when isMom is True) hold one array of particles per event (row).
    pars: iterable of pdgf codes to extract.
    is_all: add 'ef_<par>', 'ke_<par>', 'cthf_<par>' (and 'pxf_<par>', 'pyf_<par>',
        'pzf_<par>' when isMom) columns; each cell holds that event's values in
        descending Ef order with the fill entries removed.
    is_leading: add '<name>_<par>_l' columns holding the highest-Ef entry of each event
        (fill when the event has no such particle); with isMom also 'ptf_<par>_l',
        computed as sqrt(pxf**2 + pyf**2) of that entry.
    is_count: add '<par>_count', the number of non-fill ke entries per event.
    isMom: also process the momentum components and blank out entries whose pxf is 0.
    num: length every event is padded to (passed on to pad_value).
        NOTE(review): presumably this must be >= the largest multiplicity in df
        (see find_max_pars) -- confirm.
    fill: placeholder written wherever an entry does not belong to the current particle.

    Returns a new dataframe with an 'event' column (df.index) followed by the
    requested columns.
    '''

    # Pad every per-event array with zeros to a common length so they stack into 2D
    # arrays. Padding slots get pdgf 0, so they do not match any non-zero particle code.
    pdgf = pad_value(df['pdgf'].to_numpy(), num, 0)
    padded = {
        'ef': pad_value(df['Ef'].to_numpy(), num, 0),
        'ke': pad_value(df['kef'].to_numpy(), num, 0),
        'cthf': pad_value(df['cthf'].to_numpy(), num, 0),
    }
    if isMom:
        for name in ('pxf', 'pyf', 'pzf'):
            padded[name] = pad_value(df[name].to_numpy(), num, 0)

    # Create a column of new dataframe that specifies the event num
    df_new = pd.DataFrame()
    df_new['event'] = df.index

    # Go through each particle #
    for par in pars:
        start_time = time.time()

        # Keep the values of this particle only; every other slot becomes fill
        is_par = pdgf == par
        vals = {name: np.where(is_par, arr, fill) for name, arr in padded.items()}

        ## Remove zero momentum particles ##
        # Only possible when the momentum arrays were built. The mask is computed once,
        # before pxf itself is overwritten, and applied BEFORE sorting so that a removed
        # particle can never occupy the leading slot.
        if isMom:
            zero_mom = vals['pxf'] == 0
            if fill == 0:
                # pxf is already 0 (== fill) there; pyf and pzf are left as they are
                names = ('ef', 'ke', 'cthf')
            else:
                # Remove from all arrays if fill is not 0
                names = tuple(vals)
            for name in names:
                vals[name] = np.where(zero_mom, fill, vals[name])

        ### SORT BASED ON EF ##
        # One ordering per event (descending Ef), applied to every quantity so the
        # columns stay aligned with each other
        order = np.argsort(-vals['ef'])
        vals = {name: np.take_along_axis(arr, order, axis=1) for name, arr in vals.items()}

        ## Add the jagged arrays of Ef, ke, cthf, pxf, pyf, pzf for each particle ##
        if is_all:
            for name, arr in vals.items():
                df_new['%s_%s'%(name, str(par))] = _drop_fill(arr, fill)

        ## Find the leading particle sorted based on energy ##
        if is_leading:
            for name, arr in vals.items():
                df_new['%s_%s_l'%(name, str(par))] = arr[:, 0]

            if isMom:
                df_new['ptf_%s_l'%(str(par))] = np.sqrt(vals['pxf'][:, 0]**2 + vals['pyf'][:, 0]**2)

        # Count the number of outgoing particles in each event
        if is_count:
            df_new['%s_count'%(str(par))] = np.count_nonzero(vals['ke'] != fill, axis=1)

        print('Done with: ', str(par), 'time took: ', time.time() - start_time, ' still working ...')
    return df_new


def _drop_fill(arr, fill):
    '''Return a 1D object array whose i-th element is row i of arr without its fill entries.'''

    # Filtering is done row by row: boolean-masking the whole 2D array at once would
    # flatten it and lose the per-event grouping.
    out = np.empty(len(arr), dtype=object)
    for i, row in enumerate(arr):
        out[i] = row[row != fill]
    return out

# Process dataframe with subentries to find the leading particle

In [1]:
# Import statements
import numpy as np
import pandas as pd

In [2]:
def read_hdf(num):
    ''' Load the dataframes stored in the HDF file of run num and return them as
    (fp: final particles, ip: initial particles, sm: all other columns) '''

    # All three tables live in the same file, so build its path once
    path = "/Users/laurazichi/Desktop/Fermilab/run_%s_gntp_FSI_df.hdf"%(num)

    return tuple(pd.read_hdf(path, key) for key in ("gst_fp_df", "gst_ip_df", "gst_df"))

In [6]:
## Weight columns used ##
# process_df joins these columns from gst_df onto the final-particle dataframe
wghts = [
    # nucleon (N) weights
    'wght_FrCEx_N_n1', 'wght_FrCEx_N_p1',
    'wght_FrAbs_N_n1', 'wght_FrAbs_N_p1',
    'wght_FrInel_N_n1', 'wght_FrInel_N_p1',
    'wght_FrPiProd_N_n1', 'wght_FrPiProd_N_p1',
    # pion (pi) weights
    'wght_FrCEx_pi_n1', 'wght_FrCEx_pi_p1',
    'wght_FrAbs_pi_n1', 'wght_FrAbs_pi_p1',
    'wght_FrInel_pi_n1', 'wght_FrInel_pi_p1',
    'wght_FrPiProd_pi_n1', 'wght_FrPiProd_pi_p1',
    # plain weight column
    'wght',
]

In [161]:
def calc_leading(df, pars):
    '''For a given multiindex dataframe, return a multiindex dataframe of the leading
    (highest Ef) particle of each event for each particle code in pars.

    df: dataframe indexed by (event, particle) with at least 'pdgf' and 'Ef' columns.
    pars: iterable of pdgf codes to look for.

    Returns the matching rows of df (all original columns kept, so KE and angle are
    included when df has them) plus a 'Count' column with the number of particles of
    that code in the event, sorted by index. If several particles of one event tie for
    the highest Ef, all of them are returned. An empty pars gives an empty dataframe.
    '''
    frames = []

    for par in pars:
        ## Only works for outgoing particle pdgf change to pdgi if incoming
        df_temp = df[df['pdgf'] == par].sort_index()
        by_event = df_temp.groupby(level = 0, sort = True)['Ef']

        # Find leading particle based on Ef. The string name 'max' is used because
        # passing the builtin max to transform is deprecated in pandas.
        is_lead = by_event.transform('max') == df_temp['Ef']

        # Find count of particles; transform broadcasts it onto every row of the event,
        # so no index merge is needed. to_numpy() sidesteps index alignment, which would
        # fail on duplicated index labels.
        counts = by_event.transform('count')

        frames.append(df_temp[is_lead].assign(Count = counts[is_lead].to_numpy()))

    # pd.concat raises on an empty list, so handle "no particles requested" explicitly
    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a dataframe inside the loop
    df_new = pd.concat(frames, sort = False).sort_index()

    return df_new

# Apply cuts to Dataframes with subentries

In [4]:
## Read in dataframe, make cuts, merge for one final dataframe ##
def process_df(num, ptl_min = 0.4, ke_min = 0.06, max_angle_deg = 40):
    '''Read runs 1..num with read_hdf, apply the cuts and return the merged dataframes.

    num: number of run files to read (runs 1 to num inclusive).
    ptl_min: rows are kept only if the lepton transverse momentum
        ptl = sqrt(pxl**2 + pyl**2) is above this value.
    ke_min: kef threshold used for the "all cuts" dataframe.
    max_angle_deg: the "all cuts" dataframe keeps cthf > cos(max_angle_deg).

    Returns (gst_all_fp, gst_all_cuts): the final-particle rows passing the ptl and
    pxf != 0 cuts, and the subset of those that also passes the kef and cthf cuts.
    Both are empty dataframes when num < 1.

    NOTE(review): the per-run frames are concatenated with their original index; if
    'entry' restarts in every run file, index values repeat across runs -- confirm
    this is what downstream per-event grouping expects.
    '''

    fp_frames = []
    cut_frames = []
    cth_min = np.cos(max_angle_deg*np.pi/180)

    # Go through all runs, cut each one and collect the pieces
    for x in range(1, num+1):
        # Read in each dataframe (final particles and the event-level columns)
        gst_all_fp_temp, _, gst_all_temp = read_hdf(x)

        # Make cuts
        # Lepton pt cut: ptl and the weights live in the event-level frame, so join
        # them onto the particle rows first
        gst_all_temp["ptl"] = np.sqrt(gst_all_temp["pxl"]**2+gst_all_temp["pyl"]**2)
        gst_all_fp_temp = gst_all_fp_temp.join(gst_all_temp[wghts + ["ptl"]],on='entry')
        gst_all_fp_temp = gst_all_fp_temp[gst_all_fp_temp["ptl"] > ptl_min]

        # Zero cut
        gst_all_fp_temp = gst_all_fp_temp[gst_all_fp_temp["pxf"] != 0]

        # All cuts df (kinetic energy and angle cuts)
        passes = (gst_all_fp_temp["kef"] > ke_min) & (gst_all_fp_temp["cthf"] > cth_min)
        gst_all_cuts_temp = gst_all_fp_temp[passes]

        fp_frames.append(gst_all_fp_temp)
        cut_frames.append(gst_all_cuts_temp)

        print('Done with: %s Still working ...'%(x))

    # pd.concat raises on an empty list
    if not fp_frames:
        return pd.DataFrame(), pd.DataFrame()

    # Merge once at the end; concatenating inside the loop re-copies all earlier runs
    gst_all_fp = pd.concat(fp_frames, sort = True)
    gst_all_cuts = pd.concat(cut_frames, sort = True)

    return gst_all_fp, gst_all_cuts

In [182]:
# Read and cut runs 1-10
gst_all_fp, gst_all_cuts = process_df(num = 10)

Done with: 1 Still working ...
Done with: 2 Still working ...
Done with: 3 Still working ...
Done with: 4 Still working ...
Done with: 5 Still working ...
Done with: 6 Still working ...
Done with: 7 Still working ...
Done with: 8 Still working ...
Done with: 9 Still working ...
Done with: 10 Still working ...


In [183]:
# Leading-particle tables for the same particle codes, without and with the extra cuts
leading_df, leading_cuts_df = (
    calc_leading(frame, np.array([211, -211, 111, 2112, 2212]))
    for frame in (gst_all_fp, gst_all_cuts)
)

In [185]:
# Save the processed dataframes. key is passed by keyword because pandas deprecates
# passing it positionally to to_hdf.
gst_all_fp.to_hdf('gst_fp_10', key='fp_10')
gst_all_cuts.to_hdf('gst_10', key='gst_10')

leading_df.to_hdf('lead_10', key='l_10')
leading_cuts_df.to_hdf('lead_cuts_10', key='l_cuts10')