In [1]:
import seaborn
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Line3DCollection
from matplotlib import pylab as plt
import root_numpy
import pandas as pd
import numpy as np
from tqdm import tqdm

BRICK_X = 124000
BRICK_Y = 99000
BRICK_Z = 75000
SAFE_M = 10000

## Load e-m showers basetracks

In [2]:
def load_mc(filename, step=1):
    mc = root_numpy.root2array(
                        filename,
                        treename="Data",
                        branches=["Event_id", "ele_P",
                                   "BT_X", "BT_Y", "BT_Z",
                                   "BT_SX", "BT_SY", "BT_plate",
                                   "ele_x", "ele_y", "ele_z", 
                                   "ele_sx", "ele_sy", 
                                   "chisquare", "alfa", "deltaSx", "deltaSy"],
                        step=step)
    pmc = pd.DataFrame(mc)
    pmc['numtracks'] = pmc.BT_X.apply(lambda x: len(x))
    # safety cuts
    shapechange = [pmc.shape[0]]
    pmc = pmc[pmc.ele_P > 0.1]
    shapechange.append(pmc.shape[0])
    pmc = pmc[pmc.ele_x.abs() < BRICK_X / 2 - SAFE_M]
    shapechange.append(pmc.shape[0])
    pmc = pmc[pmc.ele_y.abs() < BRICK_Y / 2 - SAFE_M]
    shapechange.append(pmc.shape[0])
    pmc = pmc[pmc.ele_z < 0]
    shapechange.append(pmc.shape[0])
    pmc = pmc[pmc.numtracks > 3]
    shapechange.append(pmc.shape[0])
    # print ("numtracks reduction by cuts: ", shapechange)
 
    pmc.BT_X += BRICK_X / 2
    pmc.BT_Y += BRICK_Y / 2
    pmc.BT_Z += BRICK_Z / 2
    pmc.ele_x += BRICK_X / 2
    pmc.ele_y += BRICK_Y / 2
    pmc.ele_z += BRICK_Z / 2
  
    # Correct Z coordinate to match plates coordinates
    pmc.BT_Z = (pmc.BT_plate - 1) * 1293
    
    return pmc
    

In [3]:
path_nu_root = 'data_root/mcdata_nue_10k.root'
path_tau_root = 'data_root/mcdata_taue_10k.root'

In [4]:
path_nu_csv = 'data_csv/mcdata_nue_10k'
path_tau_csv = 'data_csv/mcdata_taue_10k'

In [5]:
df_nu = load_mc(path_nu_root)
df_tau = load_mc(path_tau_root)



In [6]:
df_nu.head(2)

Unnamed: 0,Event_id,ele_P,BT_X,BT_Y,BT_Z,BT_SX,BT_SY,BT_plate,ele_x,ele_y,ele_z,ele_sx,ele_sy,chisquare,alfa,deltaSx,deltaSy,numtracks
0,152290,40.41106,"[33844.0, 33885.6, 33927.1, 33926.6, 33947.5, ...","[49176.7, 49183.7, 49190.9, 49191.4, 49187.0, ...","[21981, 23274, 24567, 24567, 24567, 25860, 258...","[0.0322676, 0.0324748, 0.0311172, 0.0285943, 0...","[0.00903987, 0.00643338, 0.0103145, 0.00348215...","[18, 19, 20, 20, 20, 21, 21, 22, 22, 23, 23, 2...",33840.695312,49175.777344,21980.984375,0.036384,0.002931,"[0.616263, 0.733469, 1.40148, 0.724876, 1.3671...","[0.00273792, 0.0375241, 0.0737786, 0.0737801, ...","[-0.00411633, -0.00390913, -0.00526671, -0.007...","[0.00610876, 0.00350226, 0.00738337, 0.0005510...",1630
12,153818,19.301327,"[45775.7, 45646.4, 45646.4, 45644.7, 45518.8, ...","[24100.3, 24113.4, 24113.2, 24113.1, 24126.8, ...","[23274, 24567, 24567, 24567, 25860, 25860, 258...","[-0.10461, -0.0973416, -0.10394, -0.110766, -0...","[0.0101344, 0.0117193, 0.0121083, 0.0100014, 0...","[19, 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 2...",45786.214844,24099.283203,23252.882812,-0.093035,0.009329,"[0.43341, 1.09776, 0.498545, 0.506697, 0.81049...","[0.00281573, 0.0389297, 0.0388759, 0.0388825, ...","[-0.0115757, -0.00430691, -0.0109049, -0.01773...","[0.000805694, 0.00239067, 0.00277967, 0.000672...",1411


In [7]:
def convert_to_csv(df, path, hdf=False):
    df_vert = pd.DataFrame.from_items([        
                    ('ele_x', df['ele_x']),
                    ('ele_y', df['ele_y']),
                    ('ele_z', df['ele_z']),
                    ('ele_sx', df['ele_sx']),
                    ('ele_sy', df['ele_sy']),
                    ('ele_P', df['ele_P']),
                    ('event_id', df['Event_id'])])
    df_vert.event_id = df_vert.event_id.astype(np.int32)
    df_vert.to_csv(path + '_showers.csv', index=False)
    
    showers = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        dmc = pd.DataFrame.from_items([
                    ('event_id', row['Event_id'] * np.ones(len(row['BT_SY']))),
                    ('BT_X', row['BT_X']),
                    ('BT_Y', row['BT_Y']),
                    ('BT_Z', row['BT_Z']),
                    ('BT_SX', row['BT_SX']),
                    ('BT_SY', row['BT_SY']),
                    ('chisquare', row.chisquare[row.chisquare < 3])
        ])
        showers.append(dmc)
        
    df_bck = pd.concat(showers, ignore_index=True)
    df_bck.event_id = df_bck.event_id.astype(np.int32)
    if (hdf == True):
        df_bck.to_hdf(path + '_basetracks.hdf5', key='data', format='table',complevel=9, append=True)
    else:
        df_bck.to_csv(path + '_basetracks.csv', index=False)

    return df_vert, df_bck

Test load function
```
v, b = convert_to_csv(taue.loc[[0,7]], path_tau_csv)
v.head(), b.head(), 
```

In [8]:
%%time
_, _ = convert_to_csv(df_nu, path_nu_csv)

100%|██████████| 2676/2676 [00:02<00:00, 1053.34it/s]


CPU times: user 38.9 s, sys: 832 ms, total: 39.7 s
Wall time: 39.9 s


In [9]:
%%time
_, _ = convert_to_csv(df_tau, path_tau_csv);

100%|██████████| 3247/3247 [00:02<00:00, 1143.42it/s]


CPU times: user 21 s, sys: 400 ms, total: 21.4 s
Wall time: 21.6 s


## Load background basetracks

In [10]:
path_background_base_root = "data_root/brick-129294"
path_background_csv = "data_csv/background-129294.csv"

In [11]:
def load_bg(basedir, step=1):
    # ignore 57th layer since it looks different
    etalon_plates = []
    for p in range(48, 57):
        basetracks = root_numpy.root2array("{basedir}/129294.{p}.1.1000.cp.root".format(basedir=basedir, p=p), 
                                       treename="couples", 
                                       branches=["s.eX", "s.eY", "s.eZ",
                                                "s.eTX", "s.eTY",
                                                "s.eChi2"], step=step)
        etalon_plates.append(basetracks)
        
    df = None
    plates = []
    for p in range(58):
        plate = np.copy(etalon_plates[p % len(etalon_plates)])
        plate['s.eZ'] = p * 1293
        df_plate = pd.DataFrame(plate)
        plates.append(df_plate)
    df = pd.concat(plates, ignore_index=True)
    df = df.rename(columns={'s.eX': 'ele_x', 's.eY':'ele_y', 's.eZ':'ele_z', 
                    's.eTX': 'ele_sx', 's.eTY':'ele_sy', 's.eChi2': 'chisquare'})
    print ("created: {n} tracks".format(n=df.shape[0]))
    return df

In [12]:
%%time
df_bg = load_bg(path_background_base_root)



created: 27322110 tracks
CPU times: user 5.44 s, sys: 1.3 s, total: 6.74 s
Wall time: 6.73 s




In [13]:
df_bg.head(2)

Unnamed: 0,ele_x,ele_y,ele_z,ele_sx,ele_sy,chisquare
0,52762.921875,46909.019531,0.0,0.346038,0.003274,0.054294
1,58770.84375,46742.039062,0.0,0.121466,0.658594,0.06127


In [14]:
%time df_bg.to_csv(path_background_csv, index=False)