In [1]:
import numpy as np
import pandas as pd
import ROOT, root_numpy
import re

In [2]:
#directory = 'ship_data/57c0494a9e74770d2f2328b6'
#filename = '/ship.10.0.Pythia8-TGeant4.root'

In [3]:
#path_to_root_file = directory + filename

In [4]:
def set_index(list_of_leaves, index):
    """Return a new list with ``'[<index>]'`` appended to every leaf name.

    ``list_of_leaves`` is not modified; ``index`` is formatted with ``%i``.
    """
    return [leaf + '[%i]' % index for leaf in list_of_leaves]


def DecodeDetectorID(det_id):
    """Split straw-tube detector ids into their decimal fields.

    The id is decoded exactly as in the original per-row loop:
    ``StatNb = id // 10**7``, then one decimal digit each for ViewNb
    (10**6), PlaneNb (10**5) and LayerNb (10**4); StrawNb is the remaining
    value minus 2000.

    Parameters
    ----------
    det_id : array-like of numbers
        Detector ids; they are truncated to integers before decoding.

    Returns
    -------
    tuple of five int64 numpy arrays
        ``(StatNb, ViewNb, PlaneNb, LayerNb, StrawNb)``, same length as the input.
    """
    det_id = np.asarray(det_id).astype(np.int64)
    stat_nb = det_id // 10000000
    rest = det_id - stat_nb * 10000000
    view_nb = rest // 1000000
    rest = rest - view_nb * 1000000
    plane_nb = rest // 100000
    rest = rest - plane_nb * 100000
    layer_nb = rest // 10000
    straw_nb = rest - layer_nb * 10000 - 2000
    return stat_nb, view_nb, plane_nb, layer_nb, straw_nb


def _scan_cbmsim(path_to_root_file, collection):
    """Return ``(leaf_names, max_entries)`` for the ``cbmsim`` tree of a file.

    ``leaf_names`` lists the names of all leaves of the tree; ``max_entries``
    is the largest ``GetEntriesFast()`` of the branch ``collection`` over all
    events. The file is opened read-only (the scan never writes) and is
    closed before returning.
    """
    root_file = ROOT.TFile(path_to_root_file)
    if root_file.IsZombie():
        raise IOError('Cannot open ROOT file: %s' % path_to_root_file)
    try:
        tree = root_file.Get("cbmsim")
        leaves = tree.GetListOfLeaves()
        leaf_names = [leaves.At(i).GetName() for i in range(leaves.GetEntries())]

        max_entries = 0
        for i in range(tree.GetEntries()):
            tree.GetEvent(i)
            n_entries = getattr(tree, collection).GetEntriesFast()
            if n_entries > max_entries:
                max_entries = n_entries
    finally:
        root_file.Close()
    return leaf_names, max_entries


def _first_matches(pattern, names):
    """Return the first ``pattern.findall`` hit of every name that has one."""
    matches = []
    for name in names:
        found = pattern.findall(name)
        if found:
            matches.append(found[0])
    return matches


def _extract_collection(directory, filename, collection, leaf_regex, column_regex,
                        keep_rows, extra_columns=(), max_slices=None):
    """Flatten one per-event collection of the ``cbmsim`` tree into a DataFrame.

    For every entry index ``k`` the selected leaves are read as ``leaf[k]``
    for all events at once; ``keep_rows(frame)`` must return the boolean mask
    of rows to keep from each such slice.
    NOTE(review): the masks used by the callers rely on entries that do not
    exist in an event being read back as zeros - confirm against root_numpy.

    Parameters
    ----------
    directory, filename : str
        Concatenated (without separator) to form the path of the ROOT file.
    collection : str
        Branch whose per-event size bounds the number of slices.
    leaf_regex : str
        Selects the leaves to read (first regex hit per leaf name).
    column_regex : str
        Extracts the output column name from each selected leaf.
    keep_rows : callable
        ``frame -> boolean mask`` applied to every slice.
    extra_columns : sequence of str
        Column names placed before the extracted ones.
    max_slices : int or None
        Optional upper bound on the number of entry indices read.

    Returns
    -------
    pandas.DataFrame
        Kept rows sorted by ``['event', 'k']`` with the previous index moved
        into an ``'index'`` column. When nothing is read, an empty frame with
        the expected columns is returned instead of failing.
    """
    path_to_root_file = directory + filename

    leaf_names, max_entries = _scan_cbmsim(path_to_root_file, collection)
    sel_leaves = _first_matches(re.compile(leaf_regex), leaf_names)
    new_cols = list(extra_columns) + _first_matches(re.compile(column_regex), sel_leaves)

    n_slices = max_entries if max_slices is None else min(max_entries, max_slices)

    # NOTE: every slice is a full pass over the tree, so run time grows with
    # the largest per-event collection size.
    slices = []
    for k in range(n_slices):
        data_root = root_numpy.root2array(path_to_root_file, treename='cbmsim',
                                          branches=set_index(sel_leaves, k))
        data_slice = pd.DataFrame(data_root)
        data_slice.columns = new_cols
        data_slice['event'] = np.arange(data_slice.shape[0])
        # Kept as float (k * ones) so the written CSV matches earlier outputs.
        data_slice['k'] = k * np.ones(data_slice.shape[0])
        slices.append(data_slice[keep_rows(data_slice)])

    if slices:
        # One concat at the end instead of re-copying the result per slice.
        data_full = pd.concat(slices, axis=0)
    else:
        data_full = pd.DataFrame(columns=new_cols + ['event', 'k'])

    # DataFrame.sort was removed from pandas; sort_values is its replacement.
    data_full = data_full.sort_values(['event', 'k'])
    return data_full.reset_index()


def GetStrawtubesPointData(directory, filename):
    """Export the ``strawtubesPoint`` collection of a ROOT file to CSV.

    Writes two files into ``directory``:

    * ``strawtubesPoint.csv`` - all rows with ``DetectorID > 0``;
    * ``strawtubesPoint_decoded_hits.csv`` - the same rows without the
      ``_``, ``index``, ``UniqueID`` and ``EventId`` columns and with the
      ``StatNb``, ``ViewNb``, ``PlaneNb``, ``LayerNb`` and ``StrawNb``
      columns decoded from ``DetectorID``.

    Parameters
    ----------
    directory : str
        Directory of the ROOT file; also receives the CSV files.
    filename : str
        File name appended to ``directory`` as-is (the callers in this
        notebook pass it with a leading '/').
    """
    data_full = _extract_collection(
        directory, filename, 'strawtubesPoint',
        leaf_regex=r'[a-zA-Z0-9._ ]*strawtubesPoint[a-zA-Z0-9._ ]*',
        column_regex=r'(?<=cbmroot.strawtubes.strawtubesPoint.f)\w+',
        keep_rows=lambda hits: hits.DetectorID > 0,
        # '_' names the selected leaf that the column regex does not match
        # (presumably the collection-size leaf - TODO confirm).
        extra_columns=['_'])

    data_full.to_csv(directory + '/strawtubesPoint.csv')

    # Decode
    decoded = data_full.drop(['_', 'index', 'UniqueID', 'EventId'], axis=1)
    stat_nb, view_nb, plane_nb, layer_nb, straw_nb = DecodeDetectorID(decoded.DetectorID.values)
    decoded['StatNb'] = stat_nb
    decoded['ViewNb'] = view_nb
    decoded['PlaneNb'] = plane_nb
    decoded['LayerNb'] = layer_nb
    decoded['StrawNb'] = straw_nb

    decoded.to_csv(directory + '/strawtubesPoint_decoded_hits.csv')


def GetMCTrackData(directory, filename):
    """Export the ``MCTrack`` collection of a ROOT file to ``MCTrack.csv``.

    Only rows with ``Bits > 0`` are kept. ``directory`` and ``filename`` are
    concatenated to form the ROOT file path; the CSV is written into
    ``directory``.
    """
    data_full = _extract_collection(
        directory, filename, 'MCTrack',
        leaf_regex=r'[a-zA-Z0-9._ ]*MCTrack[a-zA-Z0-9._ ]*',
        column_regex=r'(?<=cbmroot.Stack.MCTrack.f)\w+',
        keep_rows=lambda tracks: tracks.Bits > 0,
        # Same role as in GetStrawtubesPointData: the unmatched first leaf.
        extra_columns=['_'])

    data_full.to_csv(directory + '/MCTrack.csv')


def GetVetoPointData(directory, filename, max_points=2000):
    """Export ``fZ`` and ``fTrackID`` of ``vetoPoint`` to ``vetoPoint.csv``.

    Only rows with ``Z != 0`` are kept. At most ``max_points`` entry indices
    are read per event (2000 was hard-coded before); indices beyond the
    largest per-event collection size are no longer read, since such slices
    would be dropped entirely by the ``Z != 0`` filter.

    Parameters
    ----------
    directory, filename : str
        Concatenated to form the ROOT file path; the CSV goes to ``directory``.
    max_points : int, optional
        Upper bound on the number of veto points read per event.
    """
    data_full = _extract_collection(
        directory, filename, 'vetoPoint',
        leaf_regex=r'([a-zA-Z0-9._ ]*vetoPoint.fZ|[a-zA-Z0-9._ ]*vetoPoint.fTrackID)',
        column_regex=r'(?<=cbmroot.veto.vetoPoint.f)\w+',
        keep_rows=lambda points: points.Z != 0,
        max_slices=max_points)

    data_full.to_csv(directory + '/vetoPoint.csv')

In [5]:
%%time
#GetStrawtubesPointData(directory, filename)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs


In [6]:
%%time
#GetMCTrackData(directory, filename)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs


In [7]:
%%time
#GetVetoPointData(directory, filename)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs


In [8]:
import os

# os.walk yields 'ship_data/' itself first, so it is skipped with [1:].
# The remaining paths are sorted because os.walk order depends on the
# filesystem, while the position of each directory in this list is later
# used to offset its event ids.
directories = sorted([x[0] for x in os.walk('ship_data/')][1:])

In [9]:
# from multiprocessing import Pool

# def preparation(directory):
    
#     filename = '/ship.10.0.Pythia8-TGeant4.root'
    
#     #GetStrawtubesPointData(directory, filename)
#     #GetMCTrackData(directory, filename)
#     GetVetoPointData(directory, filename) 


# if __name__ == "__main__":
#     p = Pool(4)
#     p.map(preparation, directories)

# Reconstructible Events

In [10]:
%%time
from utils import get_reconstractible_events


def GetRecoHits(directory):
    """Write the hits of the reconstructible events of ``directory`` to CSV.

    Reads ``MCTrack.csv``, ``vetoPoint.csv`` and
    ``strawtubesPoint_decoded_hits.csv`` from ``directory``, adds an ``Index``
    column holding the row index of the hits table, asks
    ``get_reconstractible_events`` which events are reconstructible (its
    result is used through ``.keys()``) and stores the hits of those events
    in ``RecoHits.csv`` inside the same directory.
    """
    mc_tracks = pd.read_csv(directory + '/MCTrack.csv', delimiter=',')
    veto_points = pd.read_csv(directory + '/vetoPoint.csv', delimiter=',')
    hits = pd.read_csv(directory + '/strawtubesPoint_decoded_hits.csv', delimiter=',')
    hits['Index'] = hits.index

    event_ids = np.unique(hits.event.values)
    reconstructible = get_reconstractible_events(event_ids, hits, mc_tracks, veto_points)

    is_reconstructible = hits.event.isin(reconstructible.keys())
    hits[is_reconstructible].to_csv(directory + '/RecoHits.csv')

CPU times: user 184 µs, sys: 326 µs, total: 510 µs
Wall time: 848 µs


In [11]:
%%time

#GetRecoHits(directory)

CPU times: user 5 µs, sys: 0 ns, total: 5 µs
Wall time: 11.2 µs


In [12]:
import os

# os.walk yields 'ship_data/' itself first, so it is skipped with [1:].
# The remaining paths are sorted because os.walk order depends on the
# filesystem, while the position of each directory in this list is later
# used to offset its event ids.
directories = sorted([x[0] for x in os.walk('ship_data/')][1:])

In [13]:
# from multiprocessing import Pool


# if __name__ == "__main__":
#     p = Pool(4)
#     p.map(GetRecoHits, directories)

# Unite Datasets

In [14]:
# Read every per-directory RecoHits.csv, make the event ids unique across
# directories and stack the tables with a single concat (appending inside the
# loop re-copies the accumulated frame on every iteration).
reco_hits_per_dir = []

for num, directory in enumerate(directories):

    reco_hits = pd.read_csv(directory + '/RecoHits.csv', delimiter=',', index_col=0)

    # Offset by the directory position so ids from different files do not clash.
    # NOTE(review): assumes the event ids of one file stay below 1000 - confirm.
    reco_hits['event'] = reco_hits['event'].values + num * 1000

    reco_hits_per_dir.append(reco_hits)

    if num % 10 == 0:
        # The parenthesised form prints the same text under Python 2 and 3.
        print(num)

# With no directories the result stays an empty frame, as before.
if reco_hits_per_dir:
    full_reco_hits = pd.concat(reco_hits_per_dir)
else:
    full_reco_hits = pd.DataFrame()

0
10
20
30
40
50
60
70
80
90


In [15]:
# Number of distinct event ids in the merged hits table.
len(np.unique(full_reco_hits.event.values))

9503

In [16]:
# Preview the first rows of the merged hits table.
full_reco_hits.head()

Unnamed: 0,Unnamed: 0.1,Bits,Link,TrackID,Px,Py,Pz,Time,Length,ELoss,...,PdgCode,dist2Wire,event,k,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,Index
1965,1965,33554432,0,4,0.767259,-0.506322,53.231792,62.409588,0.403025,3e-06,...,-13,0.443802,16,0,1,0,0,0,349,1965
1966,1966,33554432,0,4,0.767415,-0.506722,53.231731,62.446217,0.390365,5e-06,...,-13,0.446621,16,1,1,0,0,1,349,1966
1967,1967,33554432,0,4,0.767592,-0.506694,53.231697,62.523373,0.972879,4e-06,...,-13,0.031375,16,2,1,0,1,1,349,1967
1968,1968,33554432,0,4,0.767541,-0.50572,53.231091,62.770649,0.946837,4e-06,...,-13,0.11608,16,3,1,1,0,1,353,1968
1969,1969,33554432,0,4,0.767461,-0.505596,53.231075,62.860409,0.774458,5e-06,...,-13,0.296041,16,4,1,1,1,1,353,1969


# Data Preparation

In [20]:
from Y_views import *
from UV_views import *

# Build four hit tables from the merged hits. Each helper returns a pair and
# only its first element is kept here.
# NOTE(review): the helpers are brought in by the star imports of Y_views and
# UV_views; which module defines which helper is not visible in this notebook,
# so explicit imports would make the origin clear.
hits_y_12, _ = modify_for_yz_analysis_1_2(full_reco_hits)
hits_y_34, _ = modify_for_yz_analysis_3_4(full_reco_hits)

hits_stereo_12, _ = modify_for_xz_analysis_1_2(full_reco_hits)
hits_stereo_34, _ = modify_for_xz_analysis_3_4(full_reco_hits)

In [22]:
# Preview hits_y_12 as returned by modify_for_yz_analysis_1_2.
hits_y_12.head()

Unnamed: 0,Unnamed: 0.1,Bits,Link,TrackID,Px,Py,Pz,Time,Length,ELoss,...,event,k,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,Index,Wy,Wz
1965,1965,33554432,0,4,0.767259,-0.506322,53.231792,62.409588,0.403025,3e-06,...,16,0,1,0,0,0,349,1965,-113.5314,2581.15
2046,2046,33554432,0,4,-0.434523,-0.0153,16.654167,90.677681,0.534579,2e-06,...,17,0,1,0,0,0,269,2046,27.2686,2581.15
2084,2084,33554432,0,3,0.516202,0.1576,20.548412,90.671677,0.968176,1.4e-05,...,17,38,1,0,0,0,252,2084,57.1886,2581.15
2326,2326,33554432,0,4,0.004076,-0.209668,4.549163,151.564682,0.829073,3e-06,...,26,0,1,0,0,0,280,2326,7.9086,2581.15
4500,4500,33554432,0,3,0.143586,-0.856637,39.110916,151.219528,0.974792,4e-06,...,86,35,1,0,0,0,400,4500,-203.2914,2581.15


In [23]:
# Preview hits_stereo_12 as returned by modify_for_xz_analysis_1_2.
hits_stereo_12.head()

Unnamed: 0,Unnamed: 0.1,Bits,Link,TrackID,Px,Py,Pz,Time,Length,ELoss,...,StrawNb,Index,Wu,Wv1,Wv2,Wx1,Wx2,Wy1,Wy2,Wz
2005,2005,33554432,0,3,-0.281636,-0.35006,33.927059,62.737587,0.479251,2e-06,...,350,2005,-115.2914,-250,250,-239.000367,259.096982,-136.641617,-93.063746,2591.2793
2049,2049,33554432,0,4,-0.434417,-0.014204,16.6539,91.016327,0.237317,2e-06,...,266,2049,32.5486,-250,250,-251.885472,246.211877,10.635807,54.213678,2591.2793
3251,3251,33554432,0,4,-0.101019,-0.295795,9.470301,106.74324,0.64945,1e-06,...,313,3251,-50.1714,-250,250,-244.675949,253.4214,-71.769418,-28.191547,2591.2793
4467,4467,33554432,0,4,-0.261138,0.136395,10.35381,151.587067,0.960634,4e-06,...,296,4467,-20.2514,-250,250,-247.283649,250.8137,-41.963273,1.614598,2591.2793
4832,4832,33554432,0,4,-0.047668,0.280637,13.992413,39.707237,0.779062,4e-06,...,345,4832,-106.4914,-250,250,-239.767337,258.330012,-127.875104,-84.297232,2591.2793


In [25]:
# Give both Y-view tables the Wx1/Wx2, Wy1/Wy2 and Wz1/Wz2 columns:
# Wy1/Wy2 and Wz1/Wz2 repeat Wy and Wz, Wx1/Wx2 are the constants -250 and 250
# (presumably the two end points of each wire - TODO confirm).
for y_view_hits in (hits_y_12, hits_y_34):
    n_hits = len(y_view_hits['Wy'].values)
    wire_y = y_view_hits['Wy'].values
    wire_z = y_view_hits['Wz'].values

    y_view_hits['Wy1'] = wire_y
    y_view_hits['Wy2'] = wire_y
    y_view_hits['Wx1'] = - 250. * np.ones(n_hits)
    y_view_hits['Wx2'] = 250. * np.ones(n_hits)
    y_view_hits['Wz1'] = wire_z
    y_view_hits['Wz2'] = wire_z

In [26]:
# Preview hits_y_12 again to check the columns added in the previous cell.
hits_y_12.head()

Unnamed: 0,Unnamed: 0.1,Bits,Link,TrackID,Px,Py,Pz,Time,Length,ELoss,...,StrawNb,Index,Wy,Wz,Wy1,Wy2,Wx1,Wx2,Wz1,Wz2
1965,1965,33554432,0,4,0.767259,-0.506322,53.231792,62.409588,0.403025,3e-06,...,349,1965,-113.5314,2581.15,-113.5314,-113.5314,-250,250,2581.15,2581.15
2046,2046,33554432,0,4,-0.434523,-0.0153,16.654167,90.677681,0.534579,2e-06,...,269,2046,27.2686,2581.15,27.2686,27.2686,-250,250,2581.15,2581.15
2084,2084,33554432,0,3,0.516202,0.1576,20.548412,90.671677,0.968176,1.4e-05,...,252,2084,57.1886,2581.15,57.1886,57.1886,-250,250,2581.15,2581.15
2326,2326,33554432,0,4,0.004076,-0.209668,4.549163,151.564682,0.829073,3e-06,...,280,2326,7.9086,2581.15,7.9086,7.9086,-250,250,2581.15,2581.15
4500,4500,33554432,0,3,0.143586,-0.856637,39.110916,151.219528,0.974792,4e-06,...,400,4500,-203.2914,2581.15,-203.2914,-203.2914,-250,250,2581.15,2581.15


In [27]:
# Both stereo tables get Wz1/Wz2 columns that repeat their Wz column.
for stereo_hits in (hits_stereo_12, hits_stereo_34):
    wire_z = stereo_hits['Wz'].values
    stereo_hits['Wz1'] = wire_z
    stereo_hits['Wz2'] = wire_z

In [28]:
# Preview hits_stereo_12 again to check the added Wz1/Wz2 columns.
hits_stereo_12.head()

Unnamed: 0,Unnamed: 0.1,Bits,Link,TrackID,Px,Py,Pz,Time,Length,ELoss,...,Wu,Wv1,Wv2,Wx1,Wx2,Wy1,Wy2,Wz,Wz1,Wz2
2005,2005,33554432,0,3,-0.281636,-0.35006,33.927059,62.737587,0.479251,2e-06,...,-115.2914,-250,250,-239.000367,259.096982,-136.641617,-93.063746,2591.2793,2591.2793,2591.2793
2049,2049,33554432,0,4,-0.434417,-0.014204,16.6539,91.016327,0.237317,2e-06,...,32.5486,-250,250,-251.885472,246.211877,10.635807,54.213678,2591.2793,2591.2793,2591.2793
3251,3251,33554432,0,4,-0.101019,-0.295795,9.470301,106.74324,0.64945,1e-06,...,-50.1714,-250,250,-244.675949,253.4214,-71.769418,-28.191547,2591.2793,2591.2793,2591.2793
4467,4467,33554432,0,4,-0.261138,0.136395,10.35381,151.587067,0.960634,4e-06,...,-20.2514,-250,250,-247.283649,250.8137,-41.963273,1.614598,2591.2793,2591.2793,2591.2793
4832,4832,33554432,0,4,-0.047668,0.280637,13.992413,39.707237,0.779062,4e-06,...,-106.4914,-250,250,-239.767337,258.330012,-127.875104,-84.297232,2591.2793,2591.2793,2591.2793


In [31]:
# Duplicate the 'event' column as 'EventID' in both Y-view tables.
for y_view_hits in (hits_y_12, hits_y_34):
    y_view_hits['EventID'] = y_view_hits['event'].values

In [32]:
# Duplicate the 'event' column as 'EventID' in both stereo tables.
for stereo_hits in (hits_stereo_12, hits_stereo_34):
    stereo_hits['EventID'] = stereo_hits['event'].values

In [33]:
# List the columns now present in hits_y_12.
hits_y_12.columns

Index([u'Unnamed: 0.1', u'Bits', u'Link', u'TrackID', u'Px', u'Py', u'Pz',
       u'Time', u'Length', u'ELoss', u'DetectorID', u'X', u'Y', u'Z',
       u'PdgCode', u'dist2Wire', u'event', u'k', u'StatNb', u'ViewNb',
       u'PlaneNb', u'LayerNb', u'StrawNb', u'Index', u'Wy', u'Wz', u'Wy1',
       u'Wy2', u'Wx1', u'Wx2', u'Wz1', u'Wz2', u'EventID'],
      dtype='object')

In [34]:
# List the columns now present in hits_stereo_12.
hits_stereo_12.columns

Index([u'Unnamed: 0.1', u'Bits', u'Link', u'TrackID', u'Px', u'Py', u'Pz',
       u'Time', u'Length', u'ELoss', u'DetectorID', u'X', u'Y', u'Z',
       u'PdgCode', u'dist2Wire', u'event', u'k', u'StatNb', u'ViewNb',
       u'PlaneNb', u'LayerNb', u'StrawNb', u'Index', u'Wu', u'Wv1', u'Wv2',
       u'Wx1', u'Wx2', u'Wy1', u'Wy2', u'Wz', u'Wz1', u'Wz2', u'EventID'],
      dtype='object')

In [35]:
# Columns selected from each per-view table when they are stacked, in output order.
cols = [
    u'EventID', u'TrackID', u'PdgCode', u'DetectorID', u'dist2Wire',
    u'StatNb', u'ViewNb', u'PlaneNb', u'LayerNb', u'StrawNb',
    u'Px', u'Py', u'Pz',
    u'Time', u'Length', u'ELoss',
    u'X', u'Y', u'Z',
    u'Wx1', u'Wx2', u'Wy1', u'Wy2',
    u'Wz', u'Wz1', u'Wz2',
]

In [36]:
# Stack the four per-view tables row-wise, restricted to the shared columns.
view_tables = [hits_y_12, hits_y_34, hits_stereo_12, hits_stereo_34]
hits100k = pd.concat([table[cols] for table in view_tables], axis=0)

In [37]:
# Preview the stacked hits table.
hits100k.head()

Unnamed: 0,EventID,TrackID,PdgCode,DetectorID,dist2Wire,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,...,X,Y,Z,Wx1,Wx2,Wy1,Wy2,Wz,Wz1,Wz2
1965,16,4,-13,10002349,0.443802,1,0,0,0,349,...,80.673553,-113.076218,2581.154297,-250,250,-113.5314,-113.5314,2581.15,2581.15,2581.15
2046,17,4,-13,10002269,0.407632,1,0,0,0,269,...,-51.527267,27.687632,2581.150391,-250,250,27.2686,27.2686,2581.15,2581.15,2581.15
2084,17,3,-211,10002252,0.058012,1,0,0,0,252,...,87.655495,57.258011,2581.149658,-250,250,57.1886,57.1886,2581.15,2581.15,2581.15
2326,26,4,13,10002280,0.256356,1,0,0,0,280,...,-69.621529,8.176084,2581.161865,-250,250,7.9086,7.9086,2581.15,2581.15,2581.15
4500,86,3,211,10002400,0.002709,1,0,0,0,400,...,0.143542,-203.282715,2581.149902,-250,250,-203.2914,-203.2914,2581.15,2581.15,2581.15


In [44]:
# Hits of TrackID 4 whose |PdgCode| is neither 13 nor 211.
hits100k[
    (hits100k['TrackID'] == 4)
    & (np.abs(hits100k['PdgCode'].values) != 13)
    & (np.abs(hits100k['PdgCode'].values) != 211)
]

Unnamed: 0,EventID,TrackID,PdgCode,DetectorID,dist2Wire,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,...,X,Y,Z,Wx1,Wx2,Wy1,Wy2,Wz,Wz1,Wz2


In [45]:
# Hits of TrackID 3 whose |PdgCode| is neither 13 nor 211.
hits100k[
    (hits100k['TrackID'] == 3)
    & (np.abs(hits100k['PdgCode'].values) != 13)
    & (np.abs(hits100k['PdgCode'].values) != 211)
]

Unnamed: 0,EventID,TrackID,PdgCode,DetectorID,dist2Wire,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,...,X,Y,Z,Wx1,Wx2,Wy1,Wy2,Wz,Wz1,Wz2


In [48]:
# Label each hit by its track: 0. for TrackID 3, 1. for TrackID 4, -1. otherwise.
track_ids = hits100k['TrackID'].values
hits100k['Label'] = np.where(track_ids == 3, 0., np.where(track_ids == 4, 1., -1.))

In [50]:
# Preview the table again to check the new Label column.
hits100k.head()

Unnamed: 0,EventID,TrackID,PdgCode,DetectorID,dist2Wire,StatNb,ViewNb,PlaneNb,LayerNb,StrawNb,...,Y,Z,Wx1,Wx2,Wy1,Wy2,Wz,Wz1,Wz2,Label
1965,16,4,-13,10002349,0.443802,1,0,0,0,349,...,-113.076218,2581.154297,-250,250,-113.5314,-113.5314,2581.15,2581.15,2581.15,1
2046,17,4,-13,10002269,0.407632,1,0,0,0,269,...,27.687632,2581.150391,-250,250,27.2686,27.2686,2581.15,2581.15,2581.15,1
2084,17,3,-211,10002252,0.058012,1,0,0,0,252,...,57.258011,2581.149658,-250,250,57.1886,57.1886,2581.15,2581.15,2581.15,0
2326,26,4,13,10002280,0.256356,1,0,0,0,280,...,8.176084,2581.161865,-250,250,7.9086,7.9086,2581.15,2581.15,2581.15,1
4500,86,3,211,10002400,0.002709,1,0,0,0,400,...,-203.282715,2581.149902,-250,250,-203.2914,-203.2914,2581.15,2581.15,2581.15,0


In [55]:
# Split the event ids into those that have hits from both TrackID 3 and
# TrackID 4 (good) and all others (bad). Grouping once replaces the previous
# full-table filter per event id; groupby iterates the ids in sorted order,
# the same order np.unique produced.
required_tracks = set([3, 4])

good_event = []
bad_events = []

for event_id, track_ids in hits100k.groupby('EventID')['TrackID']:

    if required_tracks.issubset(track_ids.values):
        good_event.append(event_id)
    else:
        bad_events.append(event_id)

In [57]:
# Number of event ids collected in bad_events.
len(bad_events)

250

In [58]:
# Number of event ids collected in good_event.
len(good_event)

9253

In [59]:
# Keep only the hits whose EventID is listed in good_event.
in_good_event = hits100k['EventID'].isin(good_event)
hits100k = hits100k[in_good_event]

In [62]:
# Number of distinct event ids left in hits100k.
len(np.unique(hits100k.EventID.values))

9253

In [61]:
# Save the hits table (including its index) as CSV under ship_data/.
hits100k.to_csv('ship_data/hits100k.csv')