# root_numpy
Allows reading and writing ROOT files that contain data in simple formats.

In [1]:
import root_numpy

# Reading .root

In [2]:
# Example ROOT file read by the cells below (path is relative to the working directory).
ROOT_EXAMPLE_FILE = "public_train_10000.root"

List trees in a ROOT file.

In [3]:
# Ask the file which trees it contains and keep the first one.
available_trees = root_numpy.list_trees(ROOT_EXAMPLE_FILE)
treename = available_trees[0]

List the branches of that tree.

In [4]:
# Branch names of the tree selected in the previous cell.
root_numpy.list_branches(ROOT_EXAMPLE_FILE, treename=treename)

['event_id',
 'target',
 'lepton_pt',
 'lepton_eta',
 'lepton_phi',
 'mem_pt',
 'mem_phi',
 'jet1_pt',
 'jet1_eta',
 'jet1_phi',
 'jet1_btag',
 'jet2_pt',
 'jet2_eta',
 'jet2_phi',
 'jet2_btag',
 'jet3_pt',
 'jet3_eta',
 'jet3_phi',
 'jet3_btag',
 'jet4_pt',
 'jet4_eta',
 'jet4_phi',
 'jet4_btag',
 'm_jj',
 'm_jjj',
 'm_lv',
 'm_jlv',
 'm_bb',
 'm_wbb',
 'm_wwbb']

## Main function: reading a ROOT file

In [5]:
# Load the file into a NumPy array; no tree or branch selection is passed here.
root2array_example = root_numpy.root2array(ROOT_EXAMPLE_FILE)

In [6]:
# Field names of the loaded array.
root2array_example.dtype.names

('event_id',
 'target',
 'lepton_pt',
 'lepton_eta',
 'lepton_phi',
 'mem_pt',
 'mem_phi',
 'jet1_pt',
 'jet1_eta',
 'jet1_phi',
 'jet1_btag',
 'jet2_pt',
 'jet2_eta',
 'jet2_phi',
 'jet2_btag',
 'jet3_pt',
 'jet3_eta',
 'jet3_phi',
 'jet3_btag',
 'jet4_pt',
 'jet4_eta',
 'jet4_phi',
 'jet4_btag',
 'm_jj',
 'm_jjj',
 'm_lv',
 'm_jlv',
 'm_bb',
 'm_wbb',
 'm_wwbb')

In [7]:
# Check which Python type root2array returned.
type(root2array_example)

numpy.ndarray

In [8]:
# The array is one-dimensional and has 10000 entries
root2array_example.shape

(10000,)

In [9]:
# Each entry is a single record holding one value per field
root2array_example[0]

(1000001,  1.,  34.75056839,  0.78702533,  1.89889133,  20.86243439, -2.62299824,  44.81414795, -0.16817059,  2.63159466,  2.00002289,  57.68952179, -0.16162847, -0.68204731,  0.,  71.95420074,  1.15423262, -2.85851312,  2.00001597,  79.94803619,  0.87747198, -0.2567355,  0.,  81.72444916,  189.58314514,  80.11831665,  170.08607483,  91.12820435,  298.46878052,  374.6857605)

In [10]:
# The record fields correspond to the branches
root2array_example.dtype

dtype([('event_id', '<i8'), ('target', '<f4'), ('lepton_pt', '<f4'), ('lepton_eta', '<f4'), ('lepton_phi', '<f4'), ('mem_pt', '<f4'), ('mem_phi', '<f4'), ('jet1_pt', '<f4'), ('jet1_eta', '<f4'), ('jet1_phi', '<f4'), ('jet1_btag', '<f4'), ('jet2_pt', '<f4'), ('jet2_eta', '<f4'), ('jet2_phi', '<f4'), ('jet2_btag', '<f4'), ('jet3_pt', '<f4'), ('jet3_eta', '<f4'), ('jet3_phi', '<f4'), ('jet3_btag', '<f4'), ('jet4_pt', '<f4'), ('jet4_eta', '<f4'), ('jet4_phi', '<f4'), ('jet4_btag', '<f4'), ('m_jj', '<f4'), ('m_jjj', '<f4'), ('m_lv', '<f4'), ('m_jlv', '<f4'), ('m_bb', '<f4'), ('m_wbb', '<f4'), ('m_wwbb', '<f4')])

In [11]:
# Reading can be limited to specific branches; the result keeps only those fields
root_numpy.root2array(ROOT_EXAMPLE_FILE, branches=["event_id", "m_lv"])

array([(1000001,  80.11831665), (1000002,  83.32155609),
       (1000003,  95.30760193), ..., (1009998,  81.17916107),
       (1009999,  85.55708313), (1010000,  78.98086548)], 
      dtype=[('event_id', '<i8'), ('m_lv', '<f4')])

In [12]:
# Reading can also be limited to a range of entries: start=100, stop=500 yields 400 rows
root_numpy.root2array(ROOT_EXAMPLE_FILE, start=100, stop=500).shape

(400,)

Converting a structured array to a regular one

In [13]:
import pandas as pd  # pandas itself is covered in more detail shortly

# Going through a DataFrame turns the structured array into a plain 2-D array.
structured_as_frame = pd.DataFrame(root2array_example)
structured_as_frame.values

array([[  1.00000100e+06,   1.00000000e+00,   3.47505684e+01, ...,
          9.11282043e+01,   2.98468781e+02,   3.74685760e+02],
       [  1.00000200e+06,   1.00000000e+00,   5.42509270e+01, ...,
          6.71184845e+01,   2.87363983e+02,   5.27247559e+02],
       [  1.00000300e+06,   1.00000000e+00,   4.77460251e+01, ...,
          1.30389206e+02,   2.37879318e+02,   3.36058838e+02],
       ..., 
       [  1.00999800e+06,   1.00000000e+00,   1.03452408e+02, ...,
          1.28377182e+02,   3.80536560e+02,   5.51422424e+02],
       [  1.00999900e+06,   0.00000000e+00,   3.38559570e+01, ...,
          1.40375320e+02,   2.64731140e+02,   3.71888153e+02],
       [  1.01000000e+06,   0.00000000e+00,   5.01330376e+01, ...,
          1.34282303e+02,   3.77124817e+02,   4.87969818e+02]])

# Writing .root

In [14]:
import numpy as np

# Small 3x3 sample of standard-normal draws, used as the data for the write example.
sample_shape = (3, 3)
random_data = np.random.normal(loc=0.0, scale=1.0, size=sample_shape)
random_data

array([[ 0.32754071, -0.17057658, -1.75494009],
       [ 1.81826112,  0.44164219,  0.73197519],
       [ 0.09827639,  0.54200887, -2.30925674]])

In [15]:
# This will fail: a plain (unstructured) array has no field names to use as branch names
# root_numpy.array2root(random_data, "./random_data.root", mode="recreate")

ROOT needs to know the branch names explicitly, so we set them.

In [16]:
# ROOT branches are created from named fields, so reinterpret the three float64
# columns as a structured array with fields 'a', 'b' and 'c'.
# np.float64 replaces the alias np.float_, which was removed in NumPy 2.0, and
# .view() replaces assignment to .dtype, which NumPy discourages. The view shares
# memory with the original array and has the same shape the assignment produced.
random_data = random_data.view([('a', np.float64), ('b', np.float64), ('c', np.float64)])

And saving works

In [17]:
# Write the structured array to ./random_data.root.
# NOTE(review): mode="recreate" presumably overwrites an existing file — confirm in the root_numpy docs.
root_numpy.array2root(random_data, "./random_data.root", mode="recreate")

Verify by reading the file back

In [18]:
# Read the file back to check that the field names and values were written.
root_numpy.root2array("./random_data.root")

array([( 0.32754071, -0.17057658, -1.75494009),
       ( 1.81826112,  0.44164219,  0.73197519),
       ( 0.09827639,  0.54200887, -2.30925674)], 
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])