In [65]:
import uproot
import pyarrow as pa
import json

In [88]:
file = uproot.open('/Users/xweichu/Desktop/nano_dy.root')

class Node(object):
    def __init__(self, name, ndtype, parent):
        self.children  = []
        self.name = name
        self.ndtype = ndtype
        self.parent = parent

def buildObj(branch, subnode):
    objname = branch.name
    parent = subnode.parent
    while parent is not None:
        objname = str(parent.name) + '.' + str(objname)
        parent = parent.parent
    
    field = None
    fieldmeta = {}
    fieldmeta['BasketSeek'] = bytes(branch._fBasketSeek)
    fieldmeta['BasketBytes'] = bytes(branch._fBasketBytes)
    fieldmeta['Compression'] = bytes(str(branch.compression), 'utf8')
    fieldmeta['Compressionratio'] = bytes(str(branch.compressionratio()),'utf8')
    
    if('inf' not in str(branch.interpretation.type) and 'bool' in str(branch.interpretation.type)):
        function=getattr(pa,'bool_')
        field = pa.field(branch.name, function(), metadata = fieldmeta)
        
    elif('inf' in str(branch.interpretation.type)):
        function= getattr(pa,'list_')
        subfunc = None
        if('bool' in str(branch.interpretation.type)):
            subfunc = getattr(pa,'bool_')
        else:
            subfunc = getattr(pa,str(branch.interpretation.type).split()[-1])
        field = pa.field(branch.name, function(subfunc()), metadata = fieldmeta)
        
    else:
        function=getattr(pa,str(branch.interpretation.type))
        field = pa.field(branch.name, function(), metadata = fieldmeta)
        
    schema = pa.schema([field])
    
    #metadata for the arrow table
    sche_meta = {}
    #versions
    sche_meta['0'] = bytes(0)
    sche_meta['1'] = bytes(0)
    sche_meta['2'] = bytes(0)
    #data format -> arrow
    sche_meta['3'] = bytes(5)
    sche_meta['4'] = bytes('0' + ' ' + str(field.type) + ' 0 1 ' + str(branch.name), 'utf8')
    sche_meta['5'] = bytes('n/a','utf8')
    sche_meta['6'] = bytes(str(subnode.parent.name),'utf8')
    sche_meta['7'] = bytes(branch.numentries)

    schema.with_metadata(sche_meta)
    table = pa.Table.from_arrays([branch.array().tolist()],schema = schema)
    
    #Serialize arrow table to bytes
    batches = table.to_batches()
    sink = pa.BufferOutputStream()
    writer = pa.RecordBatchStreamWriter(sink, schema)
    for batch in batches:
        writer.write_batch(batch)
    buff = sink.getvalue()
    buff_bytes = buff.to_pybytes()
    cephobj = open('./data/'+objname,'wb+')
    cephobj.write(buff_bytes)
    cephobj.close()
    

        
def growTree(node, rootobj):
    for key in rootobj.allkeys():
        subnode = Node(key,type(rootobj[key]),node)
        node.children.append(subnode)
        growTree(subnode, rootobj[key])
        #build the object if it's a branch
        if('Branch' in str(subnode.ndtype)):
            buildObj(rootobj[key], subnode)
    
tree = Node(file.name, type(file), None)
growTree(tree,file)
# rootfile = open('nano_dy.meta','w+')
# s = json.dumps(tree.__dict__)
# print(s)

In [69]:
print(file._fNbytesName)
# dir(file)

64


In [70]:
file.keys()
# dir(file)

[b'Runs;1', b'Events;1']

In [71]:
file.values()

[<TTree b'Runs' at 0x000110c21630>, <TTree b'Events' at 0x000111138cc0>]

In [72]:
tr = file['Events']
print(tr._fEntries)
# dir(tr)

10


In [73]:
file['Events'].keys()
pass

In [74]:
file['Events']['event']

<TBranch b'event' at 0x000110c25dd8>

In [75]:
file['Events']['event']

<TBranch b'event' at 0x000111cee978>

In [76]:
# file['Events'].show()
# pass

In [77]:
file['Events'].name

b'Events'

In [78]:
file['Events'].title

b'Events'

In [92]:
file['Events'].numentries
br = file['Events']['Electron_dzErr']
print(dir(br))
baskets = br.baskets()
print(br._fBasketSeek)
print(br._fBasketBytes)
print(br.array())
print(str(br.compression))
print(br.compressionratio())
print(br.countbranch)
# dir(baskets[0])

['_BasketKey', '_RecoveredTBasket', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__metaclass__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_bases', '_basket', '_basket_entryoffset', '_basket_itemoffset', '_basketcachekey', '_basketkey', '_basketstartstop', '_cachekey', '_classname', '_classversion', '_context', '_copycontext', '_countbranch', '_countleaf', '_dtype1', '_dtype2', '_dtype3', '_entryoffsets', '_fBasketBytes', '_fBasketEntry', '_fBasketSeek', '_fBasketSize', '_fBaskets', '_fBranches', '_fCompress', '_fEntries', '_fEntryNumber', '_fEntryOffsetLen', '_fFileName', '_fFillColor', '_fFillStyle', '_fFirstEntry', '_fIOFeatures', '_fLeaves', '_fMaxBaskets', '_fName', '_fOffset', '_fSplitLev

In [37]:
file['Events']['Electron_dxyErr'].interpretation

asjagged(asdtype('>f4'))

In [75]:
# file['Events'].keys()

In [76]:
jd_arr = file['Events']['GenJetAK8_mass'].array()
dir(jd_arr)
jd_arr.shape


(10,)

In [77]:
b = file['Events']['GenPart_phi']
bsks = b.basket_uncompressedbytes(0)
bsks
# print(b.values())
# print(type(b))
# dir(b)

1008

In [46]:
import numpy
jd_arr = file['Events'].array('GenPart_phi')
# print(jd_arr[0])
# for item in jd_arr:
#     print(type(item))
#     print(item)
import pyarrow as pa

path = file.name + b'.Events' + b'.GenPart_phi'

data = pa.array(jd_arr)
field = pa.field('GenPart_phi', pa.list_(pa.float32()))

schema = pa.schema([field])

sche_meta = {}
#versions
sche_meta['0'] = bytes(0)
sche_meta['1'] = bytes(0)
sche_meta['2'] = bytes(0)
#data format -> arrow
sche_meta['3'] = bytes(5)
sche_meta['4'] = bytes('data schema','utf8')
sche_meta['5'] = bytes('db schema','utf8')
sche_meta['6'] = bytes('table name','utf8')
sche_meta['7'] = bytes('num of rows','utf8')

schema.with_metadata(sche_meta)

table = pa.Table.from_arrays([data],schema = schema)
dict = table.to_pydict()
batches = table.to_batches()

sink = pa.BufferOutputStream()
writer = pa.RecordBatchStreamWriter(sink, batches[0].schema)
writer.write_batch(batches[0])
buf = sink.getvalue()
bts = buf.to_pybytes()
print(len(bts))
reader = pa.ipc.open_stream(bts)
print(reader.schema)
batches = [b for b in reader]
# print(batches[0].column(0))
bftable = pa.serialize(table).to_buffer()
bftablebytes = bftable.to_pybytes()
print(len(bftablebytes))

f = open(path, 'wb')
f.write(bftablebytes)
f.close()

lss = ['1dewfwe', 'wfoeiwfowjf', ['wee', ' wefwfqf']]
bfu = pa.serialize(lss).to_buffer()
bfubytes = bfu.to_pybytes()
print(len(bfubytes))
type(pa.deserialize(bfubytes))

# table_2 = pa.deserialize(bftable)
# print(table_2)



# print(batches)
# print(path)
# print(batches[0].column(0))
# dir(batches[0])


1384
GenPart_phi: list<item: float>
  child 0, item: float
2616
1152


list

In [51]:
file['Events']['CaloMET_sumEt'].array()

array([ 889.5 ,  979.  , 1302.  ,  844.  ,  837.5 , 1463.  ,  734.5 ,
        487.25,  576.  , 1026.  ], dtype=float32)

In [78]:
file['Events'].lazyarrays()

{b'run': <LazyArray 'run' at 0001186c9828>,
 b'luminosityBlock': <LazyArray 'luminosityBlock' at 0001186c9c50>,
 b'event': <LazyArray 'event' at 0001186c9fd0>,
 b'CaloMET_phi': <LazyArray 'CaloMET_phi' at 0001186c9d30>,
 b'CaloMET_pt': <LazyArray 'CaloMET_pt' at 0001186c9da0>,
 b'CaloMET_sumEt': <LazyArray 'CaloMET_sumEt' at 0001186c9eb8>,
 b'nElectron': <LazyArray 'nElectron' at 0001186c9080>,
 b'Electron_deltaEtaSC': <LazyArray 'Electron_deltaEtaSC' at 0001186c9320>,
 b'Electron_dr03EcalRecHitSumEt': <LazyArray 'Electron_dr03EcalRecHitSumEt' at 0001186c9048>,
 b'Electron_dr03HcalDepth1TowerSumEt': <LazyArray 'Electron_dr03HcalDepth1TowerSumEt' at 0001186c90f0>,
 b'Electron_dr03TkSumPt': <LazyArray 'Electron_dr03TkSumPt' at 0001186c9b38>,
 b'Electron_dxy': <LazyArray 'Electron_dxy' at 0001186c9b70>,
 b'Electron_dxyErr': <LazyArray 'Electron_dxyErr' at 0001186c92b0>,
 b'Electron_dz': <LazyArray 'Electron_dz' at 0001186c9278>,
 b'Electron_dzErr': <LazyArray 'Electron_dzErr' at 0001186c9

In [79]:
dask_arr = uproot.daskarray('/Users/xweichu/Desktop/nano_dy.root','Events','Photon_isScEtaEB')

AttributeError: can't set attribute

In [1]:
arrs = file['Events'].arrays('Electron*')

NameError: name 'file' is not defined

In [69]:
arrs.get('Electron_deltaEtaSC')