-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathdf037_TTreeEventMatching.py
162 lines (133 loc) · 5.83 KB
/
df037_TTreeEventMatching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# \file
# \ingroup tutorial_dataframe
# \notebook -nodraw
#
# This example shows processing of a TTree-based dataset with horizontal
# concatenations (friends) and event matching (based on TTreeIndex). In case
# the current event being processed does not match one (or more) of the friend
# datasets, one can use the FilterAvailable, FilterMissing and DefaultValueFor
# functionalities to act upon the situation.
#
# \macro_code
# \macro_output
#
# \date September 2024
# \author Vincenzo Eduardo Padulano (CERN)
import array
import os
import ROOT
class DatasetContext:
    """A helper class to create the dataset for the tutorial below.

    Instantiating the class writes three ROOT files (given by the relative
    paths below): one holding the main tree and two holding the auxiliary
    trees. Used as a context manager, it removes those files again when the
    `with` block is left.
    """

    # Names of the files written by __init__ and removed by __exit__
    main_file = "df037_TTreeEventMatching_py_main.root"
    aux_file_1 = "df037_TTreeEventMatching_py_aux_1.root"
    aux_file_2 = "df037_TTreeEventMatching_py_aux_2.root"
    # Names of the trees stored in the files above
    main_tree_name = "events"
    aux_tree_name_1 = "auxdata_1"
    aux_tree_name_2 = "auxdata_2"

    def __init__(self):
        """Write the main tree and the two auxiliary trees to their files."""
        # The main tree has three entries, with 'idx' values 1, 2 and 3
        self._write_tree(self.main_file, self.main_tree_name, "x", ((1, 1), (2, 2), (3, 3)))
        # The first auxiliary file has matching indices 1 and 2, but not 3
        self._write_tree(self.aux_file_1, self.aux_tree_name_1, "y", ((1, 4), (2, 5)))
        # The second auxiliary file has matching indices 1 and 3, but not 2
        self._write_tree(self.aux_file_2, self.aux_tree_name_2, "z", ((1, 6), (3, 7)))

    @staticmethod
    def _write_tree(file_name, tree_name, value_branch, entries):
        """Write one tree with an 'idx' branch and one integer value branch.

        Parameters:
            file_name: path of the ROOT file to (re)create.
            tree_name: used both as name and as title of the tree.
            value_branch: name of the second branch ('x', 'y' or 'z' here).
            entries: iterable of (idx, value) pairs, one pair per tree entry.
        """
        with ROOT.TFile(file_name, "RECREATE"):
            tree = ROOT.TTree(tree_name, tree_name)
            # One-element buffers the branches read from on every Fill()
            idx = array.array("i", [0])  # any array can also be a numpy array
            value = array.array("i", [0])
            tree.Branch("idx", idx, "idx/I")
            tree.Branch(value_branch, value, value_branch + "/I")
            for entry_idx, entry_value in entries:
                idx[0] = entry_idx
                value[0] = entry_value
                tree.Fill()
            tree.Write()

    def __enter__(self):
        """Return the context itself so that it can be bound with `as`."""
        return self

    def __exit__(self, *_):
        """Remove the three files created by __init__.

        Every file is attempted even if an earlier removal fails, so that one
        missing or locked file does not leave the others behind. The first
        OSError encountered, if any, is raised once all files were tried.
        """
        first_error = None
        for path in (self.main_file, self.aux_file_1, self.aux_file_2):
            try:
                os.remove(path)
            except OSError as error:
                if first_error is None:
                    first_error = error
        if first_error is not None:
            raise first_error
def df037_TTreeEventMatching(dataset: DatasetContext):
    """Run the three event-matching examples on the tutorial dataset.

    The dataset consists of one main tree and two auxiliary trees that are
    attached to the main one as friends. The 'idx' branch is the index used
    to match entries between the trees:
      - the main tree has 3 entries, with 'idx' values (1, 2, 3);
      - the first auxiliary tree has 2 entries, with 'idx' values (1, 2);
      - the second auxiliary tree has 2 entries, with 'idx' values (1, 3).
    """

    def qualified(tree_name, branch_name):
        # Name under which a friend tree branch is requested below
        return tree_name + "." + branch_name

    events = ROOT.TChain(dataset.main_tree_name)
    events.Add(dataset.main_file)

    # Build every auxiliary chain together with its index on 'idx' first, then
    # concatenate them horizontally with the main chain. The list also keeps
    # the friend chains referenced for the whole duration of the function.
    friends = []
    for tree_name, file_name in (
        (dataset.aux_tree_name_1, dataset.aux_file_1),
        (dataset.aux_tree_name_2, dataset.aux_file_2),
    ):
        friend = ROOT.TChain(tree_name)
        friend.Add(file_name)
        friend.BuildIndex("idx")
        friends.append(friend)
    for friend in friends:
        events.AddFriend(friend)

    # Create an RDataFrame to process the input dataset. DefaultValueFor,
    # FilterAvailable and FilterMissing decide what happens to the events that
    # do not match entirely according to the index column 'idx'.
    df = ROOT.RDataFrame(events)

    idx_1 = qualified(dataset.aux_tree_name_1, "idx")
    y_1 = qualified(dataset.aux_tree_name_1, "y")
    idx_2 = qualified(dataset.aux_tree_name_2, "idx")
    z_2 = qualified(dataset.aux_tree_name_2, "z")

    fallback = ROOT.std.numeric_limits[int].min()
    all_columns = ("idx", idx_1, idx_2, "x", y_1, z_2)

    # Example 1: every friend column gets a default value for the entries
    # without a match
    with_defaults = df
    for column in (idx_1, y_1, idx_2, z_2):
        with_defaults = with_defaults.DefaultValueFor(column, fallback)
    display_1 = with_defaults.Display(all_columns)

    # Example 2: entries without a match in the first auxiliary tree are
    # skipped entirely, while entries without a match in the second auxiliary
    # tree are kept and get a default value
    second_aux_defaults = df.DefaultValueFor(idx_2, fallback).DefaultValueFor(z_2, fallback)
    display_2 = second_aux_defaults.FilterAvailable(y_1).Display(all_columns)

    # Example 3: only the entries of the main tree without a corresponding
    # match in the first auxiliary tree are kept
    unmatched = df.FilterMissing(idx_1)
    display_3 = unmatched.Display(("idx", "x"))

    # All three displays are booked above, before the first one is printed
    reports = (
        ("Example 1: provide default values for all columns", display_1),
        ("Example 2: always skip the entry when there is no match", display_2),
        ("Example 3: keep entries from the main tree for which there is no match in the auxiliary tree", display_3),
    )
    for title, display in reports:
        print(title)
        display.Print()
if __name__ == "__main__":
    # The input files exist only for the duration of the `with` block: they
    # are written when the context is created and removed when it is left.
    with DatasetContext() as tutorial_dataset:
        df037_TTreeEventMatching(tutorial_dataset)