-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathdf007_snapshot.py
92 lines (76 loc) · 2.72 KB
/
df007_snapshot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
## \file
## \ingroup tutorial_dataframe
## \notebook -draw
## Write ROOT data with RDataFrame.
##
## This tutorial shows how to write out datasets in ROOT format using RDataFrame.
##
## \macro_image
## \macro_code
##
## \date April 2017
## \author Danilo Piparo (CERN)
import ROOT
# A simple helper function to fill a test tree: this makes the example stand-alone.
def fill_tree(treeName, fileName):
    """Write a 10000-entry test tree called treeName into the file fileName.

    Two columns are defined from the entry number before writing:
    "b1" is the entry number cast to int, and "b2" is the entry number
    cast to float and multiplied by the entry number.
    """
    source = ROOT.RDataFrame(10000)
    with_b1 = source.Define("b1", "(int) rdfentry_")
    with_b2 = with_b1.Define("b2", "(float) rdfentry_ * rdfentry_")
    with_b2.Snapshot(treeName, fileName)
# Names of the tree and of the files used throughout this tutorial
treeName = "myTree"
fileName = "df007_snapshot_py.root"
outFileName = "df007_snapshot_output_py.root"
outFileNameAllColumns = "df007_snapshot_output_allColumns_py.root"

# Create the input file holding the test tree, so there is something to read
fill_tree(treeName=treeName, fileName=fileName)

# Build an RDataFrame on top of the tree stored in that file
d = ROOT.RDataFrame(treeName, fileName)
# ## Select entries
# Keep only the entries whose b1 value is even
d_cut = d.Filter("b1 % 2 == 0")

# ## Enrich the dataset
# Define two temporary columns; they are written out later on.
# The vector-valued column is produced by a small C++ helper, which must be
# declared to the interpreter before a Define expression can call it.
getVector_code = '''
std::vector<float> getVector (float b2)
{
std::vector<float> v;
for (int i = 0; i < 3; i++) v.push_back(b2*i);
return v;
}
'''
ROOT.gInterpreter.Declare(getVector_code)

d2 = (
    d_cut.Define("b1_square", "b1 * b1")
    .Define("b2_vector", "getVector( b2 )")
)
# ## Write it to disk in ROOT format
# Save a new dataset made of one column that was already in the input tree
# (b1) plus the two newly defined columns.
# No column types are passed here, so they are inferred automatically; the
# Snapshot method also accepts them explicitly as template arguments.
d2.Snapshot(treeName, outFileName, ["b1", "b1_square", "b2_vector"])

# Reopen the file just written and print the name of each branch of the tree
f1 = ROOT.TFile(outFileName)
t = f1[treeName]
print("These are the columns b1, b1_square and b2_vector:")
for branch_name in [b.GetName() for b in t.GetListOfBranches()]:
    print("Branch: %s" % branch_name)
f1.Close()
# Listing every column name is optional: a regular expression can be given
# instead, and when the column argument is left out, as here, all the
# columns are written.
d2.Snapshot(treeName, outFileNameAllColumns)

# Reopen the file just written and print the name of each branch of the tree
f2 = ROOT.TFile(outFileNameAllColumns)
t = f2[treeName]
print("These are all the columns available to this dataframe:")
for branch_name in [b.GetName() for b in t.GetListOfBranches()]:
    print("Branch: %s" % branch_name)
f2.Close()
# Snapshot also hands back a fresh RDataFrame built on the dataset it has
# just written, so the analysis chain can be restarted from it.
# NOTE: outFileName is reused here, so the file produced by the first
# Snapshot call is replaced (Snapshot recreates its output file by default).
snapshot_df = d2.Snapshot(treeName, outFileName, ["b1_square"])

# Book a histogram of the saved column on the new dataframe and draw it
h = snapshot_df.Histo1D("b1_square")
c = ROOT.TCanvas()
h.Draw()

# Store the canvas as an image and tell the user where it went
c.SaveAs("df007_snapshot.png")
print("Saved figure to df007_snapshot.png")