-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
Copy pathdf105_WBosonAnalysis.py
226 lines (199 loc) · 8.6 KB
/
df105_WBosonAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
## \file
## \ingroup tutorial_dataframe
## \notebook -draw
## The W boson mass analysis from the ATLAS Open Data release of 2020, with RDataFrame.
##
## This tutorial is the analysis of the W boson mass taken from the ATLAS Open Data release in 2020
## (http://opendata.atlas.cern/release/2020/documentation/). The data was recorded with the ATLAS detector
## during 2016 at a center-of-mass energy of 13 TeV. W bosons are produced frequently at the LHC and
## are an important background to studies of Standard Model processes, for example the Higgs boson analyses.
##
## The analysis is translated to a RDataFrame workflow processing up to 60 GB of simulated events and data.
## By default the analysis runs on a preskimmed dataset to reduce the runtime. The full dataset can be used with
## the --full-dataset argument and you can also run only on a fraction of the original dataset using the argument --lumi-scale.
##
## See the [corresponding spec json file](https://github.com/root-project/root/blob/master/tutorials/analysis/dataframe/df105_WBosonAnalysis.json).
##
## \macro_image
## \macro_code
## \macro_output
##
## \date March 2020
## \author Stefan Wunsch (KIT, CERN)
import ROOT
import sys
import json
import argparse
import os
# Argument parsing
parser = argparse.ArgumentParser()
parser.add_argument("--lumi-scale", type=float, default=0.001,
help="Run only on a fraction of the total available 10 fb^-1 (only usable together with --full-dataset)")
parser.add_argument("--full-dataset", action="store_true", default=False,
help="Use the full dataset (use --lumi-scale to run only on a fraction of it)")
parser.add_argument("-b", action="store_true", default=False, help="Use ROOT batch mode")
parser.add_argument("-t", action="store_true", default=False, help="Use implicit multi threading (for the full dataset only possible with --lumi-scale 1.0)")
if 'df105_WBosonAnalysis.py' in sys.argv[0]:
# Script
args = parser.parse_args()
else:
# Notebook
args = parser.parse_args(args=[])
if args.b: ROOT.gROOT.SetBatch(True)
if args.t: ROOT.EnableImplicitMT()
if not args.full_dataset: lumi_scale = 0.001 # The preskimmed dataset contains only 0.01 fb^-1
else: lumi_scale = args.lumi_scale
lumi = 10064.0
print('Run on data corresponding to {:.2f} fb^-1 ...'.format(lumi * lumi_scale / 1000.0))
if args.full_dataset: dataset_path = "root://eospublic.cern.ch//eos/opendata/atlas/OutreachDatasets/2020-01-22"
else: dataset_path = "root://eospublic.cern.ch//eos/root-eos/reduced_atlas_opendata/w"
# Create a ROOT dataframe for each dataset
# Note that we load the filenames from the external json file placed in the same folder than this script.
files = json.load(open(os.path.join(ROOT.gROOT.GetTutorialsDir(), "analysis/dataframe/df105_WBosonAnalysis.json")))
processes = files.keys()
df = {}
xsecs = {}
sumws = {}
samples = []
for p in processes:
for d in files[p]:
# Construct the dataframes
folder = d[0] # Folder name
sample = d[1] # Sample name
xsecs[sample] = d[2] # Cross-section
sumws[sample] = d[3] # Sum of weights
num_events = d[4] # Number of events
samples.append(sample)
df[sample] = ROOT.RDataFrame("mini", "{}/1lep/{}/{}.1lep.root".format(dataset_path, folder, sample))
# Scale down the datasets if requested
if args.full_dataset and lumi_scale < 1.0:
df[sample] = df[sample].Range(int(num_events * lumi_scale))
# Select events for the analysis
# Just-in-time compile custom helper function performing complex computations
ROOT.gInterpreter.Declare("""
bool GoodElectronOrMuon(int type, float pt, float eta, float phi, float e, float trackd0pv, float tracksigd0pv, float z0)
{
ROOT::Math::PtEtaPhiEVector p(pt / 1000.0, eta, phi, e / 1000.0);
if (abs(z0 * sin(p.theta())) > 0.5) return false;
if (type == 11 && abs(eta) < 2.46 && (abs(eta) < 1.37 || abs(eta) > 1.52)) {
if (abs(trackd0pv / tracksigd0pv) > 5) return false;
return true;
}
if (type == 13 && abs(eta) < 2.5) {
if (abs(trackd0pv / tracksigd0pv) > 3) return false;
return true;
}
return false;
}
""")
for s in samples:
# Select events with a muon or electron trigger and with a missing transverse energy larger than 30 GeV
df[s] = df[s].Filter("trigE || trigM")\
.Filter("met_et > 30000")
# Find events with exactly one good lepton
df[s] = df[s].Define("good_lep", "lep_isTightID && lep_pt > 35000 && lep_ptcone30 / lep_pt < 0.1 && lep_etcone20 / lep_pt < 0.1")\
.Filter("ROOT::VecOps::Sum(good_lep) == 1")
# Apply additional cuts in case the lepton is an electron or muon
df[s] = df[s].Define("idx", "ROOT::VecOps::ArgMax(good_lep)")\
.Filter("GoodElectronOrMuon(lep_type[idx], lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx], lep_trackd0pvunbiased[idx], lep_tracksigd0pvunbiased[idx], lep_z0[idx])")
# Apply luminosity, scale factors and MC weights for simulated events
for s in samples:
if "data" in s:
df[s] = df[s].Define("weight", "1.0")
else:
df[s] = df[s].Define("weight", "scaleFactor_ELE * scaleFactor_MUON * scaleFactor_LepTRIGGER * scaleFactor_PILEUP * mcWeight * {} / {} * {}".format(xsecs[s], sumws[s], lumi))
# Compute transverse mass of the W boson using the lepton and the missing transverse energy and make a histogram
ROOT.gInterpreter.Declare("""
float ComputeTransverseMass(float met_et, float met_phi, float lep_pt, float lep_eta, float lep_phi, float lep_e)
{
ROOT::Math::PtEtaPhiEVector met(met_et, 0, met_phi, met_et);
ROOT::Math::PtEtaPhiEVector lep(lep_pt, lep_eta, lep_phi, lep_e);
return (met + lep).Mt() / 1000.0;
}
""")
histos = {}
for s in samples:
df[s] = df[s].Define("mt_w", "ComputeTransverseMass(met_et, met_phi, lep_pt[idx], lep_eta[idx], lep_phi[idx], lep_E[idx])")
histos[s] = df[s].Histo1D(ROOT.RDF.TH1DModel(s, "mt_w", 24, 60, 180), "mt_w", "weight")
# Run the event loop and merge histograms of the respective processes
# RunGraphs allows to run the event loops of the separate RDataFrame graphs
# concurrently. This results in an improved usage of the available resources
# if each separate RDataFrame can not utilize all available resources, e.g.,
# because not enough data is available.
ROOT.RDF.RunGraphs([histos[s] for s in samples])
def merge_histos(label):
h = None
for i, d in enumerate(files[label]):
t = histos[d[1]].GetValue()
if i == 0: h = t.Clone()
else: h.Add(t)
h.SetNameTitle(label, label)
return h
data = merge_histos("data")
wjets = merge_histos("wjets")
zjets = merge_histos("zjets")
ttbar = merge_histos("ttbar")
diboson = merge_histos("diboson")
singletop = merge_histos("singletop")
# Create the plot
# Set styles
ROOT.gROOT.SetStyle("ATLAS")
# Create canvas
c = ROOT.TCanvas("c", "", 600, 600)
c.SetTickx(0)
c.SetTicky(0)
c.SetLogy()
# Draw stack with MC contributions
stack = ROOT.THStack()
for h, color in zip(
[singletop, diboson, ttbar, zjets, wjets],
[(0.82, 0.94, 0.76), (0.76, 0.54, 0.57), (0.61, 0.6, 0.8), (0.97, 0.81, 0.41), (0.87, 0.35, 0.42)]):
h.SetLineWidth(1)
h.SetLineColor("black")
h.SetFillColor(ROOT.TColor.GetColor(*color))
# From Python 3.11 onward, implicit conversion of the tuple works too:
# h.SetFillColor(color)
stack.Add(h)
stack.Draw("HIST")
stack.GetXaxis().SetLabelSize(0.04)
stack.GetXaxis().SetTitleSize(0.045)
stack.GetXaxis().SetTitleOffset(1.3)
stack.GetXaxis().SetTitle("m_{T}^{W#rightarrow l#nu} [GeV]")
stack.GetYaxis().SetTitle("Events")
stack.GetYaxis().SetLabelSize(0.04)
stack.GetYaxis().SetTitleSize(0.045)
stack.SetMaximum(1e10 * args.lumi_scale)
stack.SetMinimum(1)
# Draw data
data.SetMarkerStyle(20)
data.SetMarkerSize(1.2)
data.SetLineWidth(2)
data.SetLineColor("black")
data.Draw("E SAME")
# Add legend
legend = ROOT.TLegend(0.60, 0.65, 0.92, 0.92)
legend.SetTextFont(42)
legend.SetFillStyle(0)
legend.SetBorderSize(0)
legend.SetTextSize(0.04)
legend.SetTextAlign(32)
legend.AddEntry(data, "Data" ,"lep")
legend.AddEntry(wjets, "W+jets", "f")
legend.AddEntry(zjets, "Z+jets", "f")
legend.AddEntry(ttbar, "t#bar{t}", "f")
legend.AddEntry(diboson, "Diboson", "f")
legend.AddEntry(singletop, "Single top", "f")
legend.Draw()
# Add ATLAS label
text = ROOT.TLatex()
text.SetNDC()
text.SetTextFont(72)
text.SetTextSize(0.045)
text.DrawLatex(0.21, 0.86, "ATLAS")
text.SetTextFont(42)
text.DrawLatex(0.21 + 0.16, 0.86, "Open Data")
text.SetTextSize(0.04)
text.DrawLatex(0.21, 0.80, "#sqrt{{s}} = 13 TeV, {:.2f} fb^{{-1}}".format(lumi * args.lumi_scale / 1000.0))
# Save the plot
c.SaveAs("df105_WBosonAnalysis.png")
print("Saved figure to df105_WBosonAnalysis.png")