-
Notifications
You must be signed in to change notification settings - Fork 0
/
histograms.py
147 lines (125 loc) · 5.15 KB
/
histograms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# Implementation of the histogramming step of the analysis
#
# The histogramming step produces histograms for each variable in the dataset
# and for each physics process resulting into the final state with a muon and a
# tau. Then, the resulting histograms are passed to the plotting step, which
# combines the histograms so that we can study the physics of the decay.
import argparse
import ROOT
ROOT.gROOT.SetBatch(True)
# Declare the range of the histogram for each variable
#
# Each entry in the dictionary contains of the variable name as key and a tuple
# specifying the histogram layout as value. The tuple sets the number of bins,
# the lower edge and the upper edge of the histogram.
default_nbins = 30
ranges = {
"pt_1": (default_nbins, 17, 70),
"pt_2": (default_nbins, 20, 70),
"eta_1": (default_nbins, -2.1, 2.1),
"eta_2": (default_nbins, -2.3, 2.3),
"phi_1": (default_nbins, -3.14, 3.14),
"phi_2": (default_nbins, -3.14, 3.14),
"iso_1": (default_nbins, 0, 0.10),
"iso_2": (default_nbins, 0, 0.10),
"q_1": (2, -2, 2),
"q_2": (2, -2, 2),
"pt_met": (default_nbins, 0, 60),
"phi_met": (default_nbins, -3.14, 3.14),
"m_1": (default_nbins, 0, 0.2),
"m_2": (default_nbins, 0, 2),
"mt_1": (default_nbins, 0, 100),
"mt_2": (default_nbins, 0, 100),
"dm_2": (11, 0, 11),
"m_vis": (default_nbins, 20, 140),
"pt_vis": (default_nbins, 0, 60),
"jpt_1": (default_nbins, 30, 70),
"jpt_2": (default_nbins, 30, 70),
"jeta_1": (default_nbins, -4.7, 4.7),
"jeta_2": (default_nbins, -4.7, 4.7),
"jphi_1": (default_nbins, -3.14, 3.14),
"jphi_2": (default_nbins, -3.14, 3.14),
"jm_1": (default_nbins, 0, 20),
"jm_2": (default_nbins, 0, 20),
"jbtag_1": (default_nbins, 0, 1.0),
"jbtag_2": (default_nbins, 0, 1.0),
"npv": (25, 5, 30),
"njets": (5, 0, 5),
"mjj": (default_nbins, 0, 400),
"ptjj": (default_nbins, 0, 200),
"jdeta": (default_nbins, -9.4, 9.4),
}
# Book a histogram for a specific variable
def bookHistogram(df, variable, range_):
return df.Histo1D(
ROOT.ROOT.RDF.TH1DModel(variable, variable, range_[0], range_[1], range_[2]),
variable,
"weight",
)
# Write a histogram with a given name to the output ROOT file
def writeHistogram(h, name):
h.SetName(name)
h.Write()
# Apply a selection based on generator information about the tau
#
# See the skimming step for further details about this variable.
def filterGenMatch(df, label):
if label == "ZTT":
return df.Filter("gen_match == true", "Select genuine taus")
elif label == "ZLL":
return df.Filter("gen_match == false", "Select fake taus")
else:
return df
# Main function of the histogramming step
#
# The function loops over the outputs from the skimming step and produces the
# required histograms for the final plotting.
# Note that we perform a set of secondary selections on the skimmed dataset. First,
# we perform a second reduction with the baseline selection to a signal-enriched
# part of the dataset. Second, we select besides the signal region a control region
# which is used to estimate the contribution of QCD events producing the muon-tau
# pair in the final state.
def main(sample, process, output):
# Create output file
tfile = ROOT.TFile(output, "RECREATE")
variables = ranges.keys()
# Process skimmed datasets and produce histograms of variables
print(">>> Process skimmed sample {} for process {}".format(sample, process))
# Load skimmed dataset and apply baseline selection
df = (
ROOT.ROOT.RDataFrame("Events", sample)
.Filter("mt_1<30", "Muon transverse mass cut for W+jets suppression")
.Filter("iso_1<0.1", "Require isolated muon for signal region")
)
# Book histograms for the signal region
df1 = df.Filter("q_1*q_2<0", "Require opposited charge for signal region")
df1 = filterGenMatch(df1, process)
hists = {}
for variable in variables:
hists[variable] = bookHistogram(df1, variable, ranges[variable])
report1 = df1.Report()
# Book histograms for the control region used to estimate the QCD contribution
df2 = df.Filter("q_1*q_2>0", "Control region for QCD estimation")
df2 = filterGenMatch(df2, process)
hists_cr = {}
for variable in variables:
hists_cr[variable] = bookHistogram(df2, variable, ranges[variable])
report2 = df2.Report()
# Write histograms to output file
for variable in variables:
writeHistogram(hists[variable], "{}_{}".format(process, variable))
for variable in variables:
writeHistogram(hists_cr[variable], "{}_{}_cr".format(process, variable))
# Print cut-flow report
print("Cut-flow report (signal region):")
report1.Print()
print("Cut-flow report (control region):")
report2.Print()
tfile.Close()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("sample", type=str, help="Full path to skimmed sample")
parser.add_argument("process", type=str, help="Process name")
parser.add_argument("output", type=str, help="Output file with histograms")
args = parser.parse_args()
main(args.sample, args.process, args.output)