Skip to content

Commit

Permalink
feat: fixes towards v1.2 (#179)
Browse files (browse the repository at this point in the history)
* fix b-tagging threshold comparison
* remove event offsetting by 1e-6 in histograms
* add updated histogram references
  • Loading branch information
alexander-held authored Jul 20, 2023
1 parent a03c3fd commit cce1afb
Show file tree
Hide file tree
Showing 12 changed files with 30,157 additions and 30,159 deletions.
6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_100_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_10_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_1_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_200_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_20_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_2_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_50_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_5_file_per_process.json

Large diffs are not rendered by default.

6,696 changes: 3,348 additions & 3,348 deletions analyses/cms-open-data-ttbar/reference/histos_all_file_per_process.json

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -212,7 +212,7 @@ def process(self, events):
event_filters = event_filters & (ak.count(selected_jets.pt * pt_var_modifier, axis=1) >= 4)
# at least one b-tagged jet ("tag" means score above threshold)
B_TAG_THRESHOLD = 0.5
event_filters = event_filters & (ak.sum(selected_jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) >= 1)
event_filters = event_filters & (ak.sum(selected_jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) >= 1)

# apply event filters
selected_events = events[event_filters]
Expand All @@ -223,7 +223,7 @@ def process(self, events):
for region in ["4j1b", "4j2b"]:
# further filtering: 4j1b CR with single b-tag, 4j2b SR with two or more tags
if region == "4j1b":
region_filter = ak.sum(selected_jets.btagCSVV2 >= B_TAG_THRESHOLD, axis=1) == 1
region_filter = ak.sum(selected_jets.btagCSVV2 > B_TAG_THRESHOLD, axis=1) == 1
selected_jets_region = selected_jets[region_filter]
# use HT (scalar sum of jet pT) as observable
pt_var_modifier = (
Expand Down Expand Up @@ -333,12 +333,12 @@ def postprocess(self, accumulator):
def get_query(source: ObjectStream) -> ObjectStream:
"""Query for event / column selection: >=4j >=1b, ==1 lep with pT>25 GeV, return relevant columns
"""
return source.Where(lambda e: e.Electron_pt.Where(lambda pt: pt > 25).Count()
return source.Where(lambda e: e.Electron_pt.Where(lambda pt: pt > 25).Count()
+ e.Muon_pt.Where(lambda pt: pt > 25).Count() == 1)\
.Where(lambda f: f.Jet_pt.Where(lambda pt: pt > 25).Count() >= 4)\
.Where(lambda g: {"pt": g.Jet_pt,
"btagCSVV2": g.Jet_btagCSVV2}.Zip().Where(lambda jet:
jet.btagCSVV2 >= 0.5
.Where(lambda g: {"pt": g.Jet_pt,
"btagCSVV2": g.Jet_btagCSVV2}.Zip().Where(lambda jet:
jet.btagCSVV2 > 0.5
and jet.pt > 25).Count() >= 1)\
.Select(lambda h: {"Electron_pt": h.Electron_pt,
"Muon_pt": h.Muon_pt,
Expand Down
2 changes: 0 additions & 2 deletions analyses/cms-open-data-ttbar/utils/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -93,8 +93,6 @@ def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
def save_histograms(all_histograms, fileset, filename):
nominal_samples = [sample for sample in fileset.keys() if "nominal" in sample]

all_histograms += 1e-6 # add minimal event count to all bins to avoid crashes when processing a small number of samples

pseudo_data = (all_histograms[:, :, "ttbar", "ME_var"] + all_histograms[:, :, "ttbar", "PS_var"]) / 2 + all_histograms[:, :, "wjets", "nominal"]

with uproot.recreate(filename) as f:
Expand Down

0 comments on commit cce1afb

Please sign in to comment.