Skip to content

Commit 24a069b

Browse files
feat: support input files on public EOS (#185)
* add new config option to support reading input files on EOS via xrootd
1 parent 6409b67 commit 24a069b

File tree

3 files changed

+25
-13
lines changed

3 files changed

+25
-13
lines changed

analyses/cms-open-data-ttbar/config.yaml

+15-10
Original file line numberDiff line numberDiff line change
@@ -2,35 +2,40 @@ global:
22

33
# ServiceX: ignore cache with repeated queries
44
SERVICEX_IGNORE_CACHE: false
5-
5+
66
# analysis facility: set to "coffea_casa" for coffea-casa environments, "EAF" for FNAL, "local" for local setups
77
AF: coffea_casa
8-
8+
99
benchmarking:
1010

1111
# chunk size to use
1212
CHUNKSIZE: 500000
13-
13+
14+
# read files from public EOS (thanks to the CMS DPOA team!)
15+
# note that they are likely only available temporarily
16+
# and not part of an official CMS Open Data release
17+
INPUT_FROM_EOS: false
18+
1419
# metadata to propagate through to metrics
1520
# "ssl-dev" allows for the switch to local data on /data
1621
AF_NAME: coffea_casa
17-
22+
1823
# currently has no effect
1924
SYSTEMATICS: all
20-
25+
2126
# does not do anything, only used for metric gathering (set to 2 for distributed coffea-casa)
2227
CORES_PER_WORKER: 2
23-
28+
2429
# scaling for local setups with FuturesExecutor
2530
NUM_CORES: 4
26-
31+
2732
# only I/O, all other processing disabled
2833
DISABLE_PROCESSING: false
29-
34+
3035
# read additional branches (only with DISABLE_PROCESSING = True)
3136
# acceptable values are 4.1, 15, 25, 50 (corresponding to % of file read); 4.1% corresponds to the standard branches used in the notebook
3237
IO_FILE_PERCENT: '4.1'
33-
38+
3439
# nanoAOD branches that correspond to different values of IO_FILE_PERCENT
3540
IO_BRANCHES:
3641
'4.1':
@@ -79,4 +84,4 @@ benchmarking:
7984
- LHEPart_mass
8085
- Jet_qgl
8186
- Jet_muonSubtrFactor
82-
- Jet_puIdDisc
87+
- Jet_puIdDisc

analyses/cms-open-data-ttbar/ttbar_analysis_pipeline.ipynb

+6-1
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,12 @@
387387
}
388388
],
389389
"source": [
390-
"fileset = utils.construct_fileset(N_FILES_MAX_PER_SAMPLE, use_xcache=False, af_name=config[\"benchmarking\"][\"AF_NAME\"]) # local files on /data for ssl-dev\n",
390+
"fileset = utils.construct_fileset(\n",
391+
" N_FILES_MAX_PER_SAMPLE,\n",
392+
" use_xcache=False,\n",
393+
" af_name=config[\"benchmarking\"][\"AF_NAME\"],\n",
394+
" input_from_eos=config[\"benchmarking\"][\"INPUT_FROM_EOS\"]\n",
395+
" ) # local files on /data for ssl-dev as af_name\n",
391396
"\n",
392397
"print(f\"processes in fileset: {list(fileset.keys())}\")\n",
393398
"print(f\"\\nexample of information in fileset:\\n{{\\n 'files': [{fileset['ttbar__nominal']['files'][0]}, ...],\")\n",

analyses/cms-open-data-ttbar/utils/__init__.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def set_style():
4949
plt.rcParams['text.color'] = "222222"
5050

5151

52-
def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
52+
def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name="", input_from_eos=False):
5353
# using https://atlas-groupdata.web.cern.ch/atlas-groupdata/dev/AnalysisTop/TopDataPreparation/XSection-MC15-13TeV.data
5454
# for reference
5555
# x-secs are in pb
@@ -80,9 +80,11 @@ def construct_fileset(n_files_max_per_sample, use_xcache=False, af_name=""):
8080
file_paths = [f["path"] for f in file_list]
8181
if use_xcache:
8282
file_paths = [f.replace("https://xrootd-local.unl.edu:1094", "root://red-xcache1.unl.edu") for f in file_paths]
83-
if af_name == "ssl-dev":
83+
elif af_name == "ssl-dev":
8484
# point to local files on /data
8585
file_paths = [f.replace("https://xrootd-local.unl.edu:1094//store/user/", "/data/alheld/") for f in file_paths]
86+
elif input_from_eos:
87+
file_paths = [f.replace("https://xrootd-local.unl.edu:1094//store/user/AGC/nanoAOD", "root://eospublic.cern.ch//eos/opendata/cms/upload/agc/1.0.0/") for f in file_paths]
8688
nevts_total = sum([f["nevts"] for f in file_list])
8789
metadata = {"process": process, "variation": variation, "nevts": nevts_total, "xsec": xsec_info[process]}
8890
fileset.update({f"{process}__{variation}": {"files": file_paths, "metadata": metadata}})

0 commit comments

Comments
 (0)