Skip to content

Commit 464823c

Browse files
RCAL-1175: Implement multiband associations. (#2077)
1 parent 25bf019 commit 464823c

File tree

6 files changed

+241
-0
lines changed

6 files changed

+241
-0
lines changed

changes/2077.associations.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implemented MultibandAssociation class to create multiband catalog association files.

docs/roman/associations/commands.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ Association Commands
1010
skycell_asn.rst
1111
mk_skycell_list.rst
1212
mk_skycell_asn_from_skycell_list.rst
13+
multiband_asn.rst
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
2+
.. _multiband_asn:
3+
4+
multiband_asn
5+
=============
6+
7+
Create multiband associations using either the command line tool
8+
``multiband_asn`` or through the Python API using
9+
:class:`romancal.associations.multiband_asn.MultibandAssociation`.
10+
11+
Multiband Associations
12+
^^^^^^^^^^^^^^^^^^^^^^
13+
14+
This module groups input files by their skycell identifier, creating an
15+
association file for each unique skycell. Each association file contains all
16+
filters (observations) that were used for that specific skycell, allowing for
17+
multiband data products to be generated per skycell. This enables efficient
18+
organization and processing of data across multiple filters for the same region
19+
of the sky.
20+
21+
To create a multiband association, use the following command:
22+
23+
.. code-block:: bash
24+
25+
multiband_asn r00001_*full*_coadd.asdf
26+
27+
where the input files are in the current directory and the wildcard expands to
28+
all relevant files. The tool will group files by their skycell identifier and
29+
generate association files for each group. To get a complete list of options
30+
you can run the command with the ``-h`` option:
31+
32+
.. code-block:: bash
33+
34+
multiband_asn -h
35+
36+
The input filenames should follow the convention:
37+
38+
.. code-block:: text
39+
40+
rPPPPP_<data_release_id>_<product_type>_<skycell_id>_<filter>_coadd.asdf
41+
42+
Where:
43+
PPPPP = Program number
44+
data_release_id = Data release identifier (e.g., 'p' for prompt)
45+
product_type = Product type (e.g., 'full')
46+
skycell_id = Skycell identifier (e.g., '270p65x48y69')
filter = Filter identifier, 'f' followed by digits (e.g., 'f158')
47+
48+
The association files will be JSON files named:
49+
50+
.. code-block:: text
51+
52+
rPPPPP_<data_release_id>_<product_type>_<skycell_id>_asn.json
53+
54+
For example, to generate associations for all skycells in a program:
55+
56+
.. code-block:: bash
57+
58+
multiband_asn r00001_*full*_coadd.asdf
59+
60+
You can also use the Python API:
61+
62+
.. code-block:: python
63+
64+
from romancal.associations.multiband_asn import MultibandAssociation
65+
files = ["r00001_p_full_270p65x48y69_f158_coadd.asdf", ...]
66+
multiband = MultibandAssociation(files)
67+
multiband.create_multiband_asn()
68+
69+
The data release ID and product type are extracted from the filenames.
70+
Association files are generated for each skycell group.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ roman_set_velocity_aberration = "romancal.scripts.set_velocity_aberration:main"
9191
roman_static_preview = "romancal.scripts.static_preview:command"
9292
roman_v1_calculate = "romancal.scripts.v1_calculate:main"
9393
skycell_asn = "romancal.associations.skycell_asn:_cli"
94+
multiband_asn = "romancal.associations.multiband_asn:_cli"
9495

9596
[build-system]
9697
requires = [
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import argparse
2+
import glob
3+
import logging
4+
import re
5+
6+
from . import asn_from_list
7+
8+
# Module-level logger. Only a NullHandler is attached here, so this module
# adds no output handler of its own; the logger level is set to INFO.
logger = logging.getLogger(__name__)
logger.setLevel("INFO")
logger.addHandler(logging.NullHandler())
12+
13+
__all__ = ["MultibandAssociation"]
14+
15+
16+
class MultibandAssociation:
    """Create one multiband association per skycell from coadd files.

    Parameters
    ----------
    files : list of str
        File paths and/or wildcard patterns (containing '*' or '?').

    Attributes
    ----------
    files : list of str
        The input paths after wildcard expansion.
    skycell_groups : dict
        Mapping of skycell identifier to the list of matching filenames.
    """

    # A filename is recognised when it ends in
    # ``_<skycell_id>_f<digits>_coadd.asdf``.
    # NOTE(review): the skycell part only admits digits and 'p'; confirm
    # whether other characters (e.g. for negative declinations) must match.
    _SKYCELL_PATTERN = re.compile(
        r".*_(?P<skycells>[0-9p]*x[0-9]*y[0-9]*)_f[0-9]*_coadd\.asdf$"
    )
    _WILDCARD_CHARS = ("*", "?")

    def __init__(self, files):
        self.files = self._parse_file_list(files)
        self.skycell_groups = self._get_skycell_groups(self.files)

    def _parse_file_list(self, files):
        """
        Parse a file list, expanding wildcards if present.

        Every entry containing a wildcard character ('*' or '?') is expanded
        with glob; its matches are inserted, sorted, at the position of the
        pattern. Entries without wildcards are kept unchanged. If no entry
        contains a wildcard the input list is returned as is.

        Parameters
        ----------
        files : list of str
            List of file paths and/or wildcard patterns.

        Returns
        -------
        list of str
            List of file paths with all wildcard patterns expanded and
            duplicate paths removed (first occurrence wins).
        """

        def has_wildcard(entry):
            return any(char in entry for char in self._WILDCARD_CHARS)

        if not any(has_wildcard(entry) for entry in files):
            return files

        expanded = []
        for entry in files:
            if has_wildcard(entry):
                # glob.glob returns matches in arbitrary order; sort them so
                # the resulting associations are reproducible.
                expanded.extend(sorted(glob.glob(entry)))
            else:
                expanded.append(entry)
        # Overlapping patterns may yield the same path more than once.
        return list(dict.fromkeys(expanded))

    def _get_skycell_groups(self, filelist):
        """
        Group filenames by the skycell identifier embedded in their name.

        Filenames that do not match the expected naming pattern are ignored.

        Parameters
        ----------
        filelist : list of str
            List of filenames.

        Returns
        -------
        dict
            Dictionary mapping skycell identifiers to lists of filenames.
        """
        groups = {}
        for filename in filelist:
            match = self._SKYCELL_PATTERN.match(filename)
            if match:
                groups.setdefault(match.group("skycells"), []).append(filename)
        return groups

    def _build_asn_args(self):
        """
        Build the ``asn_from_list`` argument list for every association.

        Within each skycell group, files are further split by the part of
        the path that precedes the skycell identifier, so that each distinct
        prefix gets its own association.

        Returns
        -------
        list of list of str
            One argument list per (prefix, skycell) combination.
        """
        import os

        all_args = []
        for skycell_id, filenames in self.skycell_groups.items():
            by_prefix = {}
            for filename in filenames:
                # Cut at the LAST "_<skycell_id>_" so an identical string in
                # a directory name cannot truncate the prefix early.
                head = filename.rpartition(f"_{skycell_id}_")[0]
                by_prefix.setdefault(f"{head}_", []).append(filename)
            for prefix, members in by_prefix.items():
                # Take the data release id from the file name only; directory
                # components may themselves contain underscores.
                data_release_id = os.path.basename(prefix).split("_")[1]
                all_args.append(
                    [
                        *members,
                        "-o",
                        f"{prefix}{skycell_id}_asn.json",
                        "--product-name",
                        f"{skycell_id}",
                        "--data-release-id",
                        data_release_id,
                    ]
                )
        return all_args

    def create_multiband_asn(self):
        """
        Create the multiband association files.

        Calls ``asn_from_list._cli`` once for every argument list produced
        from ``self.skycell_groups``. The output file is named
        ``<prefix><skycell_id>_asn.json``, where ``<prefix>`` is the part of
        the input path preceding the skycell identifier.

        Returns
        -------
        None
        """
        for args in self._build_asn_args():
            asn_from_list._cli(args)
95+
96+
97+
def _cli(args=None):
    """
    Command-line entry point for ``multiband_asn``.

    Parameters
    ----------
    args : list of str, optional
        Command-line arguments to parse. When None (the default), argparse
        reads them from ``sys.argv``, which is what the installed console
        script relies on.
    """
    parser = argparse.ArgumentParser(
        description="Create a multiband association from a list of files",
        usage="multiband_asn file1.asdf file2.asdf ... fileN.asdf",
    )
    parser.add_argument(
        "files",
        type=str,
        nargs="+",
        help="List of files to include in the multiband association",
    )

    parsed = parser.parse_args(args)

    multiband_asn = MultibandAssociation(parsed.files)
    multiband_asn.create_multiband_asn()

    # Emitted at INFO level through the module logger; whether it is shown
    # depends on the logging handlers configured by the caller.
    logger.info("Multiband association creation complete.")
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import os
2+
3+
from romancal.associations import multiband_asn
4+
5+
6+
def test_parse_file_list_wildcard(tmp_path):
    """A single wildcard pattern is expanded to the files it matches."""
    expected_names = {"test1.asdf", "test2.asdf"}
    # Create empty placeholder files for the pattern to match.
    for name in expected_names:
        tmp_path.joinpath(name).touch()

    pattern = [str(tmp_path / "*.asdf")]
    assoc = multiband_asn.MultibandAssociation(pattern)
    expanded = assoc._parse_file_list(pattern)

    assert {os.path.basename(path) for path in expanded} == expected_names
16+
17+
18+
def test_get_skycell_groups():
    """Test that _get_skycell_groups correctly groups files by skycell id."""
    files = [
        "r00001_p_full_270p65x48y69_f123_coadd.asdf",
        "r00001_p_full_270p65x48y69_f456_coadd.asdf",
        "r00001_p_full_271p66x49y70_f123_coadd.asdf",
    ]
    assoc = multiband_asn.MultibandAssociation(files)
    groups = assoc._get_skycell_groups(files)
    assert set(groups.keys()) == {"270p65x48y69", "271p66x49y70"}
    assert set(groups["270p65x48y69"]) == {
        "r00001_p_full_270p65x48y69_f123_coadd.asdf",
        "r00001_p_full_270p65x48y69_f456_coadd.asdf",
    }
    # The second skycell must contain exactly its own single file.
    assert groups["271p66x49y70"] == [
        "r00001_p_full_271p66x49y70_f123_coadd.asdf",
    ]
32+
33+
34+
def test_parse_file_list_no_wildcard():
    """A list without wildcard characters comes back unchanged."""
    plain_names = ["file1.asdf", "file2.asdf"]
    assoc = multiband_asn.MultibandAssociation(plain_names)
    result = assoc._parse_file_list(plain_names)
    assert result == plain_names
39+
40+
41+
def test_get_skycell_groups_empty():
    """Filenames that do not follow the skycell pattern produce no groups."""
    unrelated = ["not_a_skycell_file.txt", "another_file.fits"]
    assoc = multiband_asn.MultibandAssociation(unrelated)
    assert assoc._get_skycell_groups(unrelated) == {}
47+
48+
49+
def test_multiband_association_empty_list():
    """An empty input list yields no files and no skycell groups."""
    empty_assoc = multiband_asn.MultibandAssociation([])
    assert empty_assoc.files == []
    assert empty_assoc.skycell_groups == {}

0 commit comments

Comments
 (0)