diff --git a/changelog/118.feature.rst b/changelog/118.feature.rst
new file mode 100644
index 0000000..25951ec
--- /dev/null
+++ b/changelog/118.feature.rst
@@ -0,0 +1 @@
+Added support for ``detector`` and ``wavelength`` search attributes and the ability to filter with them.
diff --git a/sunpy_soar/attrs.py b/sunpy_soar/attrs.py
index 00f3796..32c463e 100644
--- a/sunpy_soar/attrs.py
+++ b/sunpy_soar/attrs.py
@@ -129,3 +129,15 @@ def _(wlk, attr, params):  # NOQA: ARG001
 @walker.add_applier(SOOP)
 def _(wlk, attr, params):  # NOQA: ARG001
     params.append(f"soop_name='{attr.value}'")
+
+
+@walker.add_applier(a.Detector)
+def _(wlk, attr, params):  # NOQA: ARG001
+    params.append(f"Detector='{attr.value}'")
+
+
+@walker.add_applier(a.Wavelength)
+def _(wlk, attr, params):  # NOQA: ARG001
+    wavemin = attr.min.value
+    wavemax = attr.max.value
+    params.append(f"Wavemin='{wavemin}'+AND+Wavemax='{wavemax}'")
diff --git a/sunpy_soar/client.py b/sunpy_soar/client.py
index 07943a2..6ed600a 100644
--- a/sunpy_soar/client.py
+++ b/sunpy_soar/client.py
@@ -1,5 +1,6 @@
 import json
 import pathlib
+import re
 
 import astropy.table
 import astropy.units as u
@@ -35,6 +36,67 @@ def search(self, *query, **kwargs):  # NOQA: ARG002
         qrt.hide_keys = ["Data item ID", "Filename"]
         return qrt
 
+    @staticmethod
+    def add_join_to_query(query: list[str], data_table: str, instrument_table: str):
+        """
+        Construct the WHERE, FROM, and SELECT parts of the ADQL query.
+
+        Data table columns are referenced through the alias ``h1`` and
+        instrument table columns through the alias ``h2``.
+
+        Parameters
+        ----------
+        query : list[str]
+            List of query items.
+        data_table : str
+            Name of the data table.
+        instrument_table : str
+            Name of the instrument table.
+
+        Returns
+        -------
+        tuple[str, str, str]
+            WHERE, FROM, and SELECT parts of the query.
+        """
+        conditions = []
+        # Extract wavemin and wavemax individually; the fractional part and exponent are optional.
+        wavemin_pattern = re.compile(r"Wavemin='(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'")
+        wavemax_pattern = re.compile(r"Wavemax='(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)'")
+        for parameter in query:
+            wavemin_match = wavemin_pattern.search(parameter)
+            wavemax_match = wavemax_pattern.search(parameter)
+            # If wavemin and wavemax are the same, only one wavelength was given in the query.
+            if wavemin_match and wavemax_match and float(wavemin_match.group(1)) == float(wavemax_match.group(1)):
+                # For PHI and SPICE, we can specify wavemin and wavemax in the query and get the results.
+                # For PHI we have wavelength data in both angstrom and nanometer without it being mentioned in the SOAR.
+                # For SPICE we get data in form of wavemin/wavemax columns, but only for the first spectral window.
+                # To make sure this data is not misleading to the user we do not return any values for PHI AND SPICE.
+                parameter = f"Wavelength='{wavemin_match.group(1)}'"
+            elif wavemin_match and wavemax_match:
+                parameter = f"Wavemin='{wavemin_match.group(1)}'+AND+h2.Wavemax='{wavemax_match.group(1)}'"
+            if parameter.startswith("begin_time"):
+                time_list = parameter.split("+AND+")
+                conditions += [f"h1.{time_list[0]}", f"h1.{time_list[1]}"]
+                # As there are no dimensions in STIX, the dimension index need not be included in the query for STIX.
+                if instrument_table and "stx" not in instrument_table:
+                    # To avoid duplicate rows in the output table, the dimension index is set to 1.
+                    conditions.append("h2.dimension_index='1'")
+            else:
+                # Detector/wavelength columns belong to the instrument table (h2), the rest to h1.
+                prefix = "h2." if parameter.startswith(("Detector", "Wave")) else "h1."
+                conditions.append(f"{prefix}{parameter}")
+
+        where_part = "+AND+".join(conditions)
+        from_part = f"{data_table} AS h1"
+        select_part = (
+            "h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
+            "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name"
+        )
+        if instrument_table:
+            from_part += f" JOIN {instrument_table} AS h2 USING (data_item_oid)"
+            select_part += ", h2.detector, h2.wavelength, h2.dimension_index"
+        return where_part, from_part, select_part
+
     @staticmethod
     def _construct_payload(query):
         """
@@ -42,28 +104,54 @@ def _construct_payload(query):
 
         Parameters
         ----------
-        payload : dict[str]
-            Payload to send to the TAP server.
+        query : list[str]
+            List of query items.
+
+        Returns
+        -------
+        dict
+            Payload dictionary to be sent with the query.
         """
-        # Construct ADQL query
-        url_query = {}
-        url_query["SELECT"] = "*"
-        # Assume science data by default
-        url_query["FROM"] = "v_sc_data_item"
+        # Default data table
+        data_table = "v_sc_data_item"
+        instrument_table = None
+        # Mapping between the SOAR instrument names and their corresponding SOAR instrument table aliases.
+        instrument_mapping = {
+            "SOLOHI": "SHI",
+            "EUI": "EUI",
+            "STIX": "STX",
+            "SPICE": "SPI",
+            "PHI": "PHI",
+            "METIS": "MET",
+        }
+
+        instrument_name = None
         for q in query:
-            if q.startswith("level") and q.split("=")[1][1:3] == "LL":
-                # Low latency data
-                url_query["FROM"] = "v_ll_data_item"
+            # An instrument item always sets the name; a descriptor item only does so when no name is set yet.
+            if q.startswith("instrument") or (q.startswith("descriptor") and not instrument_name):
+                instrument_name = q.split("=")[1][1:-1].split("-")[0].upper()
+            elif q.startswith("level") and q.split("=")[1][1:3] == "LL":
+                data_table = "v_ll_data_item"
 
-        url_query["WHERE"] = "+AND+".join(query)
-        adql_query = "+".join([f"{item}+{url_query[item]}" for item in url_query])
+        if instrument_name:
+            if instrument_name in instrument_mapping:
+                instrument_name = instrument_mapping[instrument_name]
+            instrument_table = f"v_{instrument_name.lower()}_sc_fits"
+            if data_table == "v_ll_data_item" and instrument_table:
+                instrument_table = instrument_table.replace("_sc_", "_ll_")
 
-        return {
-            "REQUEST": "doQuery",
-            "LANG": "ADQL",
-            "FORMAT": "json",
-            "QUERY": adql_query,
-        }
+        # Need to establish join for remote sensing instruments as they have instrument tables in SOAR.
+        if instrument_name in ["EUI", "MET", "SPI", "PHI", "SHI"]:
+            where_part, from_part, select_part = SOARClient.add_join_to_query(query, data_table, instrument_table)
+        else:
+            from_part = data_table
+            select_part = "*"
+            where_part = "+AND+".join(query)
+
+        adql_query = {"SELECT": select_part, "FROM": from_part, "WHERE": where_part}
+
+        adql_query_str = "+".join([f"{key}+{value}" for key, value in adql_query.items()])
+        return {"REQUEST": "doQuery", "LANG": "ADQL", "FORMAT": "json", "QUERY": adql_query_str}
 
     @staticmethod
     def _do_search(query):
@@ -92,6 +180,7 @@ def _do_search(query):
         # Do some list/dict wrangling
         names = [m["name"] for m in r.json()["metadata"]]
         info = {name: [] for name in names}
+
         for entry in r.json()["data"]:
             for i, name in enumerate(names):
                 info[name].append(entry[i])
@@ -113,6 +202,10 @@ def _do_search(query):
                 "SOOP Name": info["soop_name"],
             },
         )
+        if "detector" in info:
+            result_table["Detector"] = info["detector"]
+        if "wavelength" in info:
+            result_table["Wavelength"] = info["wavelength"]
         result_table.sort("Start time")
         return result_table
 
@@ -160,7 +253,7 @@ def _can_handle_query(cls, *query):
             True if this client can handle the given query.
         """
         required = {a.Time}
-        optional = {a.Instrument, a.Level, a.Provider, Product, SOOP}
+        optional = {a.Instrument, a.Detector, a.Wavelength, a.Level, a.Provider, Product, SOOP}
         if not cls.check_attr_types_in_query(query, required, optional):
             return False
         # check to make sure the instrument attr passed is one provided by the SOAR.
diff --git a/sunpy_soar/tests/test_sunpy_soar.py b/sunpy_soar/tests/test_sunpy_soar.py
index fe37fd9..4a3a301 100644
--- a/sunpy_soar/tests/test_sunpy_soar.py
+++ b/sunpy_soar/tests/test_sunpy_soar.py
@@ -150,3 +150,121 @@ def test_when_wrong_provider_passed():
     provider = a.Provider.noaa
     res = Fido.search(time & instrument & provider)
     assert len(res) == 0
+
+
+def test_search_wavelength_detector_column():
+    instrument = a.Instrument("EUI")
+    time = a.Time("2021-02-01", "2021-02-02")
+    level = a.Level(1)
+    product = a.soar.Product("EUI-FSI174-IMAGE")
+    res = Fido.search(instrument & time & level & product)
+    assert "Wavelength" in res[0].columns
+    assert "Detector" in res[0].columns
+
+
+def test_search_detector_instrument_dimension_2():
+    # Instruments "EUI", "METIS", "PHI" and "SOLOHI" have two dimensions in the SOAR data.
+    # Selecting no dimension index in the query results in two identical output rows.
+    # To avoid the repeated rows, the query is restricted to dimension_index=1.
+    instrument = a.Instrument("EUI")
+    time = a.Time("2020-03-03", "2020-03-04")
+    level = a.Level(1)
+    detector = a.Detector("HRI_EUV")
+    res = Fido.search(instrument & time & level & detector)
+    assert "Detector" in res[0].columns
+    assert res.file_num == 266
+
+
+def test_search_detector_instrument_dimension_4():
+    # The "SPICE" instrument has four dimensions in the SOAR data. As a result,
+    # selecting no dimension index in the query results in four identical output rows.
+    # To avoid the repeated rows, the query is restricted to dimension_index=1.
+    instrument = a.Instrument("SPICE")
+    time = a.Time("2023-03-03 15:00", "2023-03-03 16:00")
+    level = a.Level(1)
+    detector = a.Detector("SW")
+    res = Fido.search(instrument & time & level & detector)
+    assert "Detector" in res[0].columns
+    assert res.file_num == 11
+
+
+def test_invalid_detector():
+    instrument = a.Instrument("SPICE")
+    time = a.Time("2023-03-03 15:00", "2023-03-03 16:00")
+    level = a.Level(1)
+    detector = a.Detector("hello")
+    res = Fido.search(instrument & time & level & detector)
+    assert "Detector" in res[0].columns
+    assert res.file_num == 0
+
+
+def test_wavelength_column_wavelength_exists():
+    # For the instruments EUI, METIS and SOLOHI the "wavelength" column is available.
+    # Check that the "Wavelength" column exists in the search results.
+    instrument = a.Instrument("EUI")
+    time = a.Time("2023-04-03 15:00", "2023-04-03 16:00")
+    level = a.Level(1)
+    wavelength = a.Wavelength(304 * u.AA)
+    res = Fido.search(instrument & time & level & wavelength)
+    assert "Wavelength" in res[0].columns
+    assert res.file_num == 12
+
+
+def test_wavelength_single():
+    # Check that the results are filtered to the single wavelength value provided.
+    instrument = a.Instrument("EUI")
+    time = a.Time("2023-04-03 15:00", "2023-04-03 16:00")
+    level = a.Level(1)
+    wavelength = a.Wavelength(304 * u.AA)
+    res = Fido.search(instrument & time & level & wavelength)
+    for table in res:
+        assert all(table["Wavelength"] == 304)
+
+
+def test_wavelength_range():
+    # Check that the results are filtered when both wavemin and wavemax are provided.
+    instrument = a.Instrument("EUI")
+    time = a.Time("2023-04-03 15:00", "2023-04-03 16:00")
+    level = a.Level(1)
+    wavelength = a.Wavelength(171 * u.AA, 185 * u.AA)
+    res = Fido.search(instrument & time & level & wavelength)
+    for table in res:
+        assert all(table["Wavelength"] == 174)
+
+
+def test_join_science_query():
+    result = SOARClient._construct_payload(  # NOQA: SLF001
+        [
+            "instrument='EUI'",
+            "begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
+            "level='L1'",
+            "descriptor='eui-fsi174-image'",
+        ]
+    )
+
+    assert result["QUERY"] == (
+        "SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
+        "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, h2.wavelength, "
+        "h2.dimension_index+FROM+v_sc_data_item AS h1 JOIN v_eui_sc_fits AS h2 USING (data_item_oid)"
+        "+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
+        "+AND+h2.dimension_index='1'+AND+h1.level='L1'+AND+h1.descriptor='eui-fsi174-image'"
+    )
+
+
+def test_join_low_latency_query():
+    result = SOARClient._construct_payload(  # NOQA: SLF001
+        [
+            "instrument='EUI'",
+            "begin_time>='2021-02-01+00:00:00'+AND+begin_time<='2021-02-02+00:00:00'",
+            "level='LL01'",
+            "descriptor='eui-fsi174-image'",
+        ]
+    )
+
+    assert result["QUERY"] == (
+        "SELECT+h1.instrument, h1.descriptor, h1.level, h1.begin_time, h1.end_time, "
+        "h1.data_item_id, h1.filesize, h1.filename, h1.soop_name, h2.detector, h2.wavelength, "
+        "h2.dimension_index+FROM+v_ll_data_item AS h1 JOIN v_eui_ll_fits AS h2 USING (data_item_oid)"
+        "+WHERE+h1.instrument='EUI'+AND+h1.begin_time>='2021-02-01+00:00:00'+AND+h1.begin_time<='2021-02-02+00:00:00'"
+        "+AND+h2.dimension_index='1'+AND+h1.level='LL01'+AND+h1.descriptor='eui-fsi174-image'"
+    )