Merge pull request #238 from asfadmin/topic-dataset-constants
platform/processingLevel concept-id aliasing, DATASET Constants, constants no longer top level imports
SpicyGarlicAlbacoreRoll authored Jan 24, 2024
2 parents 1efbbe0 + 927d621 commit 2bf6b21
Showing 16 changed files with 655 additions and 106 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -28,10 +28,13 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
------
## [v.7.0.0](https://github.com/asfadmin/Discovery-asf_search/compare/v6.7.3...v.7.0.0)
### Added
- Added `collectionAlias` to the `ASFSearchOptions` validator map as a config param. Set to `False` to disable concept-id aliasing behaviour for `processingLevel` and `platform`.
- Adds a warning when scenes in a stack are missing state vectors, and logs baseline warnings with `ASF_LOGGER`
- Adds `OPERA-S1-CALIBRATION` entry to `dataset_collections` and corresponding `OPERA_S1_CALIBRATION` constant to `DATASET.py`, used to search for OPERA-S1 `CSLC` and `RTC` calibration data.

### Changed
- Constants are no longer top-level imports; they are now accessible through their respective modules
- `processingLevel` and `platform` are now aliased by collection concept-ids (lists of concept-ids by processing level/platform are viewable in `datasets.py`), improving search performance and bypassing the subquery system
- Baseline stacking no longer excludes products with missing state vectors from the final stack, matching SearchAPI behavior
- `OPERA-S1` dataset no longer includes calibration data (moved to new dataset)
- Adds optional `ASFSession` constructor keyword arguments for new class variables:
1 change: 1 addition & 0 deletions asf_search/ASFSearchOptions/config.py
@@ -5,4 +5,5 @@
'host': INTERNAL.CMR_HOST,
'provider': INTERNAL.DEFAULT_PROVIDER,
'session': ASFSession(),
'collectionAlias': True
}
3 changes: 2 additions & 1 deletion asf_search/ASFSearchOptions/validator_map.py
@@ -66,5 +66,6 @@ def validate(key, value):
    # Config parameter parsers
'session': parse_session,
'host': parse_string,
'provider': parse_string
'provider': parse_string,
'collectionAlias': bool,
}
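
A minimal usage sketch (not part of this diff) of the new `collectionAlias` config parameter, which defaults to `True` per `config.py` above; setting it to `False` skips concept-id aliasing for `platform`/`processingLevel`:

```python
import asf_search as asf

# Sketch: disable concept-id aliasing so platform/processingLevel are
# passed to CMR directly instead of being folded into collections.
opts = asf.ASFSearchOptions(
    platform=asf.PLATFORM.SENTINEL1A,
    collectionAlias=False,
)
results = asf.search(opts=opts, maxResults=5)
```
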
2 changes: 1 addition & 1 deletion asf_search/CMR/__init__.py
@@ -2,4 +2,4 @@
from .subquery import build_subqueries
from .translate import translate_product, translate_opts, get_additional_fields
from .field_map import field_map
from .datasets import dataset_collections
from .datasets import dataset_collections, collections_per_platform, collections_by_processing_level, get_concept_id_alias, get_dataset_concept_ids
46 changes: 43 additions & 3 deletions asf_search/CMR/datasets.py
@@ -1,3 +1,6 @@
from typing import List


dataset_collections = {
"SENTINEL-1": {
"SENTINEL-1A_SLC": ["C1214470488-ASF", "C1205428742-ASF", "C1234413245-ASFDEV"],
@@ -327,7 +330,7 @@
}

collections_per_platform = {
"Sentinel-1A": [
"SENTINEL-1A": [
"C1214470488-ASF",
"C1214470533-ASF",
"C1214470576-ASF",
@@ -413,7 +416,7 @@
"C1244598379-ASFDEV",
"C1240784657-ASFDEV",
],
"Sentinel-1B": [
"SENTINEL-1B": [
"C1327985661-ASF",
"C1327985645-ASF",
"C1595422627-ASF",
@@ -729,7 +732,7 @@
}


collections_by_processing_level: {
collections_by_processing_level = {
"SLC": [
"C1214470488-ASF",
"C1205428742-ASF",
@@ -1071,3 +1074,40 @@
"SLOPE": ["C1214408428-ASF", "C1210599503-ASF"],
"STOKES": ["C1214419355-ASF", "C1210599673-ASF"],
}

#################### Helper Methods ####################

def get_concept_id_alias(param_list: List[str], collections_dict: dict) -> List[str]:
"""
    :param param_list (List[str]): list of search values to alias
    :param collections_dict (dict): the search-value-to-concept-id dictionary to read from
    :returns List[str]: a list of concept-ids corresponding to the given search values.
        If any of the search values is not a key in collections_dict, returns an empty list instead.
"""
concept_id_aliases = []
for param in param_list:
if alias := collections_dict.get(param):
concept_id_aliases.extend(alias)
else:
return []

return concept_id_aliases

def get_dataset_concept_ids(datasets: List[str]) -> List[str]:
"""
    Returns concept-ids for the provided dataset(s).
    If an invalid dataset is provided, a ValueError is raised.
:param `datasets` (`List[str]`): a list of datasets to grab concept-ids for
:returns `List[str]`: the list of concept-ids associated with the given datasets
"""
output = []
for dataset in datasets:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
output.extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

return output
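
A short sketch of how the two helpers above behave, per their docstrings:

```python
from asf_search.CMR.datasets import (
    collections_per_platform,
    get_concept_id_alias,
    get_dataset_concept_ids,
)

# Platform aliasing: an unknown key makes the helper return [] so the
# caller can fall back to the regular (un-aliased) query path.
s1a_concept_ids = get_concept_id_alias(['SENTINEL-1A'], collections_per_platform)

# Dataset lookup is stricter: an unrecognized dataset raises ValueError.
opera_concept_ids = get_dataset_concept_ids(['OPERA-S1'])
```
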
112 changes: 83 additions & 29 deletions asf_search/CMR/subquery.py
@@ -1,57 +1,111 @@
from typing import List
from typing import List, Optional, Tuple
import itertools
from copy import copy

from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.constants import CMR_PAGE_SIZE

from asf_search.CMR.datasets import collections_by_processing_level, collections_per_platform, dataset_collections, get_concept_id_alias, get_dataset_concept_ids
from numpy import intersect1d, union1d

def build_subqueries(opts: ASFSearchOptions) -> List[ASFSearchOptions]:
"""
Build a list of sub-queries using the cartesian product of all the list parameters described by opts
:param opts: The search options to split into sub-queries
:return list: A list of ASFSearchOptions objects
"""
params = dict(opts)

# Break out two big list offenders into manageable chunks
if params.get('granule_list') is not None:
params['granule_list'] = chunk_list(params['granule_list'], CMR_PAGE_SIZE)
if params.get('product_list') is not None:
params['product_list'] = chunk_list(params['product_list'], CMR_PAGE_SIZE)
for chunked_key in ['granule_list', 'product_list']:
if params.get(chunked_key) is not None:
params[chunked_key] = chunk_list(params[chunked_key], CMR_PAGE_SIZE)

    list_param_names = ['platform', 'season', 'collections', 'dataset']  # these parameters will dodge the subquery system
    skip_param_names = ['maxResults']  # these params exist in opts, but shouldn't be passed on to subqueries at ALL

params = dict([ (k, v) for k, v in params.items() if k not in skip_param_names ])

collections, aliased_keywords = get_keyword_concept_ids(params, opts.collectionAlias)
params['collections'] = list(union1d(collections, params.get('collections', [])))

for keyword in [*skip_param_names, *aliased_keywords]:
params.pop(keyword, None)

subquery_params, list_params = {}, {}
for k, v in params.items():
if k in list_param_names:
list_params[k] = v
for key, value in params.items():
if key in list_param_names:
list_params[key] = value
else:
subquery_params[k] = v

subquery_params[key] = value
sub_queries = cartesian_product(subquery_params)
return [_build_subquery(query, opts, list_params) for query in sub_queries]

final_sub_query_opts = []
for query in sub_queries:
q = dict()
for p in query:
q.update(p)
q['provider'] = opts.provider
q['host'] = opts.host
q['session'] = copy(opts.session)
for key in list_params.keys():
q[key] = list_params[key]

final_sub_query_opts.append(ASFSearchOptions(**q))

return final_sub_query_opts
def _build_subquery(query: List[Tuple[dict]], opts: ASFSearchOptions, list_params: dict) -> ASFSearchOptions:
"""
    Composes the query dict and list params into a new ASFSearchOptions object
    :param query: the cartesian search query options
    :param opts: the search options to pull config options from (provider, host, session)
    :param list_params: the subquery parameters
"""
q = dict()
for p in query:
q.update(p)
return ASFSearchOptions(
**q,
        provider=opts.provider,
        host=opts.host,
        session=copy(opts.session),
**list_params
)

def get_keyword_concept_ids(params: dict, use_collection_alias: bool = True) -> Tuple[List[str], List[str]]:
"""
Gets concept-ids for dataset, platform, processingLevel keywords
processingLevel is scoped by dataset or platform concept-ids when available
    :param params: search parameter dictionary pre-CMR translation
    :param use_collection_alias: whether or not to alias platform and processingLevel with concept-ids
    :returns two lists:
        - list of concept-ids for dataset, platform, and processingLevel
        - list of aliased keywords to remove from the final parameters
"""
collections = []
aliased_keywords = []

if use_collection_alias:
if 'processingLevel' in params.keys():
collections = get_concept_id_alias(params.get('processingLevel'), collections_by_processing_level)
if len(collections):
aliased_keywords.append('processingLevel')

if 'platform' in params.keys():
platform_concept_ids = get_concept_id_alias(
[platform.upper() for platform in params.get('platform')],
collections_per_platform
)
if len(platform_concept_ids):
aliased_keywords.append('platform')
collections = _get_intersection(platform_concept_ids, collections)

if 'dataset' in params.keys():
aliased_keywords.append('dataset')
dataset_concept_ids = get_dataset_concept_ids(params.get('dataset'))
collections = _get_intersection(dataset_concept_ids, collections)

return collections, aliased_keywords

def _get_intersection(keyword_concept_ids: List[str], intersecting_ids: List[str]) -> List[str]:
"""
    Returns the intersection between two lists. If the second list is empty,
    the first list is returned unchanged.
"""
if len(intersecting_ids):
return list(intersect1d(intersecting_ids, keyword_concept_ids))

return keyword_concept_ids

def chunk_list(source: List, n: int) -> List:
"""
Breaks a longer list into a list of lists, each of length n
@@ -70,7 +124,7 @@ def cartesian_product(params):
return p


def format_query_params(params):
def format_query_params(params) -> List[List[dict]]:
listed_params = []

for param_name, param_val in params.items():
@@ -80,7 +134,7 @@ def format_query_params(params):
return listed_params


def translate_param(param_name, param_val):
def translate_param(param_name, param_val) -> List[dict]:
param_list = []

if not isinstance(param_val, list):
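
A sketch of the aliasing flow end to end, assuming the default config (`collectionAlias=True`): `platform` and `processingLevel` are aliased to concept-ids, intersected into `collections`, and dropped from the subquery parameters:

```python
from asf_search import ASFSearchOptions
from asf_search.CMR.subquery import build_subqueries

# Sketch: both keywords are aliased to concept-ids and intersected,
# so the resulting subqueries carry `collections` instead.
opts = ASFSearchOptions(platform='SENTINEL-1A', processingLevel='SLC')
for subquery in build_subqueries(opts):
    print(subquery.collections)
```
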
23 changes: 3 additions & 20 deletions asf_search/CMR/translate.py
@@ -1,13 +1,14 @@
from datetime import datetime
from typing import Any, Dict, List
from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.CMR.datasets import get_concept_id_alias
from asf_search.constants import CMR_PAGE_SIZE
import re
from shapely import wkt
from shapely.geometry import Polygon
from shapely.geometry.base import BaseGeometry
from .field_map import field_map
from .datasets import dataset_collections
from .datasets import dataset_collections, collections_per_platform

import logging

@@ -48,19 +49,6 @@ def translate_opts(opts: ASFSearchOptions) -> list:
if any(key in dict_opts for key in ['start', 'end', 'season']):
dict_opts = fix_date(dict_opts)

if 'dataset' in dict_opts:
if 'collections' not in dict_opts:
dict_opts['collections'] = []

for dataset in dict_opts['dataset']:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
dict_opts['collections'].extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

dict_opts.pop('dataset')

# convert the above parameters to a list of key/value tuples
cmr_opts = []
for (key, val) in dict_opts.items():
@@ -99,15 +87,10 @@ def translate_opts(opts: ASFSearchOptions) -> list:

return cmr_opts


def should_use_asf_frame(cmr_opts):
asf_frame_platforms = ['SENTINEL-1A', 'SENTINEL-1B', 'ALOS']
asf_frame_datasets = ['SENTINEL-1', 'OPERA-S1', 'SLC-BURST', 'ALOS PALSAR', 'ALOS AVNIR-2']

asf_frame_collections = []
for dataset in asf_frame_datasets:
for concept_ids in dataset_collections.get(dataset).values():
asf_frame_collections.extend(concept_ids)
asf_frame_collections = get_concept_id_alias(asf_frame_platforms, collections_per_platform)

return any([
p[0] == 'platform[]' and p[1].upper() in asf_frame_platforms
2 changes: 1 addition & 1 deletion asf_search/__init__.py
@@ -23,7 +23,7 @@
from .ASFProduct import ASFProduct
from .ASFSearchResults import ASFSearchResults
from .ASFSearchOptions import ASFSearchOptions, validators
from .constants import *
from .constants import BEAMMODE, FLIGHT_DIRECTION, INSTRUMENT, PLATFORM, POLARIZATION, PRODUCT_TYPE, INTERNAL, DATASET
from .exceptions import *
from .health import *
from .search import *
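
With the wildcard import replaced by explicit module names, constants are reached through their modules; a quick sketch of the new access pattern (module attributes assumed from the CHANGELOG above):

```python
import asf_search as asf

# Constants now live on their modules rather than at the package top level:
asf.PLATFORM.SENTINEL1A           # previously importable as a top-level constant
asf.PRODUCT_TYPE.SLC
asf.DATASET.OPERA_S1_CALIBRATION  # new DATASET constants module from this PR
```
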
5 changes: 3 additions & 2 deletions asf_search/search/error_reporting.py
@@ -1,5 +1,6 @@
from typing import Dict
from asf_search import ASFSearchOptions, ERROR_REPORTING_ENDPOINT
from asf_search import ASFSearchOptions
from asf_search import INTERNAL
import requests
import logging

@@ -19,7 +20,7 @@ def report_search_error(search_options: ASFSearchOptions, message: str):
message=f"Error Message: {str(message)}\nUser Agent: {user_agent} \
\nSearch Options: {{\n{search_options_list}\n}}"

response = requests.post(f'https://{ERROR_REPORTING_ENDPOINT}', data={'Message': "This error message and info was automatically generated:\n\n" + message})
response = requests.post(f'https://{INTERNAL.ERROR_REPORTING_ENDPOINT}', data={'Message': "This error message and info was automatically generated:\n\n" + message})

try:
response.raise_for_status()
5 changes: 4 additions & 1 deletion asf_search/search/search_generator.py
@@ -75,7 +75,10 @@ def search_generator(
if maxResults is not None and \
(getattr(opts, 'granule_list', False) or getattr(opts, 'product_list', False)):
raise ValueError("Cannot use maxResults along with product_list/granule_list.")


if opts.dataset is not None and opts.platform is not None:
raise ValueError("Cannot use dataset along with platform keyword in search.")

preprocess_opts(opts)

url = '/'.join(s.strip('/') for s in [f'https://{opts.host}', f'{INTERNAL.CMR_GRANULE_PATH}'])
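
A sketch of the new guard in action; `dataset` and `platform` can no longer be combined in a single search:

```python
import asf_search as asf

# The new validation raises before any CMR query is made.
try:
    asf.search(dataset='SENTINEL-1', platform=asf.PLATFORM.SENTINEL1A, maxResults=5)
except ValueError as err:
    print(err)  # Cannot use dataset along with platform keyword in search.
```
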
2 changes: 1 addition & 1 deletion examples/1-Basic_Overview.ipynb
@@ -419,7 +419,7 @@
"source": [
"# asf-search queries CMR with page sizes of 500, \n",
"# so setting maxResults=1000 means asf-search will have to query cmr twice, each time returning 500 products\n",
"large_results_generator = asf.search_generator(maxResults=1000, platform=asf.SENTINEL1A)\n",
"large_results_generator = asf.search_generator(maxResults=1000, platform=asf.PLATFORM.SENTINEL1A)\n",
"\n",
"with open(\"search_results.metalink\", \"w\") as f:\n",
" f.writelines(asf.export.results_to_metalink(large_results_generator))"
2 changes: 1 addition & 1 deletion examples/5-Download.ipynb
@@ -220,7 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
"results_with_zips = asf.search(platform=asf.constants.PLATFORM.SENTINEL1, processingLevel=asf.constants.PRODUCT_TYPE.GRD_HD, maxResults=250)\n",
"results_with_zips = asf.search(platform=asf.PLATFORM.SENTINEL1, processingLevel=asf.PRODUCT_TYPE.GRD_HD, maxResults=250)\n",
"\n",
"with results_with_zips[0].remotezip(session=user_pass_session) as z:\n",
" file_paths = [file.filename for file in z.filelist if file.filename.endswith('.tiff')]\n",
