Skip to content

Commit

Permalink
moves dataset/platform collection aliasing into subquery.py, adds processingLevel aliasing
Browse files Browse the repository at this point in the history
  • Loading branch information
kim committed Nov 16, 2023
1 parent d5e1514 commit 3545799
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 28 deletions.
2 changes: 1 addition & 1 deletion asf_search/CMR/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .subquery import build_subqueries
from .translate import translate_product, translate_opts, get_additional_fields
from .field_map import field_map
from .datasets import dataset_collections
from .datasets import dataset_collections, collections_per_platform, collections_by_processing_level
6 changes: 3 additions & 3 deletions asf_search/CMR/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@
}

collections_per_platform = {
"Sentinel-1A": [
"SENTINEL-1A": [
"C1214470488-ASF",
"C1214470533-ASF",
"C1214470576-ASF",
Expand Down Expand Up @@ -413,7 +413,7 @@
"C1244598379-ASFDEV",
"C1240784657-ASFDEV",
],
"Sentinel-1B": [
"SENTINEL-1B": [
"C1327985661-ASF",
"C1327985645-ASF",
"C1595422627-ASF",
Expand Down Expand Up @@ -729,7 +729,7 @@
}


collections_by_processing_level: {
collections_by_processing_level = {
"SLC": [
"C1214470488-ASF",
"C1205428742-ASF",
Expand Down
76 changes: 75 additions & 1 deletion asf_search/CMR/subquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.constants import CMR_PAGE_SIZE

from asf_search.CMR.datasets import collections_by_processing_level, collections_per_platform, dataset_collections
from numpy import intersect1d

def build_subqueries(opts: ASFSearchOptions) -> List[ASFSearchOptions]:
"""
Expand All @@ -22,11 +24,83 @@ def build_subqueries(opts: ASFSearchOptions) -> List[ASFSearchOptions]:
if params.get('product_list') is not None:
params['product_list'] = chunk_list(params['product_list'], CMR_PAGE_SIZE)

list_param_names = ['platform', 'season', 'collections', 'dataset'] # these parameters will dodge the subquery system
list_param_names = ['platform', 'season', 'collections', 'dataset', 'processingLevel_collections'] # these parameters will dodge the subquery system
skip_param_names = ['maxResults']# these params exist in opts, but shouldn't be passed on to subqueries at ALL

params = dict([ (k, v) for k, v in params.items() if k not in skip_param_names ])

# in case all instances of platform and/or processingLevel can be substituded by a concept id
keyword_collection_aliases = []
if 'processingLevel' in params.keys():
concept_id_aliases = []
for processingLevel in params['processingLevel']:
if alias := collections_by_processing_level.get(processingLevel):
concept_id_aliases.extend(alias)
else:
concept_id_aliases = []
break

if len(concept_id_aliases):
params.pop('processingLevel')
params['processingLevel_collections'] = concept_id_aliases

if 'dataset' in params:
if 'collections' not in params:
params['collections'] = []

for dataset in params['dataset']:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
params['collections'].extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
if len(processingLevel_collections):
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

params.pop('processingLevel_collections')

if 'platform' in params:
params.pop('dataset')
params.pop('dataset')

elif 'platform' in params:
if 'collections' not in params:
params['collections'] = []

missing = [platform for platform in params['platform'] if collections_per_platform.get(platform.upper()) is None]
# collections limit platform searches, so if there are any we don't have collections for we skip this optimization
if len(missing) == 0:
for platform in params['platform']:
if (collections := collections_per_platform.get(platform.upper())):
params['collections'].extend(collections)

if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
if len(processingLevel_collections):
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

params.pop('processingLevel_collections')

params.pop('platform')
else:
if params.get('collections') is None:
params['collections'] = []
if params.get('processingLevel_collections') is not None:
params['collections'] = params.get('processingLevel_collections')
else:
if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

if params.get('processingLevel_collections') is not None:
params.pop('processingLevel_collections')







subquery_params, list_params = {}, {}
for k, v in params.items():
if k in list_param_names:
Expand Down
48 changes: 25 additions & 23 deletions asf_search/CMR/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from .field_map import field_map
from .datasets import dataset_collections, collections_per_platform

from numpy import intersect1d

import logging


Expand Down Expand Up @@ -48,32 +50,32 @@ def translate_opts(opts: ASFSearchOptions) -> list:
if any(key in dict_opts for key in ['start', 'end', 'season']):
dict_opts = fix_date(dict_opts)

if 'dataset' in dict_opts:
if 'collections' not in dict_opts:
dict_opts['collections'] = []
# if 'dataset' in dict_opts:
# if 'collections' not in dict_opts:
# dict_opts['collections'] = []

for dataset in dict_opts['dataset']:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
dict_opts['collections'].extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

dict_opts.pop('dataset')
# for dataset in dict_opts['dataset']:
# if collections_by_short_name := dataset_collections.get(dataset):
# for concept_ids in collections_by_short_name.values():
# dict_opts['collections'].extend(concept_ids)
# else:
# raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

# dict_opts.pop('dataset')

if 'platform' in dict_opts:
if 'collections' not in dict_opts:
dict_opts['collections'] = []
# if 'platform' in dict_opts:
# if 'collections' not in dict_opts:
# dict_opts['collections'] = []

missing = [platform for platform in dict_opts['platform'] if collections_per_platform.get(platform) is None]

# collections limit platform searches, so if there are any we don't have collections for we skip this optimization
if len(missing) == 0:
for platform in dict_opts['platform']:
if (collections := collections_per_platform.get(platform.upper())):
dict_opts['collections'].extend(collections)
print(f"optimizing for platform search {dict_opts['platform']}")
dict_opts.pop('platform')
# missing = [platform for platform in dict_opts['platform'] if collections_per_platform.get(platform) is None]

# # collections limit platform searches, so if there are any we don't have collections for we skip this optimization
# if len(missing) == 0:
# for platform in dict_opts['platform']:
# if (collections := collections_per_platform.get(platform.upper())):
# dict_opts['collections'].extend(collections)
# print(f"optimizing for platform search {dict_opts['platform']}")
# dict_opts.pop('platform')
# convert the above parameters to a list of key/value tuples
cmr_opts = []
for (key, val) in dict_opts.items():
Expand Down
1 change: 1 addition & 0 deletions asf_search/search/search_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def search_generator(
total = 0

queries = build_subqueries(opts)
print(f"# of subqueries {len(queries)}")
for query in queries:
translated_opts = translate_opts(query)
cmr_search_after_header = ""
Expand Down

0 comments on commit 3545799

Please sign in to comment.