Skip to content

Commit

Permalink
moves dataset/platform collection aliasing into subquery.py, adds processingLevel aliasing
Browse files Browse the repository at this point in the history
  • Loading branch information
kim committed Nov 16, 2023
1 parent d5e1514 commit 3545799
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 28 deletions.
2 changes: 1 addition & 1 deletion asf_search/CMR/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from .subquery import build_subqueries
from .translate import translate_product, translate_opts, get_additional_fields
from .field_map import field_map
from .datasets import dataset_collections
from .datasets import dataset_collections, collections_per_platform, collections_by_processing_level
6 changes: 3 additions & 3 deletions asf_search/CMR/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@
}

collections_per_platform = {
"Sentinel-1A": [
"SENTINEL-1A": [
"C1214470488-ASF",
"C1214470533-ASF",
"C1214470576-ASF",
Expand Down Expand Up @@ -413,7 +413,7 @@
"C1244598379-ASFDEV",
"C1240784657-ASFDEV",
],
"Sentinel-1B": [
"SENTINEL-1B": [
"C1327985661-ASF",
"C1327985645-ASF",
"C1595422627-ASF",
Expand Down Expand Up @@ -729,7 +729,7 @@
}


collections_by_processing_level: {
collections_by_processing_level = {
"SLC": [
"C1214470488-ASF",
"C1205428742-ASF",
Expand Down
76 changes: 75 additions & 1 deletion asf_search/CMR/subquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from asf_search.ASFSearchOptions import ASFSearchOptions
from asf_search.constants import CMR_PAGE_SIZE

from asf_search.CMR.datasets import collections_by_processing_level, collections_per_platform, dataset_collections
from numpy import intersect1d

def build_subqueries(opts: ASFSearchOptions) -> List[ASFSearchOptions]:
"""
Expand All @@ -22,11 +24,83 @@ def build_subqueries(opts: ASFSearchOptions) -> List[ASFSearchOptions]:
if params.get('product_list') is not None:
params['product_list'] = chunk_list(params['product_list'], CMR_PAGE_SIZE)

list_param_names = ['platform', 'season', 'collections', 'dataset'] # these parameters will dodge the subquery system
list_param_names = ['platform', 'season', 'collections', 'dataset', 'processingLevel_collections'] # these parameters will dodge the subquery system
skip_param_names = ['maxResults']# these params exist in opts, but shouldn't be passed on to subqueries at ALL

params = dict([ (k, v) for k, v in params.items() if k not in skip_param_names ])

# in case all instances of platform and/or processingLevel can be substituded by a concept id
keyword_collection_aliases = []
if 'processingLevel' in params.keys():
concept_id_aliases = []
for processingLevel in params['processingLevel']:
if alias := collections_by_processing_level.get(processingLevel):
concept_id_aliases.extend(alias)
else:
concept_id_aliases = []
break

if len(concept_id_aliases):
params.pop('processingLevel')
params['processingLevel_collections'] = concept_id_aliases

if 'dataset' in params:
if 'collections' not in params:
params['collections'] = []

for dataset in params['dataset']:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
params['collections'].extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
if len(processingLevel_collections):
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

params.pop('processingLevel_collections')

if 'platform' in params:
params.pop('dataset')
params.pop('dataset')

elif 'platform' in params:
if 'collections' not in params:
params['collections'] = []

missing = [platform for platform in params['platform'] if collections_per_platform.get(platform.upper()) is None]
# collections limit platform searches, so if there are any we don't have collections for we skip this optimization
if len(missing) == 0:
for platform in params['platform']:
if (collections := collections_per_platform.get(platform.upper())):
params['collections'].extend(collections)

if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
if len(processingLevel_collections):
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

params.pop('processingLevel_collections')

params.pop('platform')
else:
if params.get('collections') is None:
params['collections'] = []
if params.get('processingLevel_collections') is not None:
params['collections'] = params.get('processingLevel_collections')
else:
if (processingLevel_collections := params.get('processingLevel_collections')) is not None:
params['collections'] = intersect1d(processingLevel_collections, params['collections']).tolist()

if params.get('processingLevel_collections') is not None:
params.pop('processingLevel_collections')







subquery_params, list_params = {}, {}
for k, v in params.items():
if k in list_param_names:
Expand Down
48 changes: 25 additions & 23 deletions asf_search/CMR/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from .field_map import field_map
from .datasets import dataset_collections, collections_per_platform

from numpy import intersect1d

import logging


Expand Down Expand Up @@ -48,32 +50,32 @@ def translate_opts(opts: ASFSearchOptions) -> list:
if any(key in dict_opts for key in ['start', 'end', 'season']):
dict_opts = fix_date(dict_opts)

if 'dataset' in dict_opts:
if 'collections' not in dict_opts:
dict_opts['collections'] = []
# if 'dataset' in dict_opts:
# if 'collections' not in dict_opts:
# dict_opts['collections'] = []

for dataset in dict_opts['dataset']:
if collections_by_short_name := dataset_collections.get(dataset):
for concept_ids in collections_by_short_name.values():
dict_opts['collections'].extend(concept_ids)
else:
raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

dict_opts.pop('dataset')
# for dataset in dict_opts['dataset']:
# if collections_by_short_name := dataset_collections.get(dataset):
# for concept_ids in collections_by_short_name.values():
# dict_opts['collections'].extend(concept_ids)
# else:
# raise ValueError(f'Could not find dataset named "{dataset}" provided for dataset keyword.')

# dict_opts.pop('dataset')

if 'platform' in dict_opts:
if 'collections' not in dict_opts:
dict_opts['collections'] = []
# if 'platform' in dict_opts:
# if 'collections' not in dict_opts:
# dict_opts['collections'] = []

missing = [platform for platform in dict_opts['platform'] if collections_per_platform.get(platform) is None]

# collections limit platform searches, so if there are any we don't have collections for we skip this optimization
if len(missing) == 0:
for platform in dict_opts['platform']:
if (collections := collections_per_platform.get(platform.upper())):
dict_opts['collections'].extend(collections)
print(f"optimizing for platform search {dict_opts['platform']}")
dict_opts.pop('platform')
# missing = [platform for platform in dict_opts['platform'] if collections_per_platform.get(platform) is None]

# # collections limit platform searches, so if there are any we don't have collections for we skip this optimization
# if len(missing) == 0:
# for platform in dict_opts['platform']:
# if (collections := collections_per_platform.get(platform.upper())):
# dict_opts['collections'].extend(collections)
# print(f"optimizing for platform search {dict_opts['platform']}")
# dict_opts.pop('platform')
# convert the above parameters to a list of key/value tuples
cmr_opts = []
for (key, val) in dict_opts.items():
Expand Down
1 change: 1 addition & 0 deletions asf_search/search/search_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def search_generator(
total = 0

queries = build_subqueries(opts)
print(f"# of subqueries {len(queries)}")
for query in queries:
translated_opts = translate_opts(query)
cmr_search_after_header = ""
Expand Down

0 comments on commit 3545799

Please sign in to comment.