From a2cc0e17803364da9c1a3f719ec28e7e6d1a7f7d Mon Sep 17 00:00:00 2001 From: BRAUN REMI Date: Mon, 13 Nov 2023 13:51:09 +0100 Subject: [PATCH] ENH: Manage Sentinel-2 as formatted on the cloud (Element84 or Sinergise's way). #104 --- CHANGES.md | 2 +- docs/_build/.jupyter_cache/global.db | Bin 53248 -> 53248 bytes docs/notebooks/aws.ipynb | 54 +++++++++++++-- ...{s2_cloud_product.py => s2_e84_product.py} | 8 ++- eoreader/products/optical/s2_product.py | 64 ++++++++++++++---- eoreader/reader.py | 28 ++++++-- eoreader/utils.py | 2 +- requirements.txt | 2 +- 8 files changed, 129 insertions(+), 31 deletions(-) rename eoreader/products/optical/{s2_cloud_product.py => s2_e84_product.py} (98%) diff --git a/CHANGES.md b/CHANGES.md index 2aca3557..ddc8d1ae 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,7 +5,7 @@ - **BREAKING CHANGES: Rename `utils.stack_dict` to `utils.stack` since we are stacking datasets and not dict anymore.** - **BREAKING CHANGES: Band ID for Sentinel-3 OLCI are now int instead of band names (i.e. `7` instead of `Oa07`. The names don't change).** - **ENH: Allow to use bands IDs, names and common name added to mapped names when trying to load a spectral band. ([#111](https://github.com/sertit/eoreader/issues/111)** -- **ENH: Manage Sentinel-2 (currently L2A) as formatted on the cloud (Element84's way). ([#104](https://github.com/sertit/eoreader/issues/104)** +- **ENH: Manage Sentinel-2 as formatted on the cloud (Element84 or Sinergise's way). ([#104](https://github.com/sertit/eoreader/issues/104)** - **ENH: Handle Python 3.12. ([#113](https://github.com/sertit/eoreader/issues/113)** - FIX: Fix jpg, png... quicklooks management when plotting - FIX: Fix an `xarray` issue when trying to compute percentiles when stacking bands diff --git a/docs/_build/.jupyter_cache/global.db b/docs/_build/.jupyter_cache/global.db index 4f89d7b796433a0810a7ccdc044824978c9b36a9..7440cbc064ee623b11eca52518da504090971fbb 100644 GIT binary patch delta 595 zcmaixO-mbL5XU#}K~ZoY_qNn1+Gtb*Tvv?7annRi%5}UNOHqqoDc=Ra5lNZ64 z`2uPzcu-_XAoS?j(qo~tAEDo%P?|s+lY@Bu&CGA+&(zP|`nh{DoywfftfVr@$E%zh zx(N-xo*QzL@DD|l0IH7mscuBD9AN#BRj`z*~ n6STeG!CLUs|L)IvM?uF~QKf*Uh0r*rsMRv$5xi{o;Gjy7gm(1~mTLs^wd<+KxZ{Q@ zCaq+UuHXJ(=_GS9qD&d1H9Xe)f3ToNL`oWH?OgrCLRo7BxZRQa{yT|qsx7ygr998e q__625K@+#}8b0)&@MkdgKZ5n(6+h!_ki(\n", " \n", - " Note: This is experimental for now, use it at your own risk !\n", + " Note: These products are not stored in the `.SAFE` format.\n", " \n", - "" + "\n", + "\n", + "## Let's read data processed by Element84: Sentinel-2 L2A as COGs\n", + "\n", + "See this [registry](https://registry.opendata.aws/sentinel-2-l2a-cogs) (`arn:aws:s3:::sentinel-cogs`)" ] }, { @@ -41,10 +45,7 @@ "id": "36d9150318c0e2fe", "metadata": { "collapsed": false, - "is_executing": true, - "jupyter": { - "outputs_hidden": false - } + "is_executing": true }, "outputs": [ { @@ -91,6 +92,47 @@ "source": [ "blue[:, ::10, ::10].plot(cmap=\"Blues_r\")" ] + }, + { + "cell_type": "markdown", + "source": [ + "## Let's read data processed by Sinergise: Sentinel-2 L1C\n", + "\n", + "See this [registry](https://registry.opendata.aws/sentinel-2/) (`arn:aws:s3:::sentinel-s2-l1c`)\n", + "\n", + "NB: L2A would have been the same (`arn:aws:s3:::sentinel-s2-l2a`)\n", + "\n", + "
\n", + " \n", + " Note: Sinergise data are stored as requester pays in AWS. Don't forget to state this when requesting data!\n", + " \n", + "
" + ], + "metadata": { + "collapsed": false + }, + "id": "1d8a1cb4fde8949c" + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "with tempenv.TemporaryEnvironment({\n", + " \"AWS_S3_ENDPOINT\": \"s3.eu-central-1.amazonaws.com\",\n", + " \"AWS_SECRET_ACCESS_KEY\": os.getenv(\"AMAZON_AWS_SECRET_ACCESS_KEY\"),\n", + " \"AWS_ACCESS_KEY_ID\": os.getenv(\"AMAZON_AWS_ACCESS_KEY_ID\"),\n", + "}):\n", + " with s3.temp_s3(requester_pays=True):\n", + " path = r\"s3://sentinel-s2-l1c/tiles/10/S/DG/2022/7/8/0\"\n", + " prod = Reader().open(path)\n", + " prod.plot()\n", + " blue = prod.load(BLUE)[BLUE]" + ], + "metadata": { + "collapsed": false + }, + "id": "514d1d1c09b37c14" } ], "metadata": { diff --git a/eoreader/products/optical/s2_cloud_product.py b/eoreader/products/optical/s2_e84_product.py similarity index 98% rename from eoreader/products/optical/s2_cloud_product.py rename to eoreader/products/optical/s2_e84_product.py index 7a87e940..a19b55e4 100644 --- a/eoreader/products/optical/s2_cloud_product.py +++ b/eoreader/products/optical/s2_e84_product.py @@ -61,9 +61,9 @@ LOGGER = logging.getLogger(EOREADER_NAME) -class S2CloudProduct(OpticalProduct): +class S2E84Product(OpticalProduct): """ - Class for Sentinel-2 cloud products + Class for Sentinel-2 stored on AWS and processed by Element 84 (COGs) products https://element84.com/geospatial/introducing-earth-search-v1-new-datasets-now-available/ @@ -608,7 +608,9 @@ def _get_condensed_name(self) -> str: # Used to make the difference between 2 products acquired on the same tile at the same date but cut differently # Sentinel-2 generation time: "%Y%m%dT%H%M%S" -> save only %H%M%S gen_time = self.split_name[-1].split("T")[-1] - return f"{self.get_datetime()}_{self.constellation.name}_{self.tile_name}_{self.product_type.name}_{gen_time}" + + # Force S2 as constellation name for S2_E84 to work + return f"{self.get_datetime()}_S2_{self.tile_name}_{self.product_type.name}_{gen_time}" @cache def get_mean_sun_angles(self) -> (float, float): diff --git a/eoreader/products/optical/s2_product.py b/eoreader/products/optical/s2_product.py index 5eba731f..0baca533 100644 --- a/eoreader/products/optical/s2_product.py +++ b/eoreader/products/optical/s2_product.py @@ -116,7 +116,7 @@ class S2Jp2Masks(ListEnum): BAND_DIR_NAMES = { - S2ProductType.L1C: "IMG_DATA", + S2ProductType.L1C: ".", S2ProductType.L2A: { "01": ["R60m"], "02": ["R10m", "R20m", "R60m"], @@ -164,6 +164,9 @@ def __init__( # L2Ap self._is_l2ap = False + # S2 Sinergise + self._is_sinergise = kwargs.pop("is_sinergise", False) + # Initialization from the super class super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs) @@ -183,7 +186,9 @@ def _pre_init(self, **kwargs) -> None: """ self._has_cloud_cover = True self.needs_extraction = False - self._use_filename = True + # Use filename for SAFE names, not for others + # S2A_MSIL1C_20191215T110441_N0208_R094_T30TXP_20191215T114155.SAFE has 65 characters + self._use_filename = len(self.filename) > 50 self._raw_units = RawUnits.REFL # Post init done by the super class @@ -486,6 +491,28 @@ def _get_name_constellation_specific(self) -> str: return name + def _get_qi_folder(self): + """""" + if self._is_sinergise: + mask_folder = "qi" + elif self.is_archived: + mask_folder = ".*GRANULE.*QI_DATA" + else: + mask_folder = "**/*GRANULE/*/QI_DATA" + + return mask_folder + + def _get_image_folder(self): + """""" + if self._is_sinergise: + img_folder = "." + elif self.is_archived: + img_folder = ".*GRANULE.*IMG_DATA" + else: + img_folder = "**/*GRANULE/*/IMG_DATA" + + return img_folder + def _get_res_band_folder(self, band_list: list, pixel_size: float = None) -> dict: """ Return the folder containing the bands of a proper S2 products. @@ -544,7 +571,12 @@ def _get_res_band_folder(self, band_list: list, pixel_size: float = None) -> dic s2_bands_folder[band] = band_path else: # Search for the name of the folder into the S2 products - s2_bands_folder[band] = next(self.path.glob(f"**/*/{dir_name}")) + try: + s2_bands_folder[band] = next( + self.path.glob(f"{self._get_image_folder()}/{dir_name}") + ) + except IndexError: + s2_bands_folder[band] = self.path for band in band_list: if band not in s2_bands_folder: @@ -595,12 +627,12 @@ def get_band_paths( if self.is_archived: band_paths[band] = path.get_archived_rio_path( self.path, - f".*{band_folders[band]}.*_B{band_id}.*.jp2", + f".*{band_folders[band]}.*B{band_id}.*.jp2", ) else: band_paths[band] = path.get_file_in_dir( band_folders[band], - f"_B{band_id}", + f"B{band_id}", extension="jp2", ) except (FileNotFoundError, IndexError) as ex: @@ -778,7 +810,7 @@ def _open_mask_lt_4_0( self, mask_id: Union[str, S2GmlMasks], band: Union[BandNames, str] = None ) -> gpd.GeoDataFrame: """ - Open S2 mask (GML files stored in QI_DATA) as :code:`gpd.GeoDataFrame`. + Open S2 mask (GML files stored in QI_DATA/qi) as :code:`gpd.GeoDataFrame`. Masks than can be called that way are: @@ -839,7 +871,7 @@ def _open_mask_lt_4_0( with zipfile.ZipFile(self.path, "r") as zip_ds: filenames = [f.filename for f in zip_ds.filelist] regex = re.compile( - f".*GRANULE.*QI_DATA.*{mask_id.value}_B{band_name}.gml" + f"{self._get_qi_folder()}.*{mask_id.value}_B{band_name}.gml" ) mask_path = zip_ds.extract( list(filter(regex.match, filenames))[0], tmp_dir.name @@ -848,7 +880,7 @@ def _open_mask_lt_4_0( # Get mask path mask_path = path.get_file_in_dir( self.path, - f"**/*GRANULE/*/QI_DATA/*{mask_id.value}_B{band_name}.gml", + f"{self._get_qi_folder()}/*{mask_id.value}_B{band_name}.gml", exact_name=True, ) @@ -902,13 +934,13 @@ def _open_mask_gt_4_0( if self.is_archived: mask_path = path.get_archived_rio_path( - self.path, f".*GRANULE.*QI_DATA.*{mask_id.value}_B{band_id}.jp2" + self.path, f"{self._get_qi_folder()}.*{mask_id.value}_B{band_id}.jp2" ) else: # Get mask path mask_path = path.get_file_in_dir( self.path, - f"**/*GRANULE/*/QI_DATA/*{mask_id.value}_B{band_id}.jp2", + f"{self._get_qi_folder()}/*{mask_id.value}_B{band_id}.jp2", exact_name=True, ) @@ -1200,7 +1232,9 @@ def _get_condensed_name(self) -> str: # Used to make the difference between 2 products acquired on the same tile at the same date but cut differently # Sentinel-2 generation time: "%Y%m%dT%H%M%S" -> save only %H%M%S gen_time = self.split_name[-1].split("T")[-1] - return f"{self.get_datetime()}_{self.constellation.name}_{self.tile_name}_{self.product_type.name}_{gen_time}" + + # Force S2 as constellation name for S2_SIN to work + return f"{self.get_datetime()}_S2_{self.tile_name}_{self.product_type.name}_{gen_time}" @cache def get_mean_sun_angles(self) -> (float, float): @@ -1254,8 +1288,12 @@ def _read_mtd(self) -> (etree._Element, dict): Returns: (etree._Element, dict): Metadata XML root and its namespaces """ - mtd_from_path = "GRANULE/*/MTD*.xml" - mtd_archived = r"GRANULE.*MTD.*\.xml" + if self._is_sinergise: + mtd_from_path = "metadata.xml" + mtd_archived = r"metadata\.xml" + else: + mtd_from_path = "GRANULE/*/MTD*.xml" + mtd_archived = r"GRANULE.*MTD.*\.xml" return self._read_mtd_xml(mtd_from_path, mtd_archived) diff --git a/eoreader/reader.py b/eoreader/reader.py index 47a6e272..871ddaad 100644 --- a/eoreader/reader.py +++ b/eoreader/reader.py @@ -64,11 +64,16 @@ class Constellation(ListEnum): S2 = "Sentinel-2" """Sentinel-2""" - S2_CLOUD = "Sentinel-2 stored on cloud" + S2_E84 = "Sentinel-2 stored on AWS and processed by Element84" + """ + Sentinel-2 stored on AWS and processed by Element84: + - Element84: arn:aws:s3:::sentinel-cogs - https://registry.opendata.aws/sentinel-2-l2a-cogs """ - Sentinel-2 stored on cloud - For now, obly the one created by Element84 are supported: https://stacindex.org/catalogs/earth-search#/43bjKKcJQfxYaT1ir3Ep6uENfjEoQrjkzhd2?t=3 + S2_SIN = "Sentinel-2 stored on AWS and processed by Sinergise" + """ + Sentinel-2 stored on AWS and processed by Sinergise: + arn:aws:s3:::sentinel-s2-l1c and arn:aws:s3:::sentinel-s2-l2a - https://registry.opendata.aws/sentinel-2/ """ S2_THEIA = "Sentinel-2 Theia" @@ -204,8 +209,9 @@ class Constellation(ListEnum): CONSTELLATION_REGEX = { Constellation.S1: r"S1[AB]_(IW|EW|SM|WV)_(RAW|SLC|GRD|OCN)[FHM_]_[0-2]S[SD][HV]_\d{8}T\d{6}_\d{8}T\d{6}_\d{6}_.{11}", Constellation.S2: r"S2[AB]_MSIL(1C|2A)_\d{8}T\d{6}_N\d{4}_R\d{3}_T\d{2}\w{3}_\d{8}T\d{6}", - # Element84 : S2A_31UDQ_20230714_0_L2A - Constellation.S2_CLOUD: r"S2[AB]_\d{2}\w{3}_\d{8}_\d_L(1C|2A)", + # Element84 : S2A_31UDQ_20230714_0_L2A, Sinergise: 0 or 1... + Constellation.S2_E84: r"S2[AB]_\d{2}\w{3}_\d{8}_\d_L(1C|2A)", + Constellation.S2_SIN: r"\d", Constellation.S2_THEIA: r"SENTINEL2[AB]_\d{8}-\d{6}-\d{3}_L(2A|1C)_T\d{2}\w{3}_[CDH](_V\d-\d|)", Constellation.S3_OLCI: r"S3[AB]_OL_[012]_\w{6}_\d{8}T\d{6}_\d{8}T\d{6}_\d{8}T\d{6}_\w{17}_\w{3}_[OFDR]_(NR|ST|NT)_\d{3}", Constellation.S3_SLSTR: r"S3[AB]_SL_[012]_\w{6}_\d{8}T\d{6}_\d{8}T\d{6}_\d{8}T\d{6}_\w{17}_\w{3}_[OFDR]_(NR|ST|NT)_\d{3}", @@ -269,7 +275,14 @@ class Constellation(ListEnum): "regex": r".*s1[ab]-(iw|ew|sm|wv)\d*-(raw|slc|grd|ocn)-[hv]{2}-\d{8}t\d{6}-\d{8}t\d{6}-\d{6}-\w{6}-\d{3}\.xml", }, Constellation.S2: {"nested": 3, "regex": r"MTD_TL.xml"}, - Constellation.S2_CLOUD: rf"{CONSTELLATION_REGEX[Constellation.S2_CLOUD]}\.json", + Constellation.S2_E84: rf"{CONSTELLATION_REGEX[Constellation.S2_E84]}\.json", + Constellation.S2_SIN: { + "nested": -1, # File that can be found at any level (product/**/file) + "regex": [ + r"metadata\.xml", # Too generic name, check also a band + r"B12\.jp2", + ], + }, Constellation.S2_THEIA: rf"{CONSTELLATION_REGEX[Constellation.S2_THEIA]}_MTD_ALL\.xml", Constellation.S3_OLCI: r"Oa\d{2}_radiance.nc", Constellation.S3_SLSTR: r"S\d_radiance_an.nc", @@ -536,6 +549,9 @@ def open( # SPOT-4/5 constellations elif const in [Constellation.SPOT4, Constellation.SPOT5]: sat_class = "spot45_product" + elif const in [Constellation.S2_SIN]: + sat_class = "s2_product" + kwargs["is_sinergise"] = True # Manage both optical and SAR try: diff --git a/eoreader/utils.py b/eoreader/utils.py index 051e5c17..6962e875 100644 --- a/eoreader/utils.py +++ b/eoreader/utils.py @@ -40,7 +40,7 @@ from eoreader.keywords import _prune_keywords LOGGER = logging.getLogger(EOREADER_NAME) -DEFAULT_TILE_SIZE = "auto" +DEFAULT_TILE_SIZE = 1024 UINT16_NODATA = rasters.UINT16_NODATA diff --git a/requirements.txt b/requirements.txt index b0ebf339..d89b3573 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,7 @@ spyndex>=0.3.0 pystac[validation] # SERTIT libs -sertit[full]>=1.31.0 +sertit[full]>=1.32.0 # Optimizations dask[complete]>=2021.10.0