diff --git a/eoreader/products/__init__.py b/eoreader/products/__init__.py index 13c12b25..02bfc92e 100644 --- a/eoreader/products/__init__.py +++ b/eoreader/products/__init__.py @@ -28,6 +28,12 @@ ] from .custom_product import CustomProduct, CustomFields +# STAC products +__all__ += [ + "StacProduct", +] +from .stac_product import StacProduct + # -- Optical -- __all__ += [ "OpticalProduct", diff --git a/eoreader/products/optical/landsat_product.py b/eoreader/products/optical/landsat_product.py index 42c4ecef..14c47195 100644 --- a/eoreader/products/optical/landsat_product.py +++ b/eoreader/products/optical/landsat_product.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Landsat products """ +import asyncio +import difflib import logging import tarfile from datetime import datetime @@ -24,13 +26,16 @@ import geopandas as gpd import numpy as np import pandas as pd +import planetary_computer import xarray as xr from lxml import etree from lxml.builder import E +from pystac import Item from rasterio.enums import Resampling -from sertit import path, rasters, rasters_rio +from sertit import AnyPath, path, rasters, rasters_rio from sertit.misc import ListEnum from sertit.types import AnyPathStrType, AnyPathType +from stac_asset import S3Client from eoreader import DATETIME_FMT, EOREADER_NAME, cache, utils from eoreader.bands import ( @@ -39,6 +44,7 @@ CA, CIRRUS, CLOUDS, + EOREADER_STAC_MAP, GREEN, NARROW_NIR, NIR, @@ -61,6 +67,7 @@ from eoreader.exceptions import InvalidProductError, InvalidTypeError from eoreader.products import OpticalProduct from eoreader.products.optical.optical_product import RawUnits +from eoreader.products.stac_product import StacProduct from eoreader.reader import Constellation from eoreader.stac import ASSET_ROLE, BT, DESCRIPTION, GSD, ID, NAME, WV_MAX, WV_MIN from eoreader.utils import simplify @@ -203,7 +210,7 @@ def _pre_init(self, **kwargs) -> None: self._collection = LandsatCollection.COL_1 self.needs_extraction = True # Too slow to read directly tar.gz files - # Post init done by the super class + # Pre init done by the super class super()._pre_init(**kwargs) def _post_init(self, **kwargs) -> None: @@ -1838,3 +1845,108 @@ def get_quicklook_path(self) -> str: ) return quicklook_path + + +class LandsatStacProduct(StacProduct, LandsatProduct): + def __init__( + self, + product_path: AnyPathStrType = None, + archive_path: AnyPathStrType = None, + output_path: AnyPathStrType = None, + remove_tmp: bool = False, + **kwargs, + ) -> None: + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + self.kwargs = kwargs + """Custom kwargs""" + + # Copy the kwargs + super_kwargs = kwargs.copy() + + # Get STAC Item + self.item = None + """ STAC Item of the product """ + self.item = super_kwargs.pop("item", None) + if self.item is None: + try: + self.item = Item.from_file(product_path) + except TypeError: + raise InvalidProductError( + "You should either fill 'product_path' or 'item'." + ) + + self.default_clients = [ + planetary_computer, + S3Client(region_name="us-west-2"), # Element84, EarthData + S3Client(requester_pays=True, region_name="us-west-2"), # USGS Landsat + # Not yet handled + # HttpClient(ClientSession(base_url="https://landsatlook.usgs.gov", auth=BasicAuth(login="", password=""))) + ] + self.clients = super_kwargs.pop("client", self.default_clients) + + if product_path is None: + # Canonical link is always the second one + # TODO: check if ok + product_path = AnyPath(self.item.links[1].target).parent + + # Initialization from the super class + super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs) + + def _get_path(self, band_id: str) -> str: + """ + Get either the archived path of the normal path of a tif file + + Args: + band_id (str): Band ID + + Returns: + AnyPathType: band path + + """ + if band_id.startswith("_"): + band_id = band_id[1:] + + if band_id.lower() in self.item.assets: + asset_name = band_id.lower() + elif band_id.startswith("B"): + band_name = [ + band_name + for band_name, band in self.bands.items() + if band is not None and f"B{band.id}" == band_id + ][0] + asset_name = EOREADER_STAC_MAP[band_name].value + else: + try: + asset_name = difflib.get_close_matches( + band_id, self.item.assets.keys(), cutoff=0.5, n=1 + )[0] + except Exception: + raise FileNotFoundError( + f"Impossible to find an asset in {list(self.item.assets.keys())} close enough to '{band_id}'" + ) + + return self.sign_url(self.item.assets[asset_name].href) + + def _read_mtd(self, force_pd=False) -> (etree._Element, dict): + """ + Read Landsat metadata as: + + - :code:`pandas.DataFrame` whatever its collection is (by default for collection 1) + - XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2) + + Args: + force_pd (bool): If collection 2, return a pandas.DataFrame instead of an XML root + namespace + Returns: + Tuple[Union[pd.DataFrame, etree._Element], dict]: + Metadata as a Pandas.DataFrame or as (etree._Element, dict): Metadata XML root and its namespaces + """ + return self._read_mtd_xml_stac(self._get_path("mtl.xml")) + + def get_quicklook_path(self) -> str: + """ + Get quicklook path if existing. + + Returns: + str: Quicklook path + """ + return self._get_path("rendered_preview") diff --git a/eoreader/products/optical/s2_e84_product.py b/eoreader/products/optical/s2_e84_product.py index a19b55e4..c99c89e5 100644 --- a/eoreader/products/optical/s2_e84_product.py +++ b/eoreader/products/optical/s2_e84_product.py @@ -15,17 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Sentinel-2 cloud-stored products """ +import asyncio +import difflib +import json import logging from datetime import datetime from typing import Union import geopandas as gpd import numpy as np +import planetary_computer import xarray as xr from lxml import etree +from pystac import Item from rasterio.enums import Resampling -from sertit import files, path, rasters_rio +from sertit import AnyPath, files, path, rasters_rio +from sertit.files import CustomDecoder from sertit.types import AnyPathStrType, AnyPathType +from stac_asset import S3Client, blocking from eoreader import DATETIME_FMT, EOREADER_NAME, cache, utils from eoreader.bands import ( @@ -55,6 +62,7 @@ from eoreader.exceptions import InvalidProductError, InvalidTypeError from eoreader.products import S2ProductType from eoreader.products.optical.optical_product import OpticalProduct, RawUnits +from eoreader.products.stac_product import StacProduct from eoreader.stac import CENTER_WV, FWHM, GSD, ID, NAME from eoreader.utils import simplify @@ -98,7 +106,7 @@ def _pre_init(self, **kwargs) -> None: self.tile_mtd = files.read_json(self._get_path("tileinfo_metadata", ext="json")) self.stac_mtd = files.read_json(self._get_path(self.filename, ext="json")) - # Post init done by the super class + # Pre init done by the super class super()._pre_init(**kwargs) def _post_init(self, **kwargs) -> None: @@ -108,7 +116,7 @@ def _post_init(self, **kwargs) -> None: """ self.tile_name = self._get_tile_name() - # Post init done by the super class + # Pre init done by the super class super()._post_init(**kwargs) def _get_tile_name(self) -> str: @@ -760,3 +768,269 @@ def get_quicklook_path(self) -> str: pass return quicklook_path + + +class S2E84StacProduct(StacProduct, S2E84Product): + def __init__( + self, + product_path: AnyPathStrType = None, + archive_path: AnyPathStrType = None, + output_path: AnyPathStrType = None, + remove_tmp: bool = False, + **kwargs, + ) -> None: + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + self.kwargs = kwargs + """Custom kwargs""" + + # Copy the kwargs + super_kwargs = kwargs.copy() + + # Get STAC Item + self.item = None + """ STAC Item of the product """ + self.item = super_kwargs.pop("item", None) + if self.item is None: + try: + self.item = Item.from_file(product_path) + except TypeError: + raise InvalidProductError( + "You should either fill 'product_path' or 'item'." + ) + + self.default_clients = [ + S3Client(region_name="us-west-2"), # Element84, EarthData + S3Client(region_name="eu-central-1", requester_pays=True), # Sinergise + ] + self.clients = super_kwargs.pop("client", self.default_clients) + + if product_path is None: + # Canonical link is always the second one + # TODO: check if ok + product_path = AnyPath(self.item.links[1].target).parent + + # Initialization from the super class + super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs) + + def _pre_init(self, **kwargs) -> None: + """ + Function used to pre_init the products + (setting needs_extraction and so on) + """ + self._raw_units = RawUnits.REFL + self._has_cloud_cover = True + self._use_filename = False + self.needs_extraction = False + + # Read the JSON tileinfo mtd + self.tile_mtd = json.loads( + blocking.read_href( + self._get_path("tileinfo_metadata"), clients=self.clients + ), + cls=CustomDecoder, + ) + self.stac_mtd = self.item.to_dict() + + # Pre init done by the super class + super(S2E84Product, self)._pre_init(**kwargs) + + def _get_path(self, file_id: str, ext="tif") -> str: + """ + Get either the archived path of the normal path of a tif file + + Args: + band_id (str): Band ID + + Returns: + AnyPathType: band path + """ + if file_id.lower() in self.item.assets: + asset_name = file_id.lower() + elif file_id in [band.id for band in self.bands.values() if band is not None]: + band_name = [ + band_name + for band_name, band in self.bands.items() + if band is not None and f"{band.id}" == file_id + ][0] + asset_name = EOREADER_STAC_MAP[band_name].value + else: + try: + asset_name = difflib.get_close_matches( + file_id, self.item.assets.keys(), cutoff=0.5, n=1 + )[0] + except Exception: + raise FileNotFoundError( + f"Impossible to find an asset in {list(self.item.assets.keys())} close enough to '{file_id}'" + ) + + return self.item.assets[asset_name].href + + def _read_mtd(self) -> (etree._Element, dict): + """ + Read Landsat metadata as: + + - :code:`pandas.DataFrame` whatever its collection is (by default for collection 1) + - XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2) + + Args: + force_pd (bool): If collection 2, return a pandas.DataFrame instead of an XML root + namespace + Returns: + Tuple[Union[pd.DataFrame, etree._Element], dict]: + Metadata as a Pandas.DataFrame or as (etree._Element, dict): Metadata XML root and its namespaces + """ + return self._read_mtd_xml_stac(self._get_path("granule_metadata")) + + def get_quicklook_path(self) -> str: + """ + Get quicklook path if existing. + + Returns: + str: Quicklook path + """ + return self._get_path("thumbnail") + + +class S2MPCStacProduct(StacProduct, S2E84Product): + def __init__( + self, + product_path: AnyPathStrType = None, + archive_path: AnyPathStrType = None, + output_path: AnyPathStrType = None, + remove_tmp: bool = False, + **kwargs, + ) -> None: + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + self.kwargs = kwargs + """Custom kwargs""" + + # Copy the kwargs + super_kwargs = kwargs.copy() + + # Get STAC Item + self.item = None + """ STAC Item of the product """ + self.item = super_kwargs.pop("item", None) + if self.item is None: + try: + self.item = Item.from_file(product_path) + except TypeError: + raise InvalidProductError( + "You should either fill 'product_path' or 'item'." + ) + + self.default_clients = [planetary_computer] + self.clients = super_kwargs.pop("client", self.default_clients) + + if product_path is None: + # Canonical link is always the second one + # TODO: check if ok + product_path = AnyPath(self.item.links[1].target).parent + + # Initialization from the super class + super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs) + + def _pre_init(self, **kwargs) -> None: + """ + Function used to pre_init the products + (setting needs_extraction and so on) + """ + self._raw_units = RawUnits.REFL + self._has_cloud_cover = True + self._use_filename = False + self.needs_extraction = False + + self.stac_mtd = self.item.to_dict() + + # Pre init done by the super class + super(S2E84Product, self)._pre_init(**kwargs) + + def _get_name(self) -> str: + """ + Set product real name. + + Returns: + str: True name of the product (from metadata) + """ + return self.item.properties["s2:product_uri"] + + def _to_reflectance( + self, + band_arr: xr.DataArray, + band_path: AnyPathType, + band: BandNames, + **kwargs, + ) -> xr.DataArray: + """ + Converts band to reflectance + + Args: + band_arr (xr.DataArray): Band array to convert + band_path (AnyPathType): Band path + band (BandNames): Band to read + **kwargs: Other keywords + + Returns: + xr.DataArray: Band in reflectance + """ + # TODO + offset = -1000.0 + quantif_value = 10000.0 + + # Compute the correct radiometry of the band + band_arr = (band_arr + offset) / quantif_value + + return band_arr.astype(np.float32) + + def _get_path(self, file_id: str, ext="tif") -> str: + """ + Get either the archived path of the normal path of a tif file + + Args: + band_id (str): Band ID + + Returns: + AnyPathType: band path + """ + if file_id.lower() in self.item.assets: + asset_name = file_id.lower() + elif file_id in [band.id for band in self.bands.values() if band is not None]: + asset_name = [ + band.name + for band in self.bands.values() + if band is not None and f"{band.id}" == file_id + ][0] + else: + try: + asset_name = difflib.get_close_matches( + file_id, self.item.assets.keys(), cutoff=0.5, n=1 + )[0] + except Exception: + raise FileNotFoundError( + f"Impossible to find an asset in {list(self.item.assets.keys())} close enough to '{file_id}'" + ) + + return self.sign_url(self.item.assets[asset_name].href) + + def _read_mtd(self) -> (etree._Element, dict): + """ + Read Landsat metadata as: + + - :code:`pandas.DataFrame` whatever its collection is (by default for collection 1) + - XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2) + + Args: + force_pd (bool): If collection 2, return a pandas.DataFrame instead of an XML root + namespace + Returns: + Tuple[Union[pd.DataFrame, etree._Element], dict]: + Metadata as a Pandas.DataFrame or as (etree._Element, dict): Metadata XML root and its namespaces + """ + return self._read_mtd_xml_stac(self._get_path("granule-metadata")) + + def get_quicklook_path(self) -> str: + """ + Get quicklook path if existing. + + Returns: + str: Quicklook path + """ + return self._get_path("rendered_preview") diff --git a/eoreader/products/optical/s2_product.py b/eoreader/products/optical/s2_product.py index eeca8863..5aa3ac52 100644 --- a/eoreader/products/optical/s2_product.py +++ b/eoreader/products/optical/s2_product.py @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """ Sentinel-2 products """ +import asyncio +import difflib import json import logging import os @@ -28,10 +30,12 @@ import geopandas as gpd import numpy as np import pandas as pd +import planetary_computer import rasterio import xarray as xr from affine import Affine from lxml import etree +from pystac import Item from rasterio import errors, features, transform from rasterio.crs import CRS from rasterio.enums import Resampling @@ -39,6 +43,7 @@ from sertit.misc import ListEnum from sertit.types import AnyPathStrType, AnyPathType from shapely.geometry import box +from stac_asset import S3Client from eoreader import DATETIME_FMT, EOREADER_NAME, cache, utils from eoreader.bands import ( @@ -47,6 +52,7 @@ CA, CIRRUS, CLOUDS, + EOREADER_STAC_MAP, GREEN, NARROW_NIR, NIR, @@ -65,7 +71,7 @@ to_str, ) from eoreader.exceptions import InvalidProductError, InvalidTypeError -from eoreader.products import OpticalProduct +from eoreader.products import OpticalProduct, StacProduct from eoreader.products.optical.optical_product import RawUnits from eoreader.products.product import OrbitDirection from eoreader.stac import CENTER_WV, FWHM, GSD, ID, NAME @@ -1692,3 +1698,182 @@ def get_orbit_direction(self) -> OrbitDirection: od = OrbitDirection.DESCENDING return od + + +class S2StacProduct(StacProduct, S2Product): + def __init__( + self, + product_path: AnyPathStrType = None, + archive_path: AnyPathStrType = None, + output_path: AnyPathStrType = None, + remove_tmp: bool = False, + **kwargs, + ) -> None: + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + self.kwargs = kwargs + """Custom kwargs""" + + # Copy the kwargs + super_kwargs = kwargs.copy() + + # Get STAC Item + self.item = None + """ STAC Item of the product """ + self.item = super_kwargs.pop("item", None) + if self.item is None: + try: + self.item = Item.from_file(product_path) + except TypeError: + raise InvalidProductError( + "You should either fill 'product_path' or 'item'." + ) + + self.default_clients = [ + planetary_computer, + S3Client(region_name="us-west-2"), # Element84, EarthData + S3Client(requester_pays=True, region_name="eu-central-1"), # Sinergise + # Not yet handled + # HttpClient(ClientSession(base_url="https://landsatlook.usgs.gov", auth=BasicAuth(login="", password=""))) + ] + self.clients = super_kwargs.pop("client", self.default_clients) + + if product_path is None: + # Canonical link is always the second one + # TODO: check if ok + product_path = AnyPath(self.item.links[1].target).parent + + # Initialization from the super class + super().__init__(product_path, archive_path, output_path, remove_tmp, **kwargs) + + def _pre_init(self, **kwargs) -> None: + """ + Function used to pre_init the products + (setting needs_extraction and so on) + """ + self._raw_units = RawUnits.REFL + self._has_cloud_cover = True + self._use_filename = False + self.needs_extraction = False + + # Pre init done by the super class + super(S2Product, self)._pre_init(**kwargs) + + def _get_path(self, file_id: str, ext="tif") -> str: + """ + Get either the archived path of the normal path of a tif file + + Args: + band_id (str): Band ID + + Returns: + AnyPathType: band path + """ + if file_id.lower() in self.item.assets: + asset_name = file_id.lower() + elif file_id in [band.id for band in self.bands.values() if band is not None]: + band_name = [ + band_name + for band_name, band in self.bands.items() + if band is not None and f"{band.id}" == file_id + ][0] + asset_name = EOREADER_STAC_MAP[band_name].value + else: + try: + asset_name = difflib.get_close_matches( + file_id, self.item.assets.keys(), cutoff=0.5, n=1 + )[0] + except Exception: + raise FileNotFoundError( + f"Impossible to find an asset in {list(self.item.assets.keys())} close enough to '{file_id}'" + ) + + return self.sign_url(self.item.assets[asset_name].href) + + def get_band_paths( + self, band_list: list, pixel_size: float = None, **kwargs + ) -> dict: + """ + Return the paths of required bands. + + .. code-block:: python + + >>> from eoreader.reader import Reader + >>> from eoreader.bands import * + >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip" + >>> prod = Reader().open(path) + >>> prod.get_band_paths([GREEN, RED]) + { + : 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B03.jp2', + : 'zip+file://S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip!/S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE/GRANULE/L1C_T30TTK_A027018_20200824T111345/IMG_DATA/T30TTK_20200824T110631_B04.jp2' + } + + Args: + band_list (list): List of the wanted bands + pixel_size (float): Band pixel size + kwargs: Other arguments used to load bands + + Returns: + dict: Dictionary containing the path of each queried band + """ + band_paths = {} + for band in band_list: + # Get clean band path + clean_band = self._get_clean_band_path( + band, pixel_size=pixel_size, **kwargs + ) + if clean_band.is_file(): + band_paths[band] = clean_band + else: + band_id = self.bands[band].id + try: + band_paths[band] = self._get_path(band_id) + except (FileNotFoundError, IndexError) as ex: + raise InvalidProductError( + f"Non existing {band} ({band_id}) band for {self.path}" + ) from ex + + return band_paths + + @cache + def read_datatake_mtd(self) -> (etree._Element, dict): + """ + Read datatake metadata and outputs the metadata XML root and its namespaces as a dict + (datatake metadata is the file in the root directory named :code:`MTD_MSI(L1C/L2A).xml`) + + .. code-block:: python + + >>> from eoreader.reader import Reader + >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip" + >>> prod = Reader().open(path) + >>> prod.read_mtd() + (, + {'nl': '{https://psd-14.sentinel2.eo.esa.int/PSD/S2_PDI_Level-2A_Tile_Metadata.xsd}'}) + + Returns: + (etree._Element, dict): Metadata XML root and its namespaces + """ + return self._read_mtd_xml_stac(self._get_path("product-metadata")) + + def _read_mtd(self) -> (etree._Element, dict): + """ + Read Landsat metadata as: + + - :code:`pandas.DataFrame` whatever its collection is (by default for collection 1) + - XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2) + + Args: + force_pd (bool): If collection 2, return a pandas.DataFrame instead of an XML root + namespace + Returns: + Tuple[Union[pd.DataFrame, etree._Element], dict]: + Metadata as a Pandas.DataFrame or as (etree._Element, dict): Metadata XML root and its namespaces + """ + return self._read_mtd_xml_stac(self._get_path("granule-metadata")) + + def get_quicklook_path(self) -> str: + """ + Get quicklook path if existing. + + Returns: + str: Quicklook path + """ + return self._get_path("preview") diff --git a/eoreader/products/stac_product.py b/eoreader/products/stac_product.py new file mode 100644 index 00000000..a6d08f48 --- /dev/null +++ b/eoreader/products/stac_product.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +# Copyright 2023, SERTIT-ICube - France, https://sertit.unistra.fr/ +# This file is part of eoreader project +# https://github.com/sertit/eoreader +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Class for STAC products """ +import logging +from io import BytesIO + +import geopandas as gpd +import planetary_computer +import shapely +from lxml import etree +from rasterio import crs +from sertit import geometry, path, rasters, vectors +from stac_asset import blocking + +from eoreader import EOREADER_NAME, cache +from eoreader.products.product import Product +from eoreader.stac import PROJ_EPSG +from eoreader.utils import simplify + +LOGGER = logging.getLogger(EOREADER_NAME) + + +class StacProduct(Product): + """Stac products""" + + item = None + clients = None + default_clients = None + + @cache + def extent(self) -> gpd.GeoDataFrame: + """ + Get UTM extent of stack. + + Returns: + gpd.GeoDataFrame: Extent in UTM + """ + # Get extent + return gpd.GeoDataFrame( + geometry=geometry.from_bounds_to_polygon(*self.item.bbox), + crs=vectors.EPSG_4326, + ).to_crs(self.crs()) + + @cache + @simplify + def footprint(self) -> gpd.GeoDataFrame: + """ + Get UTM footprint of the products (without nodata, *in french == emprise utile*) + + .. code-block:: python + + >>> from eoreader.reader import Reader + >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip" + >>> prod = Reader().open(path) + >>> prod.footprint() + index geometry + 0 0 POLYGON ((199980.000 4500000.000, 199980.000 4... + + Returns: + gpd.GeoDataFrame: Footprint as a GeoDataFrame + """ + # Get extent + return gpd.GeoDataFrame.from_dict( + data=shapely.polygons(self.item.geometry["coordinates"]), + crs=vectors.EPSG_4326, + ).to_crs(self.crs()) + + @cache + def crs(self) -> crs.CRS: + """ + Get UTM projection of stack. + + Returns: + crs.CRS: CRS object + """ + epsg = self.item.properties.get(PROJ_EPSG) + + if epsg is None: + def_crs = gpd.GeoDataFrame( + geometry=geometry.from_bounds_to_polygon(*self.item.bbox), + crs=vectors.EPSG_4326, + ).estimate_utm_crs() + else: + def_crs = crs.CRS.from_epsg(code=epsg) + + return def_crs + + def plot(self) -> None: + """ + Plot the quicklook if existing + """ + try: + import matplotlib.pyplot as plt + except ModuleNotFoundError: + raise ModuleNotFoundError( + "You need to install 'matplotlib' to plot the product." + ) + else: + quicklook_path = self.get_quicklook_path() + + if quicklook_path is not None: + plt.figure(figsize=(6, 6)) + if path.get_ext(quicklook_path).split("?")[0].lower() in [ + "png", + "jpg", + "jpeg", + ]: + try: + from PIL import Image + except ModuleNotFoundError: + raise ModuleNotFoundError( + "You need to install 'pillow' to plot the product." + ) + + qlk = blocking.read_href(quicklook_path, clients=self.clients) + plt.imshow(Image.open(BytesIO(qlk))) + else: + # Check it + qck = rasters.read(quicklook_path) + if qck.rio.count == 3: + qck.plot.imshow(robust=True) + elif qck.rio.count == 1: + qck.plot(cmap="GnBu_r", robust=True) + else: + pass + + plt.title(f"{self.condensed_name}") + + def sign_url(self, url: str): + # TODO complete if needed + return planetary_computer.sign_url(url) + + def _read_mtd_xml_stac(self, mtd_url, **kwargs) -> (etree._Element, dict): + """ + Read metadata and outputs the metadata XML root and its namespaces as a dicts as a dict + + Args: + mtd_from_path (str): Metadata regex (glob style) to find from extracted product + mtd_archived (str): Metadata regex (re style) to find from archived product + + Returns: + (etree._Element, dict): Metadata XML root and its namespaces + + """ + mtd_str = blocking.read_href(mtd_url, clients=self.clients) + root = etree.fromstring(mtd_str) + + # Get namespaces map (only useful ones) + nsmap = {key: f"{{{ns}}}" for key, ns in root.nsmap.items()} + pop_list = ["xsi", "xs", "xlink"] + for ns in pop_list: + if ns in nsmap.keys(): + nsmap.pop(ns) + + return root, nsmap + + +# TODO: expose other functions diff --git a/eoreader/reader.py b/eoreader/reader.py index ccb96489..e49955e3 100644 --- a/eoreader/reader.py +++ b/eoreader/reader.py @@ -25,11 +25,12 @@ from typing import Union from zipfile import BadZipFile +from pystac import Item from sertit import AnyPath, path, strings from sertit.misc import ListEnum from sertit.types import AnyPathStrType -from eoreader import EOREADER_NAME +from eoreader import EOREADER_NAME, utils LOGGER = logging.getLogger(EOREADER_NAME) @@ -480,7 +481,40 @@ def open( Product: Correct products """ - product_path = AnyPath(product_path) + if isinstance(product_path, Item): + # TODO + + # Get first asset URL + first_asset_url = list(product_path.assets.values())[0].href + + # Convert to URI handled by cloudpathlib (https://s3.endpoint/bucket/file.extension) + # https://rapidmapping.s3.eu-west-1.amazonaws.com/EMSR706/AOI07/DEL_PRODUCT/EMSR706_AOI07_DEL_PRODUCT_v1.zip + # https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/26/S/KG/2023/11/S2A_26SKG_20231114_0_L2A/AOT.tif + # https://s3.unistra.fr/sertit-eoreader-ci/all_sar/SLH_49732/ + + # https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html + # https://bucket-name.s3.region-code.amazonaws.com/key-name - Virtual-hosted–style access + # https://s3.region-code.amazonaws.com/bucket-name/key-name - Path-style access + + # to + # s3://sentinel-cogs/sentinel-s2-l2a-cogs/39/K/ZU/2023/10/S2A_39KZU_20231031_0_L2A + # s3://sertit-eoreader-ci/all_sar/SLH_49732/ + asset_url_split = utils.get_split_name(first_asset_url, "/") + endpoint = asset_url_split[1] + if endpoint.startswith("s3"): + bucket = asset_url_split[2] + end_uri = "/".join(asset_url_split[3:]) + else: + endpoint_split = endpoint.split(".") + bucket = endpoint_split[0] + endpoint = "/".join(asset_url_split[1:]) + end_uri = "/".join(asset_url_split[2:]) + + first_asset_uri = AnyPath(f"s3://{bucket}/{end_uri}") + product_path = first_asset_uri.parent + else: + product_path = AnyPath(product_path) + if not product_path.exists(): FileNotFoundError(f"Non existing product: {product_path}") @@ -513,58 +547,7 @@ def open( ) if is_valid: - sat_class = const.name.lower() + "_product" - - # Channel correctly the constellations to their generic files (just in case) - # TerraSAR-like constellations - if const in [Constellation.TDX, Constellation.PAZ]: - sat_class = "tsx_product" - const = None # All product names are the same, so assess it with MTD - # Maxar-like constellations - elif const in [ - Constellation.QB, - Constellation.GE01, - Constellation.WV01, - Constellation.WV02, - Constellation.WV03, - Constellation.WV04, - ]: - sat_class = "maxar_product" - const = None # All product names are the same, so assess it with MTD - # Lansat constellations - elif const in [ - Constellation.L1, - Constellation.L2, - Constellation.L3, - Constellation.L4, - Constellation.L5, - Constellation.L7, - Constellation.L8, - Constellation.L9, - ]: - sat_class = "landsat_product" - # SPOT-6/7 constellations - elif const in [Constellation.SPOT6, Constellation.SPOT7]: - sat_class = "spot67_product" - # SPOT-4/5 constellations - elif const in [Constellation.SPOT4, Constellation.SPOT5]: - sat_class = "spot45_product" - elif const in [Constellation.S2_SIN]: - sat_class = "s2_product" - kwargs["is_sinergise"] = True - - # Manage both optical and SAR - try: - mod = importlib.import_module( - f"eoreader.products.sar.{sat_class}" - ) - except ModuleNotFoundError: - mod = importlib.import_module( - f"eoreader.products.optical.{sat_class}" - ) - - class_ = getattr(mod, strings.snake_to_camel_case(sat_class)) - prod = class_( + prod = create_product( product_path=product_path, archive_path=archive_path, output_path=output_path, @@ -758,3 +741,69 @@ def is_filename_valid( raise BadZipFile(f"{product_path} is not a zip file") return is_valid + + +def create_product( + product_path, + archive_path, + output_path, + remove_tmp, + constellation: Constellation, + **kwargs, +): + sat_class = constellation.name.lower() + "_product" + + # Channel correctly the constellations to their generic files (just in case) + # TerraSAR-like constellations + if constellation in [Constellation.TDX, Constellation.PAZ]: + sat_class = "tsx_product" + constellation = None # All product names are the same, so assess it with MTD + # Maxar-like constellations + elif constellation in [ + Constellation.QB, + Constellation.GE01, + Constellation.WV01, + Constellation.WV02, + Constellation.WV03, + Constellation.WV04, + ]: + sat_class = "maxar_product" + constellation = None # All product names are the same, so assess it with MTD + # Lansat constellations + elif constellation in [ + Constellation.L1, + Constellation.L2, + Constellation.L3, + Constellation.L4, + Constellation.L5, + Constellation.L7, + Constellation.L8, + Constellation.L9, + ]: + sat_class = "landsat_product" + # SPOT-6/7 constellations + elif constellation in [Constellation.SPOT6, Constellation.SPOT7]: + sat_class = "spot67_product" + # SPOT-4/5 constellations + elif constellation in [Constellation.SPOT4, Constellation.SPOT5]: + sat_class = "spot45_product" + elif constellation in [Constellation.S2_SIN]: + sat_class = "s2_product" + kwargs["is_sinergise"] = True + + # Manage both optical and SAR + try: + mod = importlib.import_module(f"eoreader.products.sar.{sat_class}") + except ModuleNotFoundError: + mod = importlib.import_module(f"eoreader.products.optical.{sat_class}") + + class_ = getattr(mod, strings.snake_to_camel_case(sat_class)) + prod = class_( + product_path=product_path, + archive_path=archive_path, + output_path=output_path, + remove_tmp=remove_tmp, + constellation=constellation, + **kwargs, + ) + return prod diff --git a/requirements.txt b/requirements.txt index 19a88868..d1627cc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,9 +36,6 @@ geopandas>=0.11.0 # Spectral indices spyndex>=0.3.0 -# STAC -pystac[validation] - # SERTIT libs sertit[full]>=1.32.1 @@ -48,3 +45,8 @@ methodtools # Plot matplotlib + +# MPC, AWS and STAC +pystac[validation] +stac-asset +planetary_computer