Skip to content

Commit

Permalink
OPTIM: Cache the access to any archived file list, as this operation …
Browse files Browse the repository at this point in the history
…is expensive when done with large archives stored on the cloud (and thus better done only once).
  • Loading branch information
remi-braun committed Dec 10, 2024
1 parent 54990ef commit 348d645
Show file tree
Hide file tree
Showing 30 changed files with 200 additions and 102 deletions.
3 changes: 2 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@
- FIX: Use the sun elevation angle rather than the sun zenith angle for STAC [#158](https://github.com/sertit/eoreader/issues/158)
- FIX: Create comparison operators for `BandNames`, removing the `xarray RuntimeWarning` about `sort order is undefined for incomparable objects`.
- FIX: Add some missing `@cache` around time-consuming functions
- OPTIM: Cache the access to any archived file list, as this operation is expensive when done with large archives stored on the cloud (and thus better done only once).
- CI: Remove useless verbosity in CI
- DOC: Update `conf.py` (remove useless hunks and set Sphinx 7 as base)
- DOC: Added the [PAZ product guide](https://earth.esa.int/eogateway/documents/20142/37627/PAZ-Image-Products-Guide.pdf) to the PAZ Product documentation instead of the TerraSAR-X one - by @guillemc23
- DEPS: Pin `sertit>=1.44`
- DEPS: Pin `sertit>=1.44.1`

## 0.21.7 (2024-11-08)

Expand Down
7 changes: 3 additions & 4 deletions eoreader/products/optical/dimap_v2_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,8 +997,7 @@ def open_mask(self, mask_str: str, **kwargs) -> gpd.GeoDataFrame:
try:
if self.is_archived:
# Open the zip file
mask = vectors.read(
self.path,
mask = self._read_archived_vector(
archive_regex=rf".*MASKS.*{mask_str}.*\.GML",
crs=crs,
)
Expand Down Expand Up @@ -1272,8 +1271,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=".*PREVIEW.*JPG"
quicklook_path = self.path / self._get_archived_path(
file_regex=".*PREVIEW.*JPG"
)
else:
quicklook_path = next(self.path.glob("*PREVIEW*.JPG"))
Expand Down
9 changes: 3 additions & 6 deletions eoreader/products/optical/gs2_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import xarray as xr
from lxml import etree
from rasterio import crs as riocrs
from sertit import files, path
from sertit.misc import ListEnum
from sertit.types import AnyPathType

Expand Down Expand Up @@ -458,9 +457,7 @@ def _get_ortho_path(self, **kwargs) -> AnyPathType:
if self.product_type in self._proj_prod_type:
# Compute RPCs
if self.is_archived:
rpcs_file = io.BytesIO(
files.read_archived_file(self.path, r".*_RPC\.txt")
)
rpcs_file = io.BytesIO(self._read_archived_file(r".*_RPC\.txt"))
else:
rpcs_file = self.path.joinpath(self.name + "_RPC.txt")

Expand All @@ -479,8 +476,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*QL\.png"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*QL\.png"
)
else:
quicklook_path = str(next(self.path.glob("*QL.png")))
Expand Down
6 changes: 3 additions & 3 deletions eoreader/products/optical/hls_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def _get_path(self, band_id: str) -> AnyPathType:
"""
if self.is_archived:
prod_path = path.get_archived_rio_path(self.path, rf".*{band_id}\.tif")
prod_path = self._get_archived_rio_path(rf".*{band_id}\.tif")
else:
prod_path = path.get_file_in_dir(
self.path, f"*{band_id}.tif", exact_name=True
Expand Down Expand Up @@ -1030,8 +1030,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*.jpg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*.jpg"
)
else:
quicklook_path = str(next(self.path.glob("*.jpg")))
Expand Down
6 changes: 3 additions & 3 deletions eoreader/products/optical/landsat_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ def _get_path(self, band_id: str) -> AnyPathType:
regex = rf".*(RT|T1|T2)(_SR|_ST|){band_id}\."
else:
regex = rf".*{band_id}\."
prod_path = path.get_archived_rio_path(self.path, regex)
prod_path = self._get_archived_rio_path(regex)
else:
prod_path = path.get_file_in_dir(
self.path, f"*{band_id}.TIF", exact_name=True
Expand Down Expand Up @@ -1836,8 +1836,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*thumb_large\.jpeg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*thumb_large\.jpeg"
)
else:
quicklook_path = next(self.path.glob("*thumb_large.jpeg"))
Expand Down
4 changes: 2 additions & 2 deletions eoreader/products/optical/maxar_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,8 +1295,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*BROWSE\.JPG"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*BROWSE\.JPG"
)
else:
quicklook_path = next(self.path.glob("*BROWSE.JPG"))
Expand Down
6 changes: 3 additions & 3 deletions eoreader/products/optical/planet_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,14 @@ def _pre_init(self, **kwargs) -> None:
# Manage mask type
try:
if self.is_archived:
path.get_archived_path(self.path, r".*udm2.*\.tif")
self._get_archived_path(r".*udm2.*\.tif")
else:
next(self.path.glob("**/*udm2*.tif"))
self._mask_type = PlanetMaskType.UDM2
except (FileNotFoundError, StopIteration):
try:
if self.is_archived:
path.get_archived_path(self.path, r".*udm.*\.tif")
self._get_archived_path(r".*udm.*\.tif")
else:
next(self.path.glob("**/*udm*.tif"))
self._mask_type = PlanetMaskType.UDM
Expand Down Expand Up @@ -1012,7 +1012,7 @@ def _get_path(
if self.is_archived:
regex = rf".*{filename}\w*[_]*\.{extension}"

ok_paths = path.get_archived_rio_path(self.path, regex, as_list=True)
ok_paths = self._get_archived_rio_path(regex, as_list=True)
else:
ok_paths = [
str(p) for p in self.path.glob(f"**/*{filename}*.{extension}")
Expand Down
8 changes: 4 additions & 4 deletions eoreader/products/optical/re_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import numpy as np
import xarray as xr
from sertit import files, path
from sertit import files
from sertit.misc import ListEnum
from sertit.types import AnyPathType

Expand Down Expand Up @@ -108,7 +108,7 @@ def _post_init(self, **kwargs) -> None:
"""
try:
if self.is_archived:
path.get_archived_path(self.path, r".*udm\.tif")
self._get_archived_path(r".*udm\.tif")
else:
next(self.path.glob("**/*udm.tif"))
self._has_udm = True
Expand Down Expand Up @@ -380,8 +380,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = path.get_archived_rio_path(
self.path, file_regex=r".*_browse\.tif"
quicklook_path = self._get_archived_rio_path(
file_regex=r".*_browse\.tif"
)
else:
quicklook_path = str(next(self.path.glob("**/*_browse.tif")))
Expand Down
6 changes: 2 additions & 4 deletions eoreader/products/optical/s2_e84_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def _get_path(self, file_id: str, ext: str = "tif") -> AnyPathType:
"""
if self.is_archived:
prod_path = path.get_archived_rio_path(self.path, rf".*{file_id}\.{ext}")
prod_path = self._get_archived_rio_path(rf".*{file_id}\.{ext}")
else:
prod_path = path.get_file_in_dir(
self.path, f"*{file_id}.{ext}", exact_name=True
Expand Down Expand Up @@ -768,9 +768,7 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*.jpg"
)
quicklook_path = self._get_archived_path(file_regex=r".*.jpg")
else:
quicklook_path = str(next(self.path.glob("*.jpg")))
except (StopIteration, FileNotFoundError):
Expand Down
23 changes: 11 additions & 12 deletions eoreader/products/optical/s2_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,8 +563,8 @@ def _get_res_band_folder(self, band_list: list, pixel_size: float = None) -> dic
if self.is_archived:
# Get the band folder (use dirname if the first of the list is a band)
band_path = os.path.dirname(
path.get_archived_rio_path(
self.path, f"{self._get_image_folder()}.*{dir_name}"
self._get_archived_rio_path(
f"{self._get_image_folder()}.*{dir_name}"
)
)

Expand Down Expand Up @@ -635,8 +635,7 @@ def get_band_paths(
band_id = self.bands[band].id
try:
if self.is_archived:
band_paths[band] = path.get_archived_rio_path(
self.path,
band_paths[band] = self._get_archived_rio_path(
f".*{band_folders[band]}.*B{band_id}.*.jp2",
)
else:
Expand Down Expand Up @@ -947,8 +946,8 @@ def _open_mask_gt_4_0(
band_id = band

if self.is_archived:
mask_path = path.get_archived_rio_path(
self.path, f"{self._get_qi_folder()}.*{mask_id.value}_B{band_id}.jp2"
mask_path = self._get_archived_rio_path(
f"{self._get_qi_folder()}.*{mask_id.value}_B{band_id}.jp2"
)
else:
# Get mask path
Expand Down Expand Up @@ -1642,25 +1641,25 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*ql\.jpg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*ql\.jpg"
)
else:
quicklook_path = next(self.path.glob("**/*ql.jpg"))
except (StopIteration, FileNotFoundError):
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*preview\.jpg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*preview\.jpg"
)
else:
quicklook_path = next(self.path.glob("**/preview.jpg"))
except (StopIteration, FileNotFoundError):
# Use the PVI
try:
if self.is_archived:
quicklook_path = path.get_archived_rio_path(
self.path, file_regex=r".*PVI\.jp2"
quicklook_path = self._get_archived_rio_path(
file_regex=r".*PVI\.jp2"
)
else:
quicklook_path = next(self.path.glob("**/*PVI.jp2"))
Expand Down
12 changes: 5 additions & 7 deletions eoreader/products/optical/s2_theia_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ def get_band_paths(
band_id = self.bands[band].id
try:
if self.is_archived:
band_paths[band] = path.get_archived_rio_path(
self.path, rf".*FRE_B{band_id}\.tif"
band_paths[band] = self._get_archived_rio_path(
rf".*FRE_B{band_id}\.tif"
)
else:
band_paths[band] = path.get_file_in_dir(
Expand Down Expand Up @@ -508,9 +508,7 @@ def get_mask_path(self, mask_id: str, res_id: str) -> AnyPathType:
mask_regex = f"*{mask_id}_{res_id}.tif"
try:
if self.is_archived:
mask_path = path.get_archived_rio_path(
self.path, mask_regex.replace("*", ".*")
)
mask_path = self._get_archived_rio_path(mask_regex.replace("*", ".*"))
else:
mask_path = path.get_file_in_dir(
self.path.joinpath("MASKS"), mask_regex, exact_name=True
Expand Down Expand Up @@ -836,8 +834,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*QKL_ALL\.jpg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*QKL_ALL\.jpg"
)
else:
quicklook_path = next(self.path.glob("**/*QKL_ALL.jpg"))
Expand Down
4 changes: 2 additions & 2 deletions eoreader/products/optical/s3_olci_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import rasterio
import xarray as xr
from rasterio.enums import Resampling
from sertit import path, rasters, rasters_rio
from sertit import rasters, rasters_rio
from sertit.types import AnyPathStrType, AnyPathType

from eoreader import EOREADER_NAME, cache, utils
Expand Down Expand Up @@ -462,7 +462,7 @@ def get_raw_band_paths(self, **kwargs) -> dict:
filename = self._replace(self._radiance_file, band=self.bands[band].name)

if self.is_archived:
raw_path = path.get_archived_path(self.path, f".*{filename}")
raw_path = self._get_archived_path(f".*{filename}")
else:
try:
raw_path = next(self.path.glob(f"*{filename}"))
Expand Down
4 changes: 1 addition & 3 deletions eoreader/products/optical/s3_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -943,9 +943,7 @@ def get_quicklook_path(self) -> str:
"""
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*.jpg"
)
quicklook_path = self._get_archived_path(file_regex=r".*.jpg")
else:
quicklook_path = str(next(self.path.glob("**/*.jpg")))
except (FileNotFoundError, StopIteration):
Expand Down
2 changes: 1 addition & 1 deletion eoreader/products/optical/s3_slstr_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def _get_raw_band_path(self, band: BandNames, **kwargs) -> AnyPathType:
filename = band

if self.is_archived:
raw_path = path.get_archived_path(self.path, f".*{filename}*")
raw_path = self._get_archived_path(f".*{filename}*")
else:
try:
raw_path = next(self.path.glob(f"*{filename}*"))
Expand Down
4 changes: 2 additions & 2 deletions eoreader/products/optical/sky_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,12 +485,12 @@ def _read_mtd(self) -> (etree._Element, dict):
# MTD are geojson -> open as gpd.GeoDataFrame
try:
if self.is_archived:
data = vectors.read(self.path, archive_regex=mtd_archived)
data = self._read_archived_vector(archive_regex=mtd_archived)

else:
try:
mtd_file = next(self.path.glob(mtd_from_path))
data = vectors.read(mtd_file, archive_regex=f".*{mtd_archived}")
data = vectors.read(mtd_file)
except StopIteration as ex:
raise InvalidProductError(
f"Metadata file ({mtd_from_path}) not found in {self.path}"
Expand Down
5 changes: 2 additions & 3 deletions eoreader/products/optical/spot45_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
import xarray as xr
from lxml import etree
from rasterio import crs as riocrs
from sertit import path
from sertit.misc import ListEnum
from sertit.types import AnyPathType

Expand Down Expand Up @@ -663,8 +662,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*PREVIEW\.JPG"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*PREVIEW\.JPG"
)
else:
quicklook_path = str(next(self.path.glob("*PREVIEW.JPG")))
Expand Down
10 changes: 5 additions & 5 deletions eoreader/products/optical/sv1_product.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _post_init(self, **kwargs) -> None:
"""
try:
if self.is_archived:
path.get_archived_path(self.path, r".*PSH\.xml")
self._get_archived_path(r".*PSH\.xml")
else:
next(self.path.glob("*PSH.xml"))
self.band_combi = Sv1BandCombination.PSH
Expand Down Expand Up @@ -307,7 +307,7 @@ def footprint(self) -> gpd.GeoDataFrame:
gpd.GeoDataFrame: Footprint as a GeoDataFrame
"""
if self.is_archived:
footprint = vectors.read(self.path, archive_regex=r".*\.shp")
footprint = self._read_archived_vector(archive_regex=r".*\.shp")
else:
try:
footprint = vectors.read(next(self.path.glob("*.shp")))
Expand Down Expand Up @@ -392,7 +392,7 @@ def _get_name_constellation_specific(self) -> str:

try:
if self.is_archived:
footprint_path = path.get_archived_path(self.path, r".*\.shp")
footprint_path = self._get_archived_path(r".*\.shp")
else:
footprint_path = next(self.path.glob("*.shp"))
except (FileNotFoundError, StopIteration):
Expand Down Expand Up @@ -741,8 +741,8 @@ def get_quicklook_path(self) -> str:
quicklook_path = None
try:
if self.is_archived:
quicklook_path = self.path / path.get_archived_path(
self.path, file_regex=r".*MUX\.jpg"
quicklook_path = self.path / self._get_archived_path(
file_regex=r".*MUX\.jpg"
)
else:
quicklook_path = str(next(self.path.glob("*MUX.jpg")))
Expand Down
Loading

0 comments on commit 348d645

Please sign in to comment.