From f5b95c1d9d7ed121927a7b5ef4138930871de177 Mon Sep 17 00:00:00 2001 From: BRAUN REMI Date: Mon, 9 Dec 2024 15:32:21 +0100 Subject: [PATCH] ENH: Allow `'auto'` in `EOREADER_TILE_SIZE`, to set `chunks="auto"` when reading data. #180 --- CHANGES.md | 1 + CI/scripts_utils.py | 11 +++++++---- eoreader/utils.py | 9 ++++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6fbe4cc9..fdfd7270 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,7 @@ - ENH: Add a new type (`BandsType`) for list of BandType - ENH: Add a new environment variable `EOREADER_NOF_BANDS_IN_CHUNKS` to control the number of the bands in chunks when using `dask`. +- ENH: Allow `'auto'` in `EOREADER_TILE_SIZE`, to set `chunks="auto"` when reading data. - FIX: Fix stack `save_as_int` to use updated int values - by @TabeaW - FIX: Fixed PAZ Product Regex to properly indentify PAZ ST products as `PAZProduct` - by @guillemc23 - FIX: Fixed PNEO Product Regex to properly indentify PNEO products as `PneoProduct` - by @guillemc23 diff --git a/CI/scripts_utils.py b/CI/scripts_utils.py index 7d373b64..af3bf5c5 100644 --- a/CI/scripts_utils.py +++ b/CI/scripts_utils.py @@ -8,7 +8,7 @@ import tempenv from rasterio.errors import NotGeoreferencedWarning -from sertit import AnyPath, ci, s3, unistra +from sertit import AnyPath, ci, dask, s3, unistra from sertit.types import AnyPathType from sertit.unistra import get_db2_path, get_db3_path, get_geodatastore @@ -125,13 +125,16 @@ def dask_env_wrapper(): # LOGGER.info("Using DASK Local Cluster") # function() # else: - os.environ[TILE_SIZE] = "2048" + os.environ[TILE_SIZE] = "auto" if use_dask(): LOGGER.info("Using DASK threading by chunking the data") + with tempenv.TemporaryEnvironment( + {"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD": "1"} + ), dask.get_or_create_dask_client(): + function() else: LOGGER.info("No chunking will be done. Beware of memory overflow errors!") - - function() + function() return dask_env_wrapper diff --git a/eoreader/utils.py b/eoreader/utils.py index a3c2917a..88d0129f 100644 --- a/eoreader/utils.py +++ b/eoreader/utils.py @@ -112,6 +112,7 @@ def get_split_name(name: str, sep: str = "_") -> list: def use_dask(): """Use Dask or not""" # Check environment variable + # TODO: this is not optional if dask exists in env since sertit 1.43.1 _use_dask = os.getenv(USE_DASK, "1").lower() in ("1", "true") # Check installed libs @@ -166,7 +167,13 @@ def read( nof_bands_in_chunks = os.getenv(NOF_BANDS_IN_CHUNKS, DEFAULT_NOF_BANDS_IN_CHUNKS) if use_dask(): - chunks = kwargs.get("chunks", {"band": 1, "x": tile_size, "y": tile_size}) + if tile_size in [True, "auto", "True", "true"]: + chunks = kwargs.get("chunks", "auto") + else: + chunks = kwargs.get( + "chunks", + {"band": nof_bands_in_chunks, "x": int(tile_size), "y": int(tile_size)}, + ) # LOGGER.debug(f"Current chunking: {chunks}") else: # LOGGER.debug("Dask use is not enabled. No chunk will be used, but you may encounter memory overflow errors.")