Skip to content

Commit

Permalink
ENH: Allow 'auto' in EOREADER_TILE_SIZE, to set chunks="auto" w…
Browse files Browse the repository at this point in the history
…hen reading data. #180
  • Loading branch information
remi-braun committed Dec 9, 2024
1 parent 3709c20 commit f5b95c1
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

- ENH: Add a new type (`BandsType`) for list of BandType
- ENH: Add a new environment variable `EOREADER_NOF_BANDS_IN_CHUNKS` to control the number of the bands in chunks when using `dask`.
- ENH: Allow `'auto'` in `EOREADER_TILE_SIZE`, to set `chunks="auto"` when reading data.
- FIX: Fix stack `save_as_int` to use updated int values - by @TabeaW
- FIX: Fixed PAZ Product Regex to properly indentify PAZ ST products as `PAZProduct` - by @guillemc23
- FIX: Fixed PNEO Product Regex to properly indentify PNEO products as `PneoProduct` - by @guillemc23
Expand Down
11 changes: 7 additions & 4 deletions CI/scripts_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import tempenv
from rasterio.errors import NotGeoreferencedWarning
from sertit import AnyPath, ci, s3, unistra
from sertit import AnyPath, ci, dask, s3, unistra
from sertit.types import AnyPathType
from sertit.unistra import get_db2_path, get_db3_path, get_geodatastore

Expand Down Expand Up @@ -125,13 +125,16 @@ def dask_env_wrapper():
# LOGGER.info("Using DASK Local Cluster")
# function()
# else:
os.environ[TILE_SIZE] = "2048"
os.environ[TILE_SIZE] = "auto"
if use_dask():
LOGGER.info("Using DASK threading by chunking the data")
with tempenv.TemporaryEnvironment(
{"CLOUDPATHLIB_FORCE_OVERWRITE_FROM_CLOUD": "1"}
), dask.get_or_create_dask_client():
function()
else:
LOGGER.info("No chunking will be done. Beware of memory overflow errors!")

function()
function()

return dask_env_wrapper

Expand Down
9 changes: 8 additions & 1 deletion eoreader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def get_split_name(name: str, sep: str = "_") -> list:
def use_dask():
"""Use Dask or not"""
# Check environment variable
# TODO: this is not optional if dask exists in env since sertit 1.43.1
_use_dask = os.getenv(USE_DASK, "1").lower() in ("1", "true")

# Check installed libs
Expand Down Expand Up @@ -166,7 +167,13 @@ def read(
nof_bands_in_chunks = os.getenv(NOF_BANDS_IN_CHUNKS, DEFAULT_NOF_BANDS_IN_CHUNKS)

if use_dask():
chunks = kwargs.get("chunks", {"band": 1, "x": tile_size, "y": tile_size})
if tile_size in [True, "auto", "True", "true"]:
chunks = kwargs.get("chunks", "auto")
else:
chunks = kwargs.get(
"chunks",
{"band": nof_bands_in_chunks, "x": int(tile_size), "y": int(tile_size)},
)
# LOGGER.debug(f"Current chunking: {chunks}")
else:
# LOGGER.debug("Dask use is not enabled. No chunk will be used, but you may encounter memory overflow errors.")
Expand Down

0 comments on commit f5b95c1

Please sign in to comment.