Skip to content

Commit c80fdd1

Browse files
authored
Cache improvements. (#274)
- Reduce exception clutter from normal cache misses.
- Add settings to fully or selectively disable caching.
1 parent ecfe664 commit c80fdd1

File tree

3 files changed

+169
-44
lines changed

3 files changed

+169
-44
lines changed

epymorph/cache.py

Lines changed: 69 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from hashlib import sha256
44
from io import BytesIO
55
from math import log
6-
from os import PathLike, getenv
6+
from os import PathLike
77
from pathlib import Path
88
from shutil import rmtree
99
from sys import modules
@@ -15,28 +15,47 @@
1515
import requests
1616
from platformdirs import user_cache_path
1717

18-
19-
def _cache_path() -> Path:
20-
"""
21-
Get epymorph's cache directory.
22-
23-
Returns
24-
-------
25-
:
26-
The path.
27-
"""
28-
if (path_var := getenv("EPYMORPH_CACHE_PATH")) is not None:
29-
# Load path from env var
30-
path = Path(path_var)
31-
else:
32-
# fall back to platform-specific default path
33-
path = user_cache_path(appname="epymorph")
34-
# ensure cache directory exists
35-
path.mkdir(parents=True, exist_ok=True)
36-
return path
37-
38-
39-
CACHE_PATH = _cache_path()
18+
from epymorph.settings import declare_setting, env_flag, env_path, env_path_list
19+
20+
# Setting: the root directory for cached files.
# The getter reads the EPYMORPH_CACHE_PATH environment variable and falls back to
# platformdirs' per-user cache path for the "epymorph" app name. Because
# `ensure_exists=True` is passed, `env_path` creates the directory (and parents)
# when it does not exist yet.
EPYMORPH_CACHE_PATH = declare_setting(
    name="EPYMORPH_CACHE_PATH",
    description=(
        "Optional path to use as the location to store cached files. "
        "By default, epymorph uses a path which is appropriate to your OS."
    ),
    getter=lambda: env_path(
        name="EPYMORPH_CACHE_PATH",
        default_value=user_cache_path(appname="epymorph"),
        ensure_exists=True,
    ),
)
"""An environment variable for epymorph's cache path."""

# Setting: global kill-switch for the cache.
# The getter reads EPYMORPH_CACHE_DISABLED as a boolean flag, defaulting to False
# (caching enabled) when `env_flag` falls back to its default value.
EPYMORPH_CACHE_DISABLED = declare_setting(
    name="EPYMORPH_CACHE_DISABLED",
    description=(
        "An optional boolean value; true to disable all cache interactions. "
        "Default is false."
    ),
    getter=lambda: env_flag("EPYMORPH_CACHE_DISABLED", False),
)
"""An environment variable to entirely disable caching."""

# Setting: selective cache opt-out by path prefix.
# The getter parses EPYMORPH_CACHE_DISABLED_PATHS as a semicolon-separated list of
# paths; `load_or_fetch` skips the cache for any file whose cache path
# `is_relative_to` one of them.
EPYMORPH_CACHE_DISABLED_PATHS = declare_setting(
    name="EPYMORPH_CACHE_DISABLED_PATHS",
    description=(
        "An optional list of paths (separated by semicolons); "
        "when attempting to load or save a file using the cache, "
        "epymorph will check if the cache path starts with one of "
        "these paths, and if so, interactions with the cache will be "
        "skipped entirely."
    ),
    getter=lambda: env_path_list("EPYMORPH_CACHE_DISABLED_PATHS"),
)
"""An environment variable for paths which should have caching disabled."""


# Resolved once, as a module-level statement, so it runs when this module is
# imported. NOTE(review): presumably `.get()` invokes the getter above, in which
# case importing this module may create the cache directory -- confirm against
# `declare_setting`.
CACHE_PATH = EPYMORPH_CACHE_PATH.get()
"""The root directory for epymorph's cached files."""
4160

4261

@@ -398,7 +417,11 @@ def load_file_from_cache(from_path: str | PathLike[str]) -> BytesIO:
398417
"""
399418
try:
400419
return load_file(_resolve_cache_path(from_path))
420+
except FileMissingError:
421+
# missing file is a normal cache miss; no extra context needed
422+
raise CacheMissError() from None
401423
except FileError as e:
424+
# any other file error is abnormal and extra context will help debug
402425
raise CacheMissError() from e
403426

404427

@@ -425,23 +448,34 @@ def load_or_fetch(cache_path: Path, fetch: Callable[[], BytesIO]) -> BytesIO:
425448
:
426449
The file bytes.
427450
"""
428-
try:
451+
cache_disabled = EPYMORPH_CACHE_DISABLED.get() or any(
452+
cache_path.is_relative_to(p) # is the file's cache path in a disabled path?
453+
for p in EPYMORPH_CACHE_DISABLED_PATHS.get()
454+
)
455+
456+
if not cache_disabled:
429457
# Try to load from cache.
430-
return load_file_from_cache(cache_path)
431-
except CacheMissError:
432-
# On cache miss, fetch file contents.
433-
file = fetch()
458+
try:
459+
return load_file_from_cache(cache_path)
460+
except CacheMissError:
461+
# passing through the exception context means the cache miss
462+
# doesn't clutter up the exception stack if fetching the file
463+
# from source fails.
464+
pass
465+
466+
# On cache miss, fetch file contents.
467+
file = fetch()
468+
469+
if not cache_disabled:
434470
# And attempt to save the file to the cache for next time.
435471
try:
436472
save_file_to_cache(cache_path, file)
437473
except FileWriteError as e:
438-
# Failure to save to the cache is not worth stopping the program:
439-
# raise a warning.
440-
warn(
441-
f"Unable to save file to the cache ({cache_path}). Cause:\n{e}",
442-
CacheWarning,
443-
)
444-
return file
474+
# Failure to save to the cache is not worth stopping the program.
475+
wrn = f"Unable to save file to the cache ({cache_path}). Cause:\n{e}"
476+
warn(wrn, CacheWarning)
477+
478+
return file
445479

446480

447481
def load_or_fetch_url(url: str, cache_path: Path) -> BytesIO:

epymorph/geography/us_tiger.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -278,15 +278,20 @@ def _load_summary_from_cache(
278278
with np.load(content["data.npz"]) as data_npz:
279279
return on_hit(**{k: v.tolist() for k, v in data_npz.items()})
280280
except CacheMissError:
281-
data = on_miss()
282-
data_bytes = BytesIO()
283-
# NOTE: Python doesn't include a type for dataclass instances;
284-
# you can import DataclassInstance from _typeshed, but that seems
285-
# to break test discovery. Oh well; just ignore this one.
286-
model_dict = asdict(data) # type: ignore
287-
np.savez_compressed(data_bytes, **model_dict)
288-
save_bundle_to_cache(path, _CACHE_VERSION, {"data.npz": data_bytes})
289-
return data
281+
# passing through the exception context means the cache miss
282+
# doesn't clutter up the exception stack if fetching the file
283+
# from source fails.
284+
pass
285+
286+
data = on_miss()
287+
data_bytes = BytesIO()
288+
# NOTE: Python doesn't include a type for dataclass instances;
289+
# you can import DataclassInstance from _typeshed, but that seems
290+
# to break test discovery. Oh well; just ignore this one.
291+
model_dict = asdict(data) # type: ignore
292+
np.savez_compressed(data_bytes, **model_dict)
293+
save_bundle_to_cache(path, _CACHE_VERSION, {"data.npz": data_bytes})
294+
return data
290295

291296

292297
##########

epymorph/settings.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
import os
6+
from pathlib import Path
67
from typing import Callable, Generic, NamedTuple, TypeVar, overload
78
from warnings import warn
89

@@ -87,6 +88,91 @@ def env_flag(name: str, default_value: bool | None = None) -> bool | None:
8788
return default_value
8889

8990

91+
@overload
92+
def env_path(
93+
name: str,
94+
default_value: Path,
95+
*,
96+
ensure_exists: bool = False,
97+
) -> Path: ...
98+
99+
100+
@overload
101+
def env_path(
102+
name: str,
103+
default_value: None = None,
104+
*,
105+
ensure_exists: bool = False,
106+
) -> Path | None: ...
107+
108+
109+
def env_path(
110+
name: str,
111+
default_value: Path | None = None,
112+
*,
113+
ensure_exists: bool = False,
114+
) -> Path | None:
115+
"""
116+
Load an environment variable assuming it represents a Path.
117+
118+
Parameters
119+
----------
120+
name :
121+
The name of the environment variable to load.
122+
default_value :
123+
A default value to use in case the variable is not present or can't be
124+
interpreted as a Path.
125+
ensure_exists :
126+
True to attempt to create the Path (as a directory) if it doesn't already exist.
127+
128+
Returns
129+
-------
130+
:
131+
If the named variable is present and if the value can be interpreted
132+
as a Path, return the Path. Else return `default_value`.
133+
"""
134+
value = os.getenv(name)
135+
if value is None:
136+
path = default_value
137+
else:
138+
try:
139+
path = Path(value)
140+
except TypeError:
141+
path = default_value
142+
143+
if path is not None and ensure_exists:
144+
path.mkdir(parents=True, exist_ok=True)
145+
146+
return path
147+
148+
149+
def env_path_list(name: str) -> list[Path]:
    """
    Load an environment variable assuming it represents a semicolon-separated list of
    Paths.

    Note: each entry has its surrounding whitespace removed, and entries which are
    empty after trimming are ignored, since these are likely to be mistakes.

    Parameters
    ----------
    name :
        The name of the environment variable to load.

    Returns
    -------
    :
        The Paths listed in the named variable, in the order given. If the
        variable is not present or contains no non-empty entries, return an
        empty list.
    """
    value = os.getenv(name)
    if value is None:
        return []
    # `os.getenv` only ever yields `str`, so neither the split nor the `Path`
    # construction can raise TypeError; no exception handling is needed.
    entries = (x.strip() for x in value.split(";"))
    return [Path(x) for x in entries if x]
174+
175+
90176
ValueT = TypeVar("ValueT")
91177
"""The type of the value of a setting."""
92178

0 commit comments

Comments
 (0)