Skip to content

Commit

Permalink
Exclude special dotfiles at all levels of a Zarr
Browse files Browse the repository at this point in the history
  • Loading branch information
jwodder committed Oct 28, 2022
1 parent fea7f91 commit 0064071
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 12 deletions.
3 changes: 3 additions & 0 deletions dandi/cli/tests/test_digest.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ def test_digest_zarr_with_excluded_dotfiles():
os.mkdir("sample.zarr/.dandi")
Path("sample.zarr", ".dandi", "somefile.txt").touch()
Path("sample.zarr", ".gitattributes").touch()
Path("sample.zarr", "arr_0", ".gitmodules").touch()
Path("sample.zarr", "arr_1", ".datalad").mkdir()
Path("sample.zarr", "arr_1", ".datalad", "config").touch()
r = runner.invoke(digest, ["--digest", "zarr-checksum", "sample.zarr"])
assert r.exit_code == 0
assert r.output == "sample.zarr: 4313ab36412db2981c3ed391b38604d6-5--1516\n"
2 changes: 1 addition & 1 deletion dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,7 +880,7 @@ def digest_callback(path: str, algoname: str, d: str) -> None:
d = dirs.popleft()
is_empty = True
for p in list(d.iterdir()):
if d == zarr_basepath and exclude_from_zarr(p):
if exclude_from_zarr(p):
is_empty = False
elif (
p.is_file()
Expand Down
11 changes: 8 additions & 3 deletions dandi/files/_private.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
ZarrBIDSAsset,
)
from .zarr import ZarrAsset
from ..utils import exclude_from_zarr


class DandiFileType(Enum):
Expand All @@ -35,9 +34,9 @@ class DandiFileType(Enum):
@staticmethod
def classify(path: Path) -> DandiFileType:
if path.is_dir():
if not any(p for p in path.iterdir() if not exclude_from_zarr(p)):
raise UnknownAssetError("Empty directories cannot be assets")
if path.suffix in ZARR_EXTENSIONS:
if is_empty_zarr(path):
raise UnknownAssetError("Empty directories cannot be Zarr assets")
return DandiFileType.ZARR
raise UnknownAssetError(
f"Directory has unrecognized suffix {path.suffix!r}"
Expand Down Expand Up @@ -96,3 +95,9 @@ def __call__(self, filepath: Path, path: str) -> DandiFile:
)
self.bids_dataset_description.dataset_files.append(df)
return df


def is_empty_zarr(path: Path) -> bool:
    """
    Wrap the directory ``path`` in a `ZarrAsset` (using the directory's own
    name as the asset path) and return `True` when none of the entries
    produced by its ``iterfiles()`` is truthy -- in practice, when it
    produces no entries at all.

    :meta private:
    """
    asset = ZarrAsset(filepath=path, path=path.name)
    has_entries = any(asset.iterfiles())
    return not has_entries
2 changes: 1 addition & 1 deletion dandi/files/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def is_dir(self) -> bool:

def iterdir(self) -> Iterator[LocalZarrEntry]:
for p in self.filepath.iterdir():
if self.is_root() and exclude_from_zarr(p):
if exclude_from_zarr(p):
continue
if p.is_dir() and not any(p.iterdir()):
# Ignore empty directories
Expand Down
4 changes: 1 addition & 3 deletions dandi/support/digests.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ def digest_file(f: Path) -> Tuple[Path, str, int]:
return (f, dgst, os.path.getsize(f))

zcc = ZCTree()
for p, digest, size in threaded_walk(
path, digest_file, exclude=lambda p: p.parent == path and exclude_from_zarr(p)
):
for p, digest, size in threaded_walk(path, digest_file, exclude=exclude_from_zarr):
zcc.add(p.relative_to(path), digest, size)
return zcc.get_digest()

Expand Down
3 changes: 3 additions & 0 deletions dandi/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,8 @@ def test_download_different_zarr_onto_excluded_dotfiles(
(zarr_path / ".dandi" / "somefile.txt").touch()
(zarr_path / ".datalad").mkdir()
(zarr_path / ".gitattributes").touch()
(zarr_path / "arr_0").mkdir()
(zarr_path / "arr_0" / ".gitmodules").touch()
download(
zarr_dandiset.dandiset.version_api_url, tmp_path, existing="overwrite-different"
)
Expand All @@ -367,6 +369,7 @@ def test_download_different_zarr_onto_excluded_dotfiles(
zarr_path / ".gitattributes",
zarr_path / ".zgroup",
zarr_path / "arr_0",
zarr_path / "arr_0" / ".gitmodules",
zarr_path / "arr_0" / ".zarray",
zarr_path / "arr_0" / "0",
zarr_path / "arr_1",
Expand Down
14 changes: 11 additions & 3 deletions dandi/tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,11 +229,16 @@ def test_find_dandi_files_with_bids(tmp_path: Path) -> None:
def test_dandi_file_zarr_with_excluded_dotfiles(tmp_path: Path) -> None:
zarr_path = tmp_path / "foo.zarr"
mkpaths(
zarr_path, ".git/data", ".gitattributes", ".dandi/somefile.txt", ".datalad/"
zarr_path,
".git/data",
".gitattributes",
".dandi/somefile.txt",
".datalad/",
"arr_0/.gitmodules",
)
with pytest.raises(UnknownAssetError):
dandi_file(zarr_path)
(zarr_path / "foo").touch()
(zarr_path / "arr_0" / "foo").touch()
zf = dandi_file(zarr_path)
assert isinstance(zf, ZarrAsset)

Expand Down Expand Up @@ -368,6 +373,9 @@ def test_upload_zarr_with_excluded_dotfiles(
(filepath / ".dandi").mkdir()
(filepath / ".dandi" / "somefile.txt").write_text("Hello world!\n")
(filepath / ".gitattributes").write_text("* eol=lf\n")
(filepath / "arr_0" / ".gitmodules").write_text("# Empty\n")
(filepath / "arr_1" / ".datalad").mkdir()
(filepath / "arr_1" / ".datalad" / "config").write_text("# Empty\n")
zf = dandi_file(filepath)
assert isinstance(zf, ZarrAsset)
asset = zf.upload(new_dandiset.dandiset, {})
Expand Down Expand Up @@ -407,6 +415,6 @@ def test_validate_deep_zarr(tmp_path: Path) -> None:
def test_validate_zarr_deep_via_excluded_dotfiles(tmp_path: Path) -> None:
    """
    A Zarr whose only deeply nested paths live under ``.git`` directories
    must produce no validation errors.

    Two arrays are saved into ``foo.zarr``, then deeply nested files are
    created beneath a ``.git`` directory both at the Zarr root and inside
    the nested directory ``a/b/c``, so the check covers excluded dotfiles at
    any level rather than only at the top.
    """
    zarr_path = tmp_path / "foo.zarr"
    zarr.save(zarr_path, np.arange(1000), np.arange(1000, 0, -1))
    # One call creates both the root-level and the nested ``.git`` trees.
    mkpaths(zarr_path, ".git/a/b/c/d/e/f/g.txt", "a/b/c/.git/d/e/f/g.txt")
    zf = dandi_file(zarr_path)
    assert zf.get_validation_errors() == []
2 changes: 1 addition & 1 deletion dandi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,6 @@ def is_page2_url(page1: str, page2: str) -> bool:
def exclude_from_zarr(path: Path) -> bool:
    """
    Returns `True` if the ``path`` is a file or directory that should be
    excluded from consideration when located in a Zarr

    Only the final component of ``path`` (``path.name``) is compared against
    the list of special names, so the answer is the same at any depth.  A
    path that merely lies *inside* an excluded directory (e.g.
    ``.git/data``) is not itself reported as excluded.
    """
    return path.name in (
        ".dandi",
        ".datalad",
        ".git",
        ".gitattributes",
        ".gitmodules",
    )

0 comments on commit 0064071

Please sign in to comment.