diff --git a/dandi/cli/tests/test_digest.py b/dandi/cli/tests/test_digest.py index 70de0a35c..550788116 100644 --- a/dandi/cli/tests/test_digest.py +++ b/dandi/cli/tests/test_digest.py @@ -77,6 +77,9 @@ def test_digest_zarr_with_excluded_dotfiles(): os.mkdir("sample.zarr/.dandi") Path("sample.zarr", ".dandi", "somefile.txt").touch() Path("sample.zarr", ".gitattributes").touch() + Path("sample.zarr", "arr_0", ".gitmodules").touch() + Path("sample.zarr", "arr_1", ".datalad").mkdir() + Path("sample.zarr", "arr_1", ".datalad", "config").touch() r = runner.invoke(digest, ["--digest", "zarr-checksum", "sample.zarr"]) assert r.exit_code == 0 assert r.output == "sample.zarr: 4313ab36412db2981c3ed391b38604d6-5--1516\n" diff --git a/dandi/download.py b/dandi/download.py index 81d836de1..0a6b9b6d6 100644 --- a/dandi/download.py +++ b/dandi/download.py @@ -880,7 +880,7 @@ def digest_callback(path: str, algoname: str, d: str) -> None: d = dirs.popleft() is_empty = True for p in list(d.iterdir()): - if d == zarr_basepath and exclude_from_zarr(p): + if exclude_from_zarr(p): is_empty = False elif ( p.is_file() diff --git a/dandi/files/_private.py b/dandi/files/_private.py index df465d1be..c7db67d0e 100644 --- a/dandi/files/_private.py +++ b/dandi/files/_private.py @@ -20,7 +20,6 @@ ZarrBIDSAsset, ) from .zarr import ZarrAsset -from ..utils import exclude_from_zarr class DandiFileType(Enum): @@ -35,9 +34,9 @@ class DandiFileType(Enum): @staticmethod def classify(path: Path) -> DandiFileType: if path.is_dir(): - if not any(p for p in path.iterdir() if not exclude_from_zarr(p)): - raise UnknownAssetError("Empty directories cannot be assets") if path.suffix in ZARR_EXTENSIONS: + if is_empty_zarr(path): + raise UnknownAssetError("Empty directories cannot be Zarr assets") return DandiFileType.ZARR raise UnknownAssetError( f"Directory has unrecognized suffix {path.suffix!r}" @@ -96,3 +95,9 @@ def __call__(self, filepath: Path, path: str) -> DandiFile: ) self.bids_dataset_description.dataset_files.append(df) return df + + +def is_empty_zarr(path: Path) -> bool: + """:meta private:""" + zf = ZarrAsset(filepath=path, path=path.name) + return not any(zf.iterfiles()) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index 5f1fb433c..212877bdb 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -80,7 +80,7 @@ def is_dir(self) -> bool: def iterdir(self) -> Iterator[LocalZarrEntry]: for p in self.filepath.iterdir(): - if self.is_root() and exclude_from_zarr(p): + if exclude_from_zarr(p): continue if p.is_dir() and not any(p.iterdir()): # Ignore empty directories diff --git a/dandi/support/digests.py b/dandi/support/digests.py index 8bfbde7e1..c0295be22 100644 --- a/dandi/support/digests.py +++ b/dandi/support/digests.py @@ -124,9 +124,7 @@ def digest_file(f: Path) -> Tuple[Path, str, int]: return (f, dgst, os.path.getsize(f)) zcc = ZCTree() - for p, digest, size in threaded_walk( - path, digest_file, exclude=lambda p: p.parent == path and exclude_from_zarr(p) - ): + for p, digest, size in threaded_walk(path, digest_file, exclude=exclude_from_zarr): zcc.add(p.relative_to(path), digest, size) return zcc.get_digest() diff --git a/dandi/tests/test_download.py b/dandi/tests/test_download.py index 904a84fcc..7570b4ecd 100644 --- a/dandi/tests/test_download.py +++ b/dandi/tests/test_download.py @@ -356,6 +356,8 @@ def test_download_different_zarr_onto_excluded_dotfiles( (zarr_path / ".dandi" / "somefile.txt").touch() (zarr_path / ".datalad").mkdir() (zarr_path / ".gitattributes").touch() + (zarr_path / "arr_0").mkdir() + (zarr_path / "arr_0" / ".gitmodules").touch() download( zarr_dandiset.dandiset.version_api_url, tmp_path, existing="overwrite-different" ) @@ -367,6 +369,7 @@ def test_download_different_zarr_onto_excluded_dotfiles( zarr_path / ".gitattributes", zarr_path / ".zgroup", zarr_path / "arr_0", + zarr_path / "arr_0" / ".gitmodules", zarr_path / "arr_0" / ".zarray", zarr_path / "arr_0" / "0", zarr_path / "arr_1", diff --git a/dandi/tests/test_files.py b/dandi/tests/test_files.py index 5d7086f37..4c95043ce 100644 --- a/dandi/tests/test_files.py +++ b/dandi/tests/test_files.py @@ -229,11 +229,16 @@ def test_find_dandi_files_with_bids(tmp_path: Path) -> None: def test_dandi_file_zarr_with_excluded_dotfiles(tmp_path: Path) -> None: zarr_path = tmp_path / "foo.zarr" mkpaths( - zarr_path, ".git/data", ".gitattributes", ".dandi/somefile.txt", ".datalad/" + zarr_path, + ".git/data", + ".gitattributes", + ".dandi/somefile.txt", + ".datalad/", + "arr_0/.gitmodules", ) with pytest.raises(UnknownAssetError): dandi_file(zarr_path) - (zarr_path / "foo").touch() + (zarr_path / "arr_0" / "foo").touch() zf = dandi_file(zarr_path) assert isinstance(zf, ZarrAsset) @@ -368,6 +373,9 @@ def test_upload_zarr_with_excluded_dotfiles( (filepath / ".dandi").mkdir() (filepath / ".dandi" / "somefile.txt").write_text("Hello world!\n") (filepath / ".gitattributes").write_text("* eol=lf\n") + (filepath / "arr_0" / ".gitmodules").write_text("# Empty\n") + (filepath / "arr_1" / ".datalad").mkdir() + (filepath / "arr_1" / ".datalad" / "config").write_text("# Empty\n") zf = dandi_file(filepath) assert isinstance(zf, ZarrAsset) asset = zf.upload(new_dandiset.dandiset, {}) @@ -407,6 +415,6 @@ def test_validate_deep_zarr(tmp_path: Path) -> None: def test_validate_zarr_deep_via_excluded_dotfiles(tmp_path: Path) -> None: zarr_path = tmp_path / "foo.zarr" zarr.save(zarr_path, np.arange(1000), np.arange(1000, 0, -1)) - mkpaths(zarr_path, ".git/a/b/c/d/e/f/g.txt") + mkpaths(zarr_path, ".git/a/b/c/d/e/f/g.txt", "a/b/c/.git/d/e/f/g.txt") zf = dandi_file(zarr_path) assert zf.get_validation_errors() == [] diff --git a/dandi/utils.py b/dandi/utils.py index 996121c50..23ec7e4c1 100644 --- a/dandi/utils.py +++ b/dandi/utils.py @@ -796,6 +796,6 @@ def is_page2_url(page1: str, page2: str) -> bool: def exclude_from_zarr(path: Path) -> bool: """ Returns `True` if the ``path`` is a file or directory that should be - excluded from consideration when located at the root of a Zarr + excluded from consideration when located in a Zarr """ return path.name in (".dandi", ".datalad", ".git", ".gitattributes", ".gitmodules")