Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RemoteDandiset.has_data_standard() convenience function #958

Draft
wants to merge 16 commits into
base: master
Choose a base branch
from
Draft
29 changes: 29 additions & 0 deletions dandi/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@

T = TypeVar("T")

DATA_STANDARD_MAP = dict(
NWB="RRID:SCR_015242",
BIDS="RRID:SCR_016124",
)


class AssetType(Enum):
"""
Expand Down Expand Up @@ -1223,6 +1228,30 @@
self, metadata=asset_metadata, jobs=jobs, replacing=replace_asset
)

def has_data_standard(self, data_standard: str) -> bool:
"""
Returns True if the Dandiset contains one or more files of the indicated
standard. Otherwise, returns False. This is determined by checking for
the RRID of the standard in the "dataStandard" field of the assetsSummary of
the dandiset.

:param data_standard: can be "NWB", "BIDS", or an RRID of a standard.
:type data_standard: str
"""
if data_standard in DATA_STANDARD_MAP:
rrid = DATA_STANDARD_MAP[data_standard]
elif data_standard.startswith("RRID:"):
rrid = data_standard
else:
raise ValueError(
"'data_standard' must be an RRID (of form 'RRID:XXX_NNNNNNN`) or one "
f"of the following values: {', '.join(DATA_STANDARD_MAP.keys())}"
)
assets_summary = self.get_raw_metadata()["assetsSummary"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, assetsSummary seems to be non optional, so must be present in a valid metadata record, but if not we would get KeyError here... which could be good as to alert user that smth is wrong with metadata in that dandisets. But should it be an exception really or just a warning? I would lean to a be a warning and just return False. WDYT @jwodder and @satra ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jwodder and @bendichter WDYT - better be strict or robust?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd be fine with changing this to throw a warning if assetsSummary is missing

bendichter marked this conversation as resolved.
Show resolved Hide resolved
if "dataStandard" not in assets_summary:
return False

Check warning on line 1252 in dandi/dandiapi.py

View check run for this annotation

Codecov / codecov/patch

dandi/dandiapi.py#L1252

Added line #L1252 was not covered by tests
return any(x["identifier"] == rrid for x in assets_summary["dataStandard"])


class BaseRemoteAsset(ABC, APIBase):
"""
Expand Down
21 changes: 21 additions & 0 deletions dandi/tests/test_dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,3 +701,24 @@ def test_rename_type_mismatch(text_dandiset: SampleDandiset, dest: str) -> None:
assert asset1a.get_raw_metadata()["path"] == "file.txt"
with pytest.raises(NotFoundError):
text_dandiset.dandiset.get_asset_by_path(dest)


def test_dandiset_has_data_standard():
with DandiAPIClient() as client:
dandiset = client.get_dandiset("000003", version_id="0.210812.1448")
assert dandiset.has_data_standard("NWB")
assert dandiset.has_data_standard("RRID:SCR_015242")
assert not dandiset.has_data_standard("RRID:XXX_000000")
assert not dandiset.has_data_standard("BIDS")


def test_dandiset_has_data_standard_incorrect_arg():
with DandiAPIClient() as client:
dandiset = client.get_dandiset("000003", version_id="0.210812.1448")
with pytest.raises(ValueError) as exc_info:
dandiset.has_data_standard("NWC")
assert (
str(exc_info.value)
== "'data_standard' must be an RRID (of form 'RRID:XXX_NNNNNNN`) or one of the "
"following values: NWB, BIDS"
)