Skip to content

Commit aa55b89

Browse files
authored
Merge pull request #1381 from dandi/gh-1352
Update for Pydantic v2
2 parents 7ca670b + 728882f commit aa55b89

20 files changed: +149 -116 lines changed

dandi/cli/cmd_download.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from ..dandiarchive import _dandi_url_parser, parse_dandi_url
1010
from ..dandiset import Dandiset
1111
from ..download import DownloadExisting, DownloadFormat, PathType
12-
from ..utils import get_instance
12+
from ..utils import get_instance, joinurl
1313

1414

1515
# The use of f-strings apparently makes this not a proper docstring, and so
@@ -131,9 +131,9 @@ def download(
131131
pass
132132
else:
133133
if instance.gui is not None:
134-
url = [f"{instance.gui}/#/dandiset/{dandiset_id}/draft"]
134+
url = [joinurl(instance.gui, f"/#/dandiset/{dandiset_id}/draft")]
135135
else:
136-
url = [f"{instance.api}/dandisets/{dandiset_id}/"]
136+
url = [joinurl(instance.api, f"/dandisets/{dandiset_id}/")]
137137

138138
return download.download(
139139
url,

dandi/cli/cmd_ls.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ def ls(
9696
all_fields = tuple(
9797
sorted(
9898
set(common_fields)
99-
| models.Dandiset.__fields__.keys()
100-
| models.Asset.__fields__.keys()
99+
| models.Dandiset.model_fields.keys()
100+
| models.Asset.model_fields.keys()
101101
)
102102
)
103103
else:
@@ -345,7 +345,7 @@ def fn():
345345
path,
346346
schema_version=schema,
347347
digest=Digest.dandi_etag(digest),
348-
).json_dict()
348+
).model_dump(mode="json", exclude_none=True)
349349
else:
350350
if path.endswith(tuple(ZARR_EXTENSIONS)):
351351
if use_fake_digest:

dandi/cli/cmd_service_scripts.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ def reextract_metadata(url: str, diff: bool, when: str) -> None:
104104
lgr.info("Extracting new metadata for asset")
105105
metadata = nwb2asset(asset.as_readable(), digest=digest)
106106
metadata.path = asset.path
107-
mddict = metadata.json_dict()
107+
mddict = metadata.model_dump(mode="json", exclude_none=True)
108108
if diff:
109109
oldmd = asset.get_raw_metadata()
110110
oldmd_str = yaml_dump(oldmd)

dandi/cli/tests/data/update_dandiset_from_doi/biorxiv.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -386,7 +386,7 @@
386386
"includeInCitation": true
387387
}
388388
],
389-
"dateCreated": "2023-04-25T16:28:26.500181+00:00",
389+
"dateCreated": "2023-04-25T16:28:26.500181Z",
390390
"description": "<jats:p>Progress in science requires standardized assays whose results can be readily shared, compared, and reproduced across laboratories. Reproducibility, however, has been a concern in neuroscience, particularly for measurements of mouse behavior. Here we show that a standardized task to probe decision-making in mice produces reproducible results across multiple laboratories. We designed a task for head-fixed mice that combines established assays of perceptual and value-based decision making, and we standardized training protocol and experimental hardware, software, and procedures. We trained 140 mice across seven laboratories in three countries, and we collected 5 million mouse choices into a publicly available database. Learning speed was variable across mice and laboratories, but once training was complete there were no significant differences in behavior across laboratories. Mice in different laboratories adopted similar reliance on visual stimuli, on past successes and failures, and on estimates of stimulus prior probability to guide their choices. These results reveal that a complex mouse behavior can be successfully reproduced across multiple laboratories. They establish a standard for reproducible rodent behavior, and provide an unprecedented dataset and open-access tools to study decision-making in mice. More generally, they indicate a path towards achieving reproducibility in neuroscience through collaborative open-science approaches.</jats:p>",
391391
"assetsSummary": {
392392
"schemaKey": "AssetsSummary",

dandi/cli/tests/data/update_dandiset_from_doi/elife.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@
105105
"includeInCitation": true
106106
}
107107
],
108-
"dateCreated": "2023-04-25T16:28:30.453019+00:00",
108+
"dateCreated": "2023-04-25T16:28:30.453019Z",
109109
"description": "<jats:p>Proprioception, the sense of body position, movement, and associated forces, remains poorly understood, despite its critical role in movement. Most studies of area 2, a proprioceptive area of somatosensory cortex, have simply compared neurons\u2019 activities to the movement of the hand through space. Using motion tracking, we sought to elaborate this relationship by characterizing how area 2 activity relates to whole arm movements. We found that a whole-arm model, unlike classic models, successfully predicted how features of neural activity changed as monkeys reached to targets in two workspaces. However, when we then evaluated this whole-arm model across active and passive movements, we found that many neurons did not consistently represent the whole arm over both conditions. These results suggest that 1) neural activity in area 2 includes representation of the whole arm during reaching and 2) many of these neurons represented limb state differently during active and passive movements.</jats:p>",
110110
"assetsSummary": {
111111
"schemaKey": "AssetsSummary",

dandi/cli/tests/data/update_dandiset_from_doi/jneurosci.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
"includeInCitation": true
4646
}
4747
],
48-
"dateCreated": "2023-04-25T16:28:28.308094+00:00",
48+
"dateCreated": "2023-04-25T16:28:28.308094Z",
4949
"description": "<jats:p>Reinforcement learning theory plays a key role in understanding the behavioral and neural mechanisms of choice behavior in animals and humans. Especially, intermediate variables of learning models estimated from behavioral data, such as the expectation of reward for each candidate choice (action value), have been used in searches for the neural correlates of computational elements in learning and decision making. The aims of the present study are as follows: (1) to test which computational model best captures the choice learning process in animals and (2) to elucidate how action values are represented in different parts of the corticobasal ganglia circuit. We compared different behavioral learning algorithms to predict the choice sequences generated by rats during a free-choice task and analyzed associated neural activity in the nucleus accumbens (NAc) and ventral pallidum (VP). The major findings of this study were as follows: (1) modified versions of an action\u2013value learning model captured a variety of choice strategies of rats, including win-stay\u2013lose-switch and persevering behavior, and predicted rats' choice sequences better than the best multistep Markov model; and (2) information about action values and future actions was coded in both the NAc and VP, but was less dominant than information about trial types, selected actions, and reward outcome. The results of our model-based analysis suggest that the primary role of the NAc and VP is to monitor information important for updating choice behaviors. Information represented in the NAc and VP might contribute to a choice mechanism that is situated elsewhere.</jats:p>",
5050
"assetsSummary": {
5151
"schemaKey": "AssetsSummary",

dandi/cli/tests/data/update_dandiset_from_doi/nature.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@
4646
"includeInCitation": true
4747
}
4848
],
49-
"dateCreated": "2023-04-25T16:28:31.601155+00:00",
49+
"dateCreated": "2023-04-25T16:28:31.601155Z",
5050
"description": "<jats:title>Abstract</jats:title><jats:p>Spatial cognition depends on an accurate representation of orientation within an environment. Head direction cells in distributed brain regions receive a range of sensory inputs, but visual input is particularly important for aligning their responses to environmental landmarks. To investigate how population-level heading responses are aligned to visual input, we recorded from retrosplenial cortex (RSC) of head-fixed mice in a moving environment using two-photon calcium imaging. We show that RSC neurons are tuned to the animal\u2019s relative orientation in the environment, even in the absence of head movement. Next, we found that RSC receives functionally distinct projections from visual and thalamic areas and contains several functional classes of neurons. While some functional classes mirror RSC inputs, a newly discovered class coregisters visual and thalamic signals. Finally, decoding analyses reveal unique contributions to heading from each class. Our results suggest an RSC circuit for anchoring heading representations to environmental visual landmarks.</jats:p>",
5151
"assetsSummary": {
5252
"schemaKey": "AssetsSummary",

dandi/cli/tests/data/update_dandiset_from_doi/neuron.json

+1 -1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
"includeInCitation": true
4646
}
4747
],
48-
"dateCreated": "2023-04-25T16:28:29.373034+00:00",
48+
"dateCreated": "2023-04-25T16:28:29.373034Z",
4949
"description": "A test Dandiset",
5050
"assetsSummary": {
5151
"schemaKey": "AssetsSummary",

dandi/dandiapi.py

+36 -37
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
import re
1414
from time import sleep, time
1515
from types import TracebackType
16-
from typing import TYPE_CHECKING, Any, ClassVar, Dict, FrozenSet, List, Optional
16+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
1717
from urllib.parse import quote_plus, urlparse, urlunparse
1818

1919
import click
@@ -44,6 +44,7 @@
4444
get_instance,
4545
is_interactive,
4646
is_page2_url,
47+
joinurl,
4748
)
4849

4950
if TYPE_CHECKING:
@@ -285,16 +286,12 @@ def request(
285286
def get_url(self, path: str) -> str:
286287
"""
287288
Append a slash-separated ``path`` to the instance's base URL. The two
288-
components are separated by a single slash, and any trailing slashes
289-
are removed.
289+
components are separated by a single slash, removing any excess slashes
290+
that would be present after naïve concatenation.
290291
291292
If ``path`` is already an absolute URL, it is returned unchanged.
292293
"""
293-
# Construct the url
294-
if path.lower().startswith(("http://", "https://")):
295-
return path
296-
else:
297-
return self.api_url.rstrip("/") + "/" + path.lstrip("/")
294+
return joinurl(self.api_url, path)
298295

299296
def get(self, path: str, **kwargs: Any) -> Any:
300297
"""
@@ -614,29 +611,21 @@ def get_asset(self, asset_id: str) -> BaseRemoteAsset:
614611
return BaseRemoteAsset.from_base_data(self, info, metadata)
615612

616613

617-
class APIBase(BaseModel):
614+
# `arbitrary_types_allowed` is needed for `client: DandiAPIClient`
615+
class APIBase(BaseModel, populate_by_name=True, arbitrary_types_allowed=True):
618616
"""
619617
Base class for API objects implemented in pydantic.
620618
621619
This class (aside from the `json_dict()` method) is an implementation
622620
detail; do not rely on it.
623621
"""
624622

625-
JSON_EXCLUDE: ClassVar[FrozenSet[str]] = frozenset(["client"])
626-
627623
def json_dict(self) -> dict[str, Any]:
628624
"""
629625
Convert to a JSONable `dict`, omitting the ``client`` attribute and
630626
using the same field names as in the API
631627
"""
632-
data = json.loads(self.json(exclude=self.JSON_EXCLUDE, by_alias=True))
633-
assert isinstance(data, dict)
634-
return data
635-
636-
class Config:
637-
allow_population_by_field_name = True
638-
# To allow `client: Session`:
639-
arbitrary_types_allowed = True
628+
return self.model_dump(mode="json", by_alias=True)
640629

641630

642631
class Version(APIBase):
@@ -710,7 +699,7 @@ class RemoteDandisetData(APIBase):
710699
modified: datetime
711700
contact_person: str
712701
embargo_status: EmbargoStatus
713-
most_recent_published_version: Optional[Version]
702+
most_recent_published_version: Optional[Version] = None
714703
draft_version: Version
715704

716705

@@ -752,7 +741,7 @@ def __init__(
752741
self._version = version
753742
self._data: RemoteDandisetData | None
754743
if data is not None:
755-
self._data = RemoteDandisetData.parse_obj(data)
744+
self._data = RemoteDandisetData.model_validate(data)
756745
else:
757746
self._data = None
758747

@@ -762,7 +751,7 @@ def __str__(self) -> str:
762751
def _get_data(self) -> RemoteDandisetData:
763752
if self._data is None:
764753
try:
765-
self._data = RemoteDandisetData.parse_obj(
754+
self._data = RemoteDandisetData.model_validate(
766755
self.client.get(f"/dandisets/{self.identifier}/")
767756
)
768757
except HTTP404Error:
@@ -875,9 +864,9 @@ def from_data(cls, client: DandiAPIClient, data: dict[str, Any]) -> RemoteDandis
875864
when acquiring data using means outside of this library.
876865
"""
877866
if data.get("most_recent_published_version") is not None:
878-
version = Version.parse_obj(data["most_recent_published_version"])
867+
version = Version.model_validate(data["most_recent_published_version"])
879868
else:
880-
version = Version.parse_obj(data["draft_version"])
869+
version = Version.model_validate(data["draft_version"])
881870
return cls(
882871
client=client, identifier=data["identifier"], version=version, data=data
883872
)
@@ -917,7 +906,7 @@ def get_versions(self, order: str | None = None) -> Iterator[Version]:
917906
for v in self.client.paginate(
918907
f"{self.api_path}versions/", params={"order": order}
919908
):
920-
yield Version.parse_obj(v)
909+
yield Version.model_validate(v)
921910
except HTTP404Error:
922911
raise NotFoundError(f"No such Dandiset: {self.identifier!r}")
923912

@@ -932,7 +921,7 @@ def get_version(self, version_id: str) -> VersionInfo:
932921
`Version`.
933922
"""
934923
try:
935-
return VersionInfo.parse_obj(
924+
return VersionInfo.model_validate(
936925
self.client.get(
937926
f"/dandisets/{self.identifier}/versions/{version_id}/info/"
938927
)
@@ -978,7 +967,7 @@ def get_metadata(self) -> models.Dandiset:
978967
metadata. Consider using `get_raw_metadata()` instead in order to
979968
fetch unstructured, possibly-invalid metadata.
980969
"""
981-
return models.Dandiset.parse_obj(self.get_raw_metadata())
970+
return models.Dandiset.model_validate(self.get_raw_metadata())
982971

983972
def get_raw_metadata(self) -> dict[str, Any]:
984973
"""
@@ -996,7 +985,7 @@ def set_metadata(self, metadata: models.Dandiset) -> None:
996985
"""
997986
Set the metadata for this version of the Dandiset to the given value
998987
"""
999-
self.set_raw_metadata(metadata.json_dict())
988+
self.set_raw_metadata(metadata.model_dump(mode="json", exclude_none=True))
1000989

1001990
def set_raw_metadata(self, metadata: dict[str, Any]) -> None:
1002991
"""
@@ -1049,7 +1038,7 @@ def publish(self, max_time: float = 120) -> RemoteDandiset:
10491038
)
10501039
start = time()
10511040
while time() - start < max_time:
1052-
v = Version.parse_obj(self.client.get(f"{draft_api_path}info/"))
1041+
v = Version.model_validate(self.client.get(f"{draft_api_path}info/"))
10531042
if v.status is VersionStatus.PUBLISHED:
10541043
break
10551044
sleep(0.5)
@@ -1273,7 +1262,7 @@ class BaseRemoteAsset(ABC, APIBase):
12731262

12741263
#: The `DandiAPIClient` instance that returned this `BaseRemoteAsset`
12751264
#: and which the latter will use for API requests
1276-
client: DandiAPIClient
1265+
client: DandiAPIClient = Field(exclude=True)
12771266
#: The asset identifier
12781267
identifier: str = Field(alias="asset_id")
12791268
#: The asset's (forward-slash-separated) path
@@ -1294,6 +1283,15 @@ def __init__(self, **data: Any) -> None: # type: ignore[no-redef]
12941283
# underscores, so we have to do it ourselves.
12951284
self._metadata = data.get("metadata", data.get("_metadata"))
12961285

1286+
def __eq__(self, other: Any) -> bool:
1287+
if type(self) is type(other):
1288+
# dict() includes fields with `exclude=True` (which are absent from
1289+
# the return value of `model_dump()`) but not private fields. We
1290+
# want to compare the former but not the latter.
1291+
return dict(self) == dict(other)
1292+
else:
1293+
return NotImplemented
1294+
12971295
def __str__(self) -> str:
12981296
return f"{self.client._instance_id}:assets/{self.identifier}"
12991297

@@ -1360,7 +1358,7 @@ def get_metadata(self) -> models.Asset:
13601358
valid metadata. Consider using `get_raw_metadata()` instead in
13611359
order to fetch unstructured, possibly-invalid metadata.
13621360
"""
1363-
return models.Asset.parse_obj(self.get_raw_metadata())
1361+
return models.Asset.model_validate(self.get_raw_metadata())
13641362

13651363
def get_raw_metadata(self) -> dict[str, Any]:
13661364
"""Fetch the metadata for the asset as an unprocessed `dict`"""
@@ -1610,7 +1608,7 @@ def iterfiles(self, prefix: str | None = None) -> Iterator[RemoteZarrEntry]:
16101608
for r in self.client.paginate(
16111609
f"{self.client.api_url}/zarr/{self.zarr}/files", params={"prefix": prefix}
16121610
):
1613-
data = ZarrEntryServerData.parse_obj(r)
1611+
data = ZarrEntryServerData.model_validate(r)
16141612
yield RemoteZarrEntry.from_server_data(self, data)
16151613

16161614
def get_entry_by_path(self, path: str) -> RemoteZarrEntry:
@@ -1667,13 +1665,12 @@ class RemoteAsset(BaseRemoteAsset):
16671665
`RemoteDandiset`.
16681666
"""
16691667

1670-
JSON_EXCLUDE = frozenset(["client", "dandiset_id", "version_id"])
1671-
16721668
#: The identifier for the Dandiset to which the asset belongs
1673-
dandiset_id: str
1669+
dandiset_id: str = Field(exclude=True)
1670+
16741671
#: The identifier for the version of the Dandiset to which the asset
16751672
#: belongs
1676-
version_id: str
1673+
version_id: str = Field(exclude=True)
16771674

16781675
@classmethod
16791676
def from_data(
@@ -1738,7 +1735,9 @@ def set_metadata(self, metadata: models.Asset) -> None:
17381735
Set the metadata for the asset to the given value and update the
17391736
`RemoteAsset` in place.
17401737
"""
1741-
return self.set_raw_metadata(metadata.json_dict())
1738+
return self.set_raw_metadata(
1739+
metadata.model_dump(mode="json", exclude_none=True)
1740+
)
17421741

17431742
@abstractmethod
17441743
def set_raw_metadata(self, metadata: dict[str, Any]) -> None:

dandi/dandiarchive.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737
from typing import Any
3838
from urllib.parse import unquote as urlunquote
3939

40-
from pydantic import AnyHttpUrl, parse_obj_as
40+
from pydantic import AnyHttpUrl, TypeAdapter
4141
import requests
4242

4343
from . import get_logger
@@ -82,9 +82,8 @@ class ParsedDandiURL(ABC):
8282
def api_url(self) -> AnyHttpUrl:
8383
"""The base URL of the Dandi API service, without a trailing slash"""
8484
# Kept for backwards compatibility
85-
r = parse_obj_as(AnyHttpUrl, self.instance.api.rstrip("/"))
86-
assert isinstance(r, AnyHttpUrl)
87-
return r # type: ignore[no-any-return]
85+
adapter = TypeAdapter(AnyHttpUrl)
86+
return adapter.validate_python(self.instance.api.rstrip("/"))
8887

8988
def get_client(self) -> DandiAPIClient:
9089
"""

dandi/files/bases.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def get_metadata(
9797
"""Return the Dandiset metadata inside the file"""
9898
with open(self.filepath) as f:
9999
meta = yaml_load(f, typ="safe")
100-
return DandisetMeta.unvalidated(**meta)
100+
return DandisetMeta.model_construct(**meta)
101101

102102
# TODO: @validate_cache.memoize_path
103103
def get_validation_errors(
@@ -183,7 +183,7 @@ def get_validation_errors(
183183
)
184184
try:
185185
asset = self.get_metadata(digest=self._DUMMY_DIGEST)
186-
BareAsset(**asset.dict())
186+
BareAsset(**asset.model_dump())
187187
except ValidationError as e:
188188
if devel_debug:
189189
raise

dandi/files/bids.py

+7 -2
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,9 @@ def _validate(self) -> None:
9494
)
9595
# Don't apply eta-reduction to the lambda, as mypy needs to be
9696
# assured that defaultdict's argument takes no parameters.
97-
self._asset_metadata = defaultdict(lambda: BareAsset.unvalidated())
97+
self._asset_metadata = defaultdict(
98+
lambda: BareAsset.model_construct() # type: ignore[call-arg]
99+
)
98100
for result in results:
99101
if result.id in BIDS_ASSET_ERRORS:
100102
assert result.path
@@ -230,7 +232,10 @@ def get_metadata(
230232
bids_metadata = BIDSAsset.get_metadata(self, digest, ignore_errors)
231233
nwb_metadata = NWBAsset.get_metadata(self, digest, ignore_errors)
232234
return BareAsset(
233-
**{**bids_metadata.dict(), **nwb_metadata.dict(exclude_none=True)}
235+
**{
236+
**bids_metadata.model_dump(),
237+
**nwb_metadata.model_dump(exclude_none=True),
238+
}
234239
)
235240

236241

0 commit comments

Comments
 (0)