Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TEMP: just exploring identifiers and potentially deriving more specific ones #256

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions dandischema/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,23 @@ class DigestType(Enum):
dandi_zarr_checksum = "dandi:dandi-zarr-checksum"


# Base alias for identifier strings.  The aliases below layer a regex
# constraint on top of it through ``StringConstraints``.
Identifier = str

# Four dash-separated groups of four characters: digits only, except that
# the very last character may also be "X".
ORCID = Annotated[
    Identifier,
    StringConstraints(pattern=r"^[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$"),
]

# A ror.org identifier, optionally prefixed with the "https://ror.org/" URL.
# The dot in the host name is escaped so that only a literal "." matches;
# an unescaped "." would accept any character (e.g. "https://rorXorg/...").
RORID = Annotated[
    Identifier,
    StringConstraints(pattern=r"^(https://ror\.org/)?0[0-9a-z]{6}[0-9a-z]{2}$"),
]

# "DANDI:" followed by a "0" and five more digits.
DANDI = Annotated[Identifier, StringConstraints(pattern=r"^DANDI:0[0-9]{5}$")]

# "SCR_" followed by exactly six digits.
RRID = Annotated[Identifier, StringConstraints(pattern=r"^SCR_[0-9]{6}$")]


class IdentifierType(Enum):
"""An enumeration of identifiers"""

Expand All @@ -122,6 +139,12 @@ class IdentifierType(Enum):
rrid = "dandi:rrid"


# TODO: add (somewhere/somehow) a regex for each of the IdentifierType values (str).
# A good UI could then associate identifiers based on regex matching, but overall the
# model should likely be more explicit about which Identifier is used or allowed in each
# particular case. So we should make it possible to traverse all Identifier subclasses
# and collect their regexes.


class LicenseType(Enum):
"""An enumeration of supported licenses"""

Expand Down Expand Up @@ -613,9 +636,13 @@ class PropertyValue(DandiBaseModel):
valueReference: Optional["PropertyValue"] = Field(
None, json_schema_extra={"nskey": "schema"}
) # Note: recursive (circular or not)
# TODO: check whether Union[IdentifierType, AnyHttpUrl] is really what was intended
# here, or whether it should be a dict[IdentifierType, AnyHttpUrl] pointing to specific
# identifiers, or even just an `Identifier`, which is currently just a str.
# Note: this field does not seem to be used yet.
propertyID: Optional[Union[IdentifierType, AnyHttpUrl]] = Field(
None,
description="A commonly used identifier for"
description="A commonly used identifier for "
"the characteristic represented by the property. "
"For example, a known prefix like DOI or a full URL.",
json_schema_extra={"nskey": "schema"},
Expand Down Expand Up @@ -643,12 +670,6 @@ def ensure_value(cls, val: Union[Any, List[Any]]) -> Union[Any, List[Any]]:
# https://docs.pydantic.dev/latest/concepts/postponed_annotations/#self-referencing-or-recursive-models
PropertyValue.model_rebuild()

Identifier = str
ORCID = str
RORID = str
DANDI = str
RRID = str


class BaseType(DandiBaseModel):
"""Base class for enumerated types"""
Expand Down Expand Up @@ -826,7 +847,9 @@ class ContactPoint(DandiBaseModel):
_ldmeta = {"nskey": "schema"}


# ???: should this class be hidden away, since it is not used directly and only its subclasses are used?
class Contributor(DandiBaseModel):
# Note: type to be overloaded by subclasses
identifier: Optional[Identifier] = Field(
None,
title="A common identifier",
Expand Down Expand Up @@ -876,7 +899,8 @@ class Organization(Contributor):
None,
title="A ror.org identifier",
description="Use an ror.org identifier for institutions.",
pattern=r"^https://ror.org/[a-z0-9]+$",
# ??? Could it be removed, since should be enforced/checked by RORID type
# pattern=r"^https://ror.org/[a-z0-9]+$",
json_schema_extra={"nskey": "schema"},
)

Expand Down Expand Up @@ -1343,6 +1367,7 @@ class Participant(DandiBaseModel):
when the Participant or Subject engaged in the production of data being described.
"""

# name?
identifier: Identifier = Field(json_schema_extra={"nskey": "schema"})
altName: Optional[List[Identifier]] = Field(
None, json_schema_extra={"nskey": "dandi"}
Expand Down
Loading