Skip to content

Commit

Permalink
Merge pull request #260 from dandi/enh-version
Browse files Browse the repository at this point in the history
Add "version" to datacite record and bundle datacite json serializations
  • Loading branch information
yarikoptic authored Nov 12, 2024
2 parents 2ac69ca + a9c909f commit 9d068b9
Show file tree
Hide file tree
Showing 10 changed files with 2,228 additions and 49 deletions.
27 changes: 17 additions & 10 deletions dandischema/datacite.py → dandischema/datacite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
"""
Interfaces and data to interact with DataCite metadata
"""

# TODO: RF into submodules for some next "minor" taking care not to break

from copy import deepcopy
from functools import lru_cache
import json
from pathlib import Path
import re
from typing import Any, Dict, Union

from jsonschema import Draft7Validator
import requests

from .models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType
from ..models import NAME_PATTERN, Organization, Person, PublishedDandiset, RoleType

DATACITE_CONTRTYPE = {
"ContactPerson",
Expand Down Expand Up @@ -83,6 +92,8 @@ def to_datacite(
]

attributes["doi"] = meta.doi
if meta.version:
attributes["version"] = meta.version
attributes["titles"] = [{"title": meta.name}]
attributes["descriptions"] = [
{"description": meta.description, "descriptionType": "Abstract"}
Expand Down Expand Up @@ -240,15 +251,11 @@ def to_datacite(
return datacite_dict


def _get_datacite_schema() -> Any:
    """Download and return the DataCite 4.3 JSON schema.

    The schema is fetched from a URL pinned to a specific commit of the
    ``datacite/schema`` GitHub repository, so the content retrieved is
    expected to stay the same between calls.

    Returns
    -------
    Any
        The decoded JSON document of the schema.

    Raises
    ------
    requests.HTTPError
        If the server answers with an unsuccessful status code.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    response = requests.get(
        "https://raw.githubusercontent.com/datacite/schema/"
        "732cc7ef29f4cad4d6adfac83544133cd57a2e5e/"
        "source/json/kernel-4.3/datacite_4.3_schema.json",
        # requests applies no timeout by default; without one, a stalled
        # connection would block the caller indefinitely.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
@lru_cache()
def _get_datacite_schema(version_id: str = "datacite-4.3-17-gaa5db56") -> Any:
    """Load the bundled DataCite JSON schema identified by ``version_id``.

    The schema is read from ``<version_id>.json`` inside the ``schema``
    folder located next to this module.

    Results are memoized with ``lru_cache``: repeated calls with the same
    ``version_id`` return the very same object, so callers should treat the
    returned schema as read-only.

    Parameters
    ----------
    version_id : str
        File name (without the ``.json`` extension) of the schema to load.

    Returns
    -------
    Any
        The decoded JSON document of the schema.

    Raises
    ------
    FileNotFoundError
        If no schema file exists for ``version_id``.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    schema_path = Path(__file__).parent / "schema" / f"{version_id}.json"
    # Decode explicitly as UTF-8 (the standard JSON encoding) instead of
    # relying on the locale's default encoding, which differs per platform.
    return json.loads(schema_path.read_text(encoding="utf-8"))


def validate_datacite(datacite_dict: dict) -> None:
Expand Down
48 changes: 48 additions & 0 deletions dandischema/datacite/schema/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
This folder contains copies of the JSON Schema serializations of the DataCite
metadata schema, which were initially kept within the datacite repository,

https://github.com/datacite/schema/tree/master/source/json

but were then moved to the place where they are actually produced -- inveniosoftware

https://github.com/inveniosoftware/datacite/tree/master/datacite/schemas

Those serializations are not "scripted" and were apparently produced manually.
Related issues/inquiries:

- https://github.com/datacite/schema/issues/149
- https://github.com/inveniosoftware/datacite/issues/101

The version suffixes in the names of the files here were derived from the
original version tag and the output of `git describe`, so that we capture the
"order" within MAJOR.MINOR versions, e.g.

❯ git describe --tags --match 4.3 732cc7
4.3-72-g732cc7e
❯ git describe --tags --match 4.3 aa5db5
4.3-17-gaa5db56

for those from datacite; the last one is based on the commit in which the file
was last modified (not current master). In "inveniosoftware" there are no tags
for versions of datacite, so we base the ordering on the first tag there, v0.1.0:

❯ git log datacite-v4.3.json
commit 24fc2ba3ded44512ce8569dc11c958da4a29f70a
Author: Thorge Petersen <[email protected]>
Date: Fri Aug 12 09:47:45 2022 +0200

schema: change affiliation definition to match property name

commit dc8403fd8556858e8917b960b0721884c52a588e
Author: Tom Morrell <[email protected]>
Date: Thu Aug 15 12:17:21 2019 -0700

schema: Add support for DataCite 4.3 metadata schema

❯ git describe --match v0.1.0 24fc2ba3ded44512ce8569dc11c958da4a29f70a
v0.1.0-66-g24fc2ba

so we get

❯ cp /home/yoh/proj/datacite/inveniosoftware-datacite/datacite/schemas/datacite-v4.3.json inveniosoftware-4.3-66-g24fc2ba.json
❯ cp /home/yoh/proj/datacite/inveniosoftware-datacite/datacite/schemas/datacite-v4.5.json inveniosoftware-4.5-81-g160250d.json
Loading

0 comments on commit 9d068b9

Please sign in to comment.