def fetch_jsonld_context(
    url: str, timeout: float = 10.0
) -> Optional[Dict[str, Any]]:
    """
    Fetch the ``@context`` of a JSON-LD document from a URL.

    Request failures, unparsable bodies and documents without an
    ``@context`` entry are logged as warnings and reported as ``None``
    instead of being raised.

    Args:
        url: The URL of the JSON-LD document
        timeout: Seconds to wait for the server before giving up; without
            a timeout the request could block indefinitely

    Returns:
        The value stored under ``@context`` or None if not found/error
    """
    # Keep the network step and the parsing step in separate try blocks so
    # that each failure is logged under the right label.
    try:
        response = requests.get(
            url,
            headers={"Accept": "application/ld+json"},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.RequestException:
        logging.warning("Network error", exc_info=True)
        return None

    try:
        data = response.json()
    except ValueError:
        # ValueError is the common base of the JSON decode errors raised by
        # the different JSON backends that requests may use.
        logging.warning("JSON parsing error", exc_info=True)
        return None

    # A JSON-LD document may also be a list or a scalar; only a JSON object
    # can carry a top-level "@context" entry.
    if isinstance(data, dict) and "@context" in data:
        return data["@context"]

    logging.warning("No @context found in %s", url)
    return None


def key_in_context(key: str, context: Optional[Dict[str, Any]]) -> bool:
    """
    Check if a key is in the context.

    A leading ``omero:`` prefix is stripped from the key before the lookup.

    Args:
        key: The key to check
        context: The JSON-LD context

    Returns:
        True if the key is in the context, False otherwise

    Raises:
        ValueError: If context is None
    """
    if context is None:
        raise ValueError("context is None")

    prefix = "omero:"
    if key.startswith(prefix):
        start = len(prefix)
        key = key[start:]
    return key in context