From 54bc90af6146ad200b6c66f7d440b8b7af9f2370 Mon Sep 17 00:00:00 2001 From: Stefan Dvoretskii Date: Tue, 10 Dec 2024 16:08:17 +0100 Subject: [PATCH 1/4] fetch context of ome json-ld, throw errors if a thing is not in context --- src/omero_rdf/__init__.py | 62 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/src/omero_rdf/__init__.py b/src/omero_rdf/__init__.py index 2630dc6..67bcede 100644 --- a/src/omero_rdf/__init__.py +++ b/src/omero_rdf/__init__.py @@ -33,6 +33,11 @@ from rdflib import BNode, Graph, Literal, URIRef from rdflib.namespace import DCTERMS, RDF + +import requests +import json +from typing import Dict, Any, Optional + HELP = """A plugin for exporting rdf from OMERO omero-rdf creates a stream of RDF triples from the starting object that @@ -78,6 +83,55 @@ def _wrapper(self, *args: Any, **kwargs: Any): # type: ignore return _wrapper + +def fetch_jsonld_context(url: str) -> Optional[Dict[str, Any]]: + """ + Fetch JSON-LD context from a URL. + + Args: + url: The URL of the JSON-LD document + + Returns: + The @context object or None if not found/error + """ + try: + # Make HTTP request + response = requests.get(url, headers={'Accept': 'application/ld+json'}) + response.raise_for_status() + + # Parse JSON + data = response.json() + + # Extract @context + if '@context' in data: + return data['@context'] + else: + print(f"No @context found in {url}") + return None + + except requests.RequestException as e: + print(f"Network error: {e}") + return None + except json.JSONDecodeError as e: + print(f"JSON parsing error: {e}") + return None + +def key_in_context(key: str, context: Dict[str, Any]): + """ + Check if a key is in the context. + + Args: + key: The key to check + context: The JSON-LD context + + Returns: + True if the key is in the context, False otherwise + """ + if key.startswith("omero:"): + return key[6:] in context + else: + return key in context + class Handler: """ Instances are used to generate triples. @@ -86,6 +140,9 @@ class Handler: TBD """ + + url = "https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json" + context = fetch_jsonld_context(url) OME = "http://www.openmicroscopy.org/rdf/2016-06/ome_core/" OMERO = "http://www.openmicroscopy.org/TBD/omero/" @@ -150,6 +207,8 @@ def get_key(self, key: str) -> Optional[URIRef]: # Types that we want to omit fo return None else: + if not key_in_context(key, self.context): + logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", key) if key.startswith("omero:"): return URIRef(f"{self.OMERO}{key[6:]}") else: @@ -261,7 +320,8 @@ def rdf( # Types that we want to omit for now pass else: - + if not key_in_context(key, self.context): + logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", key) if k.startswith("omero:"): key = URIRef(f"{self.OMERO}{k[6:]}") else: From 0af0df97bd043c6fbf13dfe5edd161c5985037d1 Mon Sep 17 00:00:00 2001 From: Stefan Dvoretskii Date: Tue, 10 Dec 2024 16:20:48 +0100 Subject: [PATCH 2/4] convert omero and ome namespaces both to be "ome" namespace, point "ome" to the new gist context --- src/omero_rdf/__init__.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/omero_rdf/__init__.py b/src/omero_rdf/__init__.py index 67bcede..4b630ef 100644 --- a/src/omero_rdf/__init__.py +++ b/src/omero_rdf/__init__.py @@ -144,8 +144,8 @@ class Handler: url = "https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json" context = fetch_jsonld_context(url) - OME = "http://www.openmicroscopy.org/rdf/2016-06/ome_core/" - OMERO = "http://www.openmicroscopy.org/TBD/omero/" + OME = "ome:" + OMERO = "ome:" def __init__( self, @@ -167,12 +167,8 @@ def __init__( self.graph = Graph() self.graph.bind("wd", "http://www.wikidata.org/prop/direct/") self.graph.bind( - "ome", "http://www.openmicroscopy.org/rdf/2016-06/ome_core/" + "ome", "https://gist.githubusercontent.com/stefanches7/5b3402331d901bb3c3384bac047c4ac2/raw/cd45da585bfa630a56ef55670d2b5da2be50ff76/context.ld.json" ) - self.graph.bind( - "ome-xml", "http://www.openmicroscopy.org/Schemas/OME/2016-06#" - ) # FIXME - self.graph.bind("omero", "http://www.openmicroscopy.org/TBD/omero/") # self.graph.bind("xs", XMLSCHEMA) # TODO: Allow handlers to register namespaces From 583755833994162a3e660e674587ccd0da51ad9e Mon Sep 17 00:00:00 2001 From: Stefan Dvoretskii Date: Tue, 10 Dec 2024 16:53:08 +0100 Subject: [PATCH 3/4] replace print to stdout with a logger --- src/omero_rdf/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/omero_rdf/__init__.py b/src/omero_rdf/__init__.py index 4b630ef..6a1dc93 100644 --- a/src/omero_rdf/__init__.py +++ b/src/omero_rdf/__init__.py @@ -106,14 +106,14 @@ def fetch_jsonld_context(url: str) -> Optional[Dict[str, Any]]: if '@context' in data: return data['@context'] else: - print(f"No @context found in {url}") + logging.warning(f"No @context found in {url}") return None except requests.RequestException as e: - print(f"Network error: {e}") + logging.warning(f"Network error: {e}") return None except json.JSONDecodeError as e: - print(f"JSON parsing error: {e}") + logging.warning(f"JSON parsing error: {e}") return None def key_in_context(key: str, context: Dict[str, Any]): From c2fe574d4f13079cc6d3e8db745eb0f033c97836 Mon Sep 17 00:00:00 2001 From: Stefan Dvoretskii Date: Tue, 10 Dec 2024 16:59:02 +0100 Subject: [PATCH 4/4] bugfix: variable "k" instead of "key" --- src/omero_rdf/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/omero_rdf/__init__.py b/src/omero_rdf/__init__.py index 6a1dc93..e489f86 100644 --- a/src/omero_rdf/__init__.py +++ b/src/omero_rdf/__init__.py @@ -316,8 +316,8 @@ def rdf( # Types that we want to omit for now pass else: - if not key_in_context(key, self.context): - logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", key) + if not key_in_context(k, self.context): + logging.warning("Did not find in OMERO context: %s. Add it to the spreadsheet of new fields", k) if k.startswith("omero:"): key = URIRef(f"{self.OMERO}{k[6:]}") else: