Skip to content

Commit

Permalink
#387 drastic changes: REMOVAL OF ONTOBIO from kg2_util and validation
Browse files Browse the repository at this point in the history
  • Loading branch information
ecwood committed Aug 29, 2024
1 parent 2eb8b00 commit 2375994
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 46 deletions.
4 changes: 2 additions & 2 deletions validate/run-validation-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ ${curl_get} ${infores_catalog_yaml_url} -o ${infores_catalog_yaml}
${python_command} -u ${VALIDATE_CODE_DIR}/validate_curies_to_categories_yaml.py \
${curies_to_categories_file} \
${curies_to_urls_file} \
${biolink_model_owl_url} \
${biolink_model_owl_local_file}
${biolink_model_yaml_url} \
${biolink_model_yaml_local_file}

${python_command} -u ${VALIDATE_CODE_DIR}/validate_curies_to_urls_map_yaml.py \
${curies_to_urls_file} \
Expand Down
12 changes: 6 additions & 6 deletions validate/validate_curies_to_categories_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

__author__ = 'Stephen Ramsey'
__copyright__ = 'Oregon State University'
__credits__ = ['Stephen Ramsey']
__credits__ = ['Stephen Ramsey', 'Erica Wood']
__license__ = 'MIT'
__version__ = '0.1.0'
__maintainer__ = ''
Expand All @@ -22,8 +22,8 @@ def make_arg_parser():
arg_parser = argparse.ArgumentParser(description='validate_curies_to_categories.py: checks the file `curies-to-categories.yaml` for correctness.')
arg_parser.add_argument('curiesToCategoriesFile', type=str)
arg_parser.add_argument('curiesToURLsMapFile', type=str)
arg_parser.add_argument('biolinkModelOWLURL', type=str)
arg_parser.add_argument('biolinkModelOWLLocalFile', type=str)
arg_parser.add_argument('biolinkModelYAMLURL', type=str)
arg_parser.add_argument('biolinkModelYAMLLocalFile', type=str)
return arg_parser


Expand All @@ -37,8 +37,8 @@ def make_arg_parser():
curies_to_url_map_data_bidir = {next(iter(listitem.keys())) for listitem in curies_to_url_map_data['use_for_bidirectional_mapping']}

kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name)
biolink_ont = kg2_util.make_ontology_from_local_file(biolink_model_file_name)
biolink_categories_ontology_depths = kg2_util.get_biolink_categories_ontology_depths(biolink_ont)
biolink_model = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(biolink_model_file_name))
_, biolink_categories = kg2_util.identify_biolink_terms(biolink_model)

for prefix in curies_to_categories_data['prefix-mappings'].keys():
assert prefix in curies_to_url_map_data_bidir, prefix
Expand All @@ -53,4 +53,4 @@ def make_arg_parser():
for category in categories_to_check:
category_camelcase = kg2_util.convert_space_case_to_camel_case(category)
category_curie = kg2_util.CURIE_PREFIX_BIOLINK + ':' + category_camelcase
assert category_curie in biolink_categories_ontology_depths, category_curie
assert category_curie in biolink_categories, category_curie
42 changes: 4 additions & 38 deletions validate/validate_kg2_util_curies_urls_categories.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@
import kg2_util
import json

# Key under which a Biolink slot/class definition names its direct parent term.
DESCENDANT_KEY = "is_a"
# Name of the predicate used as the root when collecting Biolink predicates.
BASE_PREDICATE = "related to"
# Name of the class used as the root when collecting Biolink categories.
BASE_CATEGORY = "named thing"

def make_arg_parser():
arg_parser = argparse.ArgumentParser(description='validate_kg2_util_curies_urls_categories.py: ' +
'checks the file `kg2_util.py` for correctness for its CURIE IDs, Base URLs, and biolink categories.')
Expand All @@ -29,34 +25,6 @@ def make_arg_parser():
arg_parser.add_argument('biolinkModelLocalFile', type=str)
return arg_parser

def construct_biolink_term_set(is_a_base, biolink_terms):
    """Return `is_a_base` together with all of its transitive descendants.

    Args:
        is_a_base: name of the term at the root of the sub-hierarchy.
        biolink_terms: dict mapping a term name to the name of its direct
            parent (the term's `is_a` value).

    Returns:
        A set containing `is_a_base` and every key of `biolink_terms` that
        reaches `is_a_base` by following parent links.
    """
    # Index children by parent once, so the traversal does not rescan the
    # whole mapping for every visited term.
    children_by_parent = {}
    for term, parent in biolink_terms.items():
        try:
            children_by_parent.setdefault(parent, []).append(term)
        except TypeError:
            # An unhashable parent value cannot be a term name; ignore it.
            continue

    output_set = set()
    pending = [is_a_base]
    while pending:
        term = pending.pop()
        # The visited check keeps cyclic or self-referential parent links
        # from looping forever.
        if term in output_set:
            continue
        output_set.add(term)
        pending.extend(children_by_parent.get(term, ()))
    return output_set

def identify_biolink_terms(biolink_model):
    """Collect the Biolink predicate and category names from a parsed model.

    Args:
        biolink_model: the Biolink model as a dict with "slots" and
            "classes" entries, each mapping a term name to its definition.

    Returns:
        A tuple `(predicates, categories)` of lists: the terms descending
        from BASE_PREDICATE among the slots and from BASE_CATEGORY among
        the classes (roots included). The lists are built from sets, so
        their order is unspecified.
    """
    # Only terms that declare a parent take part in the hierarchy.
    predicate_parents = {
        name: definition[DESCENDANT_KEY]
        for name, definition in biolink_model["slots"].items()
        if DESCENDANT_KEY in definition
    }
    category_parents = {
        name: definition[DESCENDANT_KEY]
        for name, definition in biolink_model["classes"].items()
        if DESCENDANT_KEY in definition
    }

    # Use the module-level root names rather than repeating the literals.
    biolink_predicates = construct_biolink_term_set(BASE_PREDICATE, predicate_parents)
    biolink_categories = construct_biolink_term_set(BASE_CATEGORY, category_parents)

    return list(biolink_predicates), list(biolink_categories)



args = make_arg_parser().parse_args()
biolink_model_url = args.biolinkModelURL
biolink_model_file_name = args.biolinkModelLocalFile
Expand All @@ -73,7 +41,7 @@ def identify_biolink_terms(biolink_model):

kg2_util.download_file_if_not_exist_locally(biolink_model_url, biolink_model_file_name)
biolink_model = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(biolink_model_file_name))
biolink_edge_labels, biolink_categories = identify_biolink_terms(biolink_model)
biolink_edge_labels, biolink_categories = kg2_util.identify_biolink_terms(biolink_model)

for variable_name in dir(kg2_util):
variable_value = getattr(kg2_util, variable_name)
Expand All @@ -84,14 +52,12 @@ def identify_biolink_terms(biolink_model):
assert url_str in valid_base_urls, url_str
elif variable_name.startswith('BIOLINK_CATEGORY_'):
category_label = variable_value
category_camelcase = kg2_util.convert_space_case_to_camel_case(category_label)
category_curie = kg2_util.CURIE_PREFIX_BIOLINK + ':' + category_camelcase
assert category_curie in biolink_categories, category_curie
# The membership test is on the plain label, so report that label on failure.
assert category_label in biolink_categories, category_label
# assert category_label in categories_to_check, category_label
elif variable_name.startswith('CURIE_ID_'):
curie_id = variable_value
assert ':' in curie_id, variable_name
assert curie_id.split(':')[0] in curies_to_url_map_data_bidir, variable_name
elif variable_name.startswith('EDGE_LABEL_BIOLINK_'):
relation_label = variable_value
assert kg2_util.CURIE_PREFIX_BIOLINK + ':' + relation_label in biolink_edge_labels, relation_label
relation_label = variable_value.replace('_', ' ')
assert relation_label in biolink_edge_labels, relation_label

0 comments on commit 2375994

Please sign in to comment.