Skip to content

Commit

Permalink
#316 improving HL7 to actually work and adding in HPO
Browse files Browse the repository at this point in the history
  • Loading branch information
ecwood committed Aug 19, 2023
1 parent 2cfa148 commit ae432ac
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 2 deletions.
1 change: 1 addition & 0 deletions kg2_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
CURIE_PREFIX_HCPCS = 'HCPCS'
CURIE_PREFIX_HGNC = 'HGNC'
CURIE_PREFIX_HMDB = 'HMDB'
CURIE_PREFIX_HP = 'HP'
CURIE_PREFIX_IAO = 'IAO'
CURIE_PREFIX_IDENTIFIERS_ORG_REGISTRY = 'identifiers_org_registry'
CURIE_PREFIX_ISBN = 'ISBN'
Expand Down
24 changes: 22 additions & 2 deletions umls_list_jsonl_to_kg_jsonl.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
HCPCS_PREFIX = kg2_util.CURIE_PREFIX_HCPCS
HGNC_PREFIX = kg2_util.CURIE_PREFIX_HGNC
HL7_PREFIX = kg2_util.CURIE_PREFIX_UMLS
HPO_PREFIX = kg2_util.CURIE_PREFIX_HP

UMLS_SOURCE_PREFIX = kg2_util.CURIE_PREFIX_UMLS_SOURCE

Expand Down Expand Up @@ -107,12 +108,12 @@ def get_basic_info(curie_prefix, node_id, info, accession_heirarchy):
provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
cuis = info.get(CUIS_KEY, list())
tuis = info.get(TUIS_KEY, list())
iri = IRI_MAPPINGS[curie_prefix] + node_id
if curie_prefix == kg2_util.UMLS_SOURCE_PREFIX:
if curie_prefix == kg2_util.CURIE_PREFIX_UMLS:
if len(cuis) != 1:
return None, None, None, None, None, None, None, None
node_id = cuis[0]
node_curie = make_node_id(curie_prefix, node_id)
iri = IRI_MAPPINGS[curie_prefix] + node_id
category = TUI_MAPPINGS[str(tuple(tuis))]

names = info.get(NAMES_KEY, dict())
Expand Down Expand Up @@ -267,6 +268,7 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
provided_by = make_node_id(UMLS_SOURCE_PREFIX, 'HL7')

# Currently not used, but extracting them in case we want them in the future - descriptions from https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/attribute_names.html
attributes = info.get(INFO_KEY, dict())
hl7at = attributes.get('HL7AT', list())
hl7ii = attributes.get('HL7II', list())
hl7im = attributes.get('HL7IM', list())
Expand Down Expand Up @@ -306,6 +308,21 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, create_description("", tuis), nodes_output)


def process_hpo_item(node_id, info, nodes_output, edges_output):
    """Emit one node for an HPO-sourced entry.

    Resolves the node's basic fields through ``get_basic_info`` using the
    HPO CURIE prefix, then hands them to ``make_umls_node`` together with a
    description built by ``create_description`` from the entry's TUIs.

    Args:
        node_id: identifier of the entry within the HPO source.
        info: mapping holding the entry's data; its ``INFO_KEY`` value is
            read here as a mapping of attribute name -> values.
        nodes_output: output handle forwarded to ``make_umls_node``.
        edges_output: accepted for signature parity with the other
            ``process_*_item`` functions; not used in this function.
    """
    # Term types passed to get_basic_info as its accession hierarchy
    # (presumably most-preferred first -- confirm against get_basic_info).
    # https://www.nlm.nih.gov/research/umls/knowledge_sources/metathesaurus/release/precedence_suppressibility.html
    term_type_order = ['PT', 'SY', 'ET', 'OP', 'IS', 'OET']
    basic_info = get_basic_info(HPO_PREFIX, node_id, info, term_type_order)
    node_curie, iri, name, provided_by, category, synonyms, cuis, tuis = basic_info

    # Currently not used, but extracting them in case we want them in the future
    hpo_attributes = info.get(INFO_KEY, dict())
    sid, hpo_comment, date_created, syn_qualifier, ref = (
        hpo_attributes.get(attribute_name, list())
        for attribute_name in ('SID', 'HPO_COMMENT', 'DATE_CREATED', 'SYN_QUALIFIER', 'REF')
    )

    description = create_description("", tuis)
    # NOTE(review): "2023" is a hard-coded literal here, same as in
    # process_hl7_item -- presumably an update/release year; confirm.
    make_umls_node(node_curie, iri, name, category, "2023", provided_by, synonyms, description, nodes_output)


if __name__ == '__main__':
print("Starting umls_list_jsonl_to_kg_jsonl.py at", kg2_util.date())
args = get_args()
Expand Down Expand Up @@ -368,6 +385,9 @@ def process_hl7_item(node_id, info, nodes_output, edges_output):
process_hl7_item(node_id, value, nodes_output, edges_output)

if source == 'HPO':
process_hpo_item(node_id, value, nodes_output, edges_output)

if source == 'ICD10PCS':
name_keys.add(get_name_keys(value.get(NAMES_KEY, dict())))
attribute_keys.update(get_attribute_keys(value.get(INFO_KEY, dict())))

Expand Down

0 comments on commit ae432ac

Please sign in to comment.