def _fma_name_and_synonyms(names):
    """Choose the preferred name and collect synonyms from an FMA names dict.

    ``names`` is read as ``{'PT': {'Y': [...], 'N': [...]}, 'SY': {...}}``.
    The first non-empty ``PT`` list (``'Y'`` before ``'N'``) supplies the
    name; its remaining entries are appended to the synonyms. If there is no
    usable ``PT`` entry, the first synonym is promoted to the name.

    NOTE(review): assumes every list holds plain strings -- confirm against
    the code that produces the input records.

    Returns:
        A ``(name, synonyms)`` tuple. ``name`` is an empty string when the
        record carries no names at all.
    """
    sy = names.get('SY', dict())
    # Synonym order is kept: SY 'Y' entries, then SY 'N' entries, then any
    # surplus PT entries.
    synonyms = list(sy.get('Y', list())) + list(sy.get('N', list()))

    pt = names.get('PT', dict())
    for flag in ('Y', 'N'):
        preferred = pt.get(flag)
        if preferred:
            # The chosen entry is already the complete name; indexing it
            # again would cut it down to its first character.
            return preferred[0], synonyms + list(preferred[1:])

    if synonyms:
        return synonyms[0], synonyms[1:]

    # No names of any kind: fall back to an empty name rather than raising
    # IndexError on an empty list.
    return str(), synonyms


def process_fma_item(node_id, info, tui_mappings, iri_mappings, nodes_output, edges_output):
    """Build a node for one FMA record and write it to ``nodes_output``.

    Args:
        node_id: Identifier appended to the FMA IRI base and used to form
            the node CURIE.
        info: Dict for this record; read through ``TUIS_KEY`` and
            ``NAMES_KEY``.
        tui_mappings: Dict mapping ``str(tuple(tuis))`` (for example
            ``"('T029', 'T030')"``) to the category passed to
            ``kg2_util.make_node``.
        iri_mappings: Dict mapping a CURIE prefix to its IRI base; must
            contain ``"FMA"``.
        nodes_output: Object whose ``write(node)`` method receives the node.
        edges_output: Unused here; kept so the signature stays unchanged.

    Raises:
        KeyError: If ``"FMA"`` is missing from ``iri_mappings`` or the
            record's TUI combination is missing from ``tui_mappings``.
    """
    curie_prefix = "FMA"  # This should be replaced with a kg2_util prefix at some point
    provided_by = make_node_id(UMLS_SOURCE_PREFIX, curie_prefix)
    iri = iri_mappings[curie_prefix] + node_id
    node_curie = make_node_id(curie_prefix, node_id)
    tuis = info.get(TUIS_KEY, list())

    # Not extracted because nothing is emitted from them yet:
    # info[CUIS_KEY], and 'AUTHORITY' / 'DATE_LAST_MODIFIED' under
    # info[INFO_KEY].

    name, synonyms = _fma_name_and_synonyms(info.get(NAMES_KEY, dict()))

    # NOTE(review): the lookup key depends on the order of ``tuis``;
    # presumably they arrive in the order used by the mapping file -- confirm.
    category = tui_mappings[str(tuple(tuis))]
    node = kg2_util.make_node(node_curie, iri, name, category, "2023", provided_by)
    node['synonym'] = synonyms
    # join() yields the separator-free text directly; the previous
    # ``description.strip("; ")`` call discarded its result, so the leading
    # "; " was never removed.
    node['description'] = "; ".join("UMLS Semantic Type: STY:" + tui for tui in tuis)

    nodes_output.write(node)