Skip to content

Commit

Permalink
#281 try two with custom dictionary search
Browse files Browse the repository at this point in the history
  • Loading branch information
ecwood committed Jul 17, 2023
1 parent c885961 commit 3315a25
Showing 1 changed file with 43 additions and 6 deletions.
49 changes: 43 additions & 6 deletions semmeddb_tuple_list_json_to_kg_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@

# Regex fragment that matches any run of characters (excluding newlines).
# NOTE(review): presumably used by make_regex_form() as a wildcard for an
# unspecified subject/object code -- confirm against that function's body.
ANYTHING_REGEX = "(.)*"
# Placeholder value that exclude-list records use for a subject/object code
# that is not specified; codes equal to this are treated as "no constraint".
EXCLUDE_EMPTY_STR = "n/a"

# Names of the supported exclusion types. Each exclude-list record's
# 'exclusion_type' must equal one of these strings exactly (note the
# inconsistent capitalization); they are also the top-level keys of the
# dictionary built by create_semmed_exclude_list().
SEMANTIC_TYPE_EXCLUSION = "semantic type exclusion"
DOMAIN_EXCLUSION = "Domain exclusion"
RANGE_EXCLUSION = "Range exclusion"

def get_remapped_cuis(retired_cui_file_name: str) -> dict:
"""
Expand All @@ -53,6 +55,9 @@ def get_remapped_cuis(retired_cui_file_name: str) -> dict:
remapped_cuis[old_cui] = new_cui
return remapped_cuis

def date():
    """
    Print the current local date and time to stdout as ``YYYY-MM-DD HH:MM:SS``.

    This is a debugging/timing aid: it writes one timestamp line and has no
    other effect.

    :return: None (the timestamp is printed, not returned)
    """
    # print() always returns None, so the call is written as a plain
    # statement instead of ``return print(...)``, which wrongly suggested
    # that a timestamp value was being handed back to the caller.
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


def make_regex_form(subject_code, predicate, object_code):
if subject_code is None or subject_code == EXCLUDE_EMPTY_STR:
Expand Down Expand Up @@ -194,19 +199,45 @@ def get_rels_to_make_for_row(subject_str: str, object_str: str, predicate: str,

def create_semmed_exclude_list(semmed_exclude_list_name):
    """
    Load the SemMedDB exclude list and index it for constant-time lookups.

    The YAML file is expected to contain a top-level key
    'excluded_semmedb_records' holding a list of records, each with the keys
    'exclusion_type', 'semmed_subject_code', 'semmed_object_code' and
    'semmed_predicate'.

    :param semmed_exclude_list_name: path of the YAML exclude-list file
    :return: a dictionary with exactly three keys:
             - SEMANTIC_TYPE_EXCLUSION: set of semantic type codes that are
               excluded regardless of predicate or position
             - DOMAIN_EXCLUSION: dict mapping predicate -> set of excluded
               subject codes for that predicate
             - RANGE_EXCLUSION: dict mapping predicate -> set of excluded
               object codes for that predicate
    :raises AssertionError: if a record has an unrecognized 'exclusion_type'
    :raises KeyError: if the file or a record is missing an expected key
    """
    semmed_list = kg2_util.safe_load_yaml_from_string(kg2_util.read_file_to_string(semmed_exclude_list_name))

    # One container per exclusion type; the keys double as the set of
    # exclusion types that are considered valid below.
    exclusions = {
        SEMANTIC_TYPE_EXCLUSION: set(),
        DOMAIN_EXCLUSION: dict(),
        RANGE_EXCLUSION: dict(),
    }

    for exclude_item in semmed_list['excluded_semmedb_records']:
        exclusion_type = exclude_item['exclusion_type']
        # Fail loudly on a typo in the exclude list instead of silently
        # ignoring the record.
        assert exclusion_type in exclusions, exclusion_type

        sub_code = exclude_item['semmed_subject_code']
        obj_code = exclude_item['semmed_object_code']
        pred = exclude_item['semmed_predicate']

        if exclusion_type == SEMANTIC_TYPE_EXCLUSION:
            # Either side may be the "n/a" placeholder; only real codes are
            # recorded, and the predicate is irrelevant for this type.
            if sub_code != EXCLUDE_EMPTY_STR:
                exclusions[SEMANTIC_TYPE_EXCLUSION].add(sub_code)
            if obj_code != EXCLUDE_EMPTY_STR:
                exclusions[SEMANTIC_TYPE_EXCLUSION].add(obj_code)
        elif exclusion_type == DOMAIN_EXCLUSION:
            # Domain exclusions constrain the subject of a given predicate.
            exclusions[DOMAIN_EXCLUSION].setdefault(pred, set()).add(sub_code)
        elif exclusion_type == RANGE_EXCLUSION:
            # Range exclusions constrain the object of a given predicate.
            exclusions[RANGE_EXCLUSION].setdefault(pred, set()).add(obj_code)

    return exclusions


if __name__ == '__main__':
args = make_arg_parser().parse_args()
mrcui_file_name = args.mrcui_file_name # '/home/ubuntu/kg2-build/umls/META/MRCUI.RRF'
semmed_exclude_list_name = args.semmedExcludeList
EXCLUDE_LIST_REGEX = create_semmed_exclude_list(semmed_exclude_list_name)
exclusions = create_semmed_exclude_list(semmed_exclude_list_name)
input_file_name = args.inputFile
output_file_name = args.outputFile
test_mode = args.test
Expand Down Expand Up @@ -243,10 +274,16 @@ def create_semmed_exclude_list(semmed_exclude_list_name):
else:
negated = False

date()
domain_range_exclusion = False
if EXCLUDE_LIST_REGEX.match(make_regex_form(subject_semtype, predicate, object_semtype)) is not None:

if subject_semtype in exclusions[SEMANTIC_TYPE_EXCLUSION] or object_semtype in exclusions[SEMANTIC_TYPE_EXCLUSION] \
or subject_semtype in exclusions[DOMAIN_EXCLUSION].get(predicate, set()) or object_semtype in exclusions[RANGE_EXCLUSION].get(predicate, set()):
domain_range_exclusion = True

date()
print("--")

# Create the new edge(s) based on this SemMedDB row
for rel_to_make in get_rels_to_make_for_row(subject_cui_str, object_cui_str, predicate, remapped_cuis):
subject_curie = rel_to_make[0]
Expand Down

0 comments on commit 3315a25

Please sign in to comment.