-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Convert SSSOM TSV => JSON => TSV and confirm the MappingSetDataFrame
object remains consistent.
#429
Convert SSSOM TSV => JSON => TSV and confirm the MappingSetDataFrame
object remains consistent.
#429
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -212,21 +212,28 @@ def parse_sssom_table( | |
logging.info(f"Externally provided metadata {k}:{v} is added to metadata set.") | ||
sssom_metadata[k] = v | ||
meta = sssom_metadata | ||
|
||
if "curie_map" in sssom_metadata: | ||
if CURIE_MAP in sssom_metadata: | ||
if prefix_map: | ||
# Convert sssom_metadata[CURIE_MAP] keys to lowercase for case-insensitive comparison | ||
curie_map_lower = {k.lower(): k for k in sssom_metadata[CURIE_MAP].keys()} | ||
|
||
for k, v in prefix_map.items(): | ||
if k in sssom_metadata[CURIE_MAP]: | ||
if sssom_metadata[CURIE_MAP][k] != v: | ||
k_lower = k.lower() | ||
|
||
if k_lower in curie_map_lower: | ||
original_key = curie_map_lower[k_lower] | ||
if sssom_metadata[CURIE_MAP][original_key] != v: | ||
logging.warning( | ||
f"SSSOM prefix map {k} ({sssom_metadata[CURIE_MAP][k]}) " | ||
f"conflicts with provided ({prefix_map[k]})." | ||
f"SSSOM prefix map {original_key} ({sssom_metadata[CURIE_MAP][original_key]}) " | ||
f"conflicts with provided ({v})." | ||
) | ||
sssom_metadata[CURIE_MAP][original_key] = v | ||
else: | ||
logging.info( | ||
f"Externally provided metadata {k}:{v} is added to metadata set." | ||
) | ||
sssom_metadata[CURIE_MAP][k] = v | ||
|
||
prefix_map = sssom_metadata[CURIE_MAP] | ||
|
||
meta_all = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) | ||
|
@@ -264,10 +271,32 @@ def parse_sssom_json( | |
) -> MappingSetDataFrame: | ||
"""Parse a TSV to a :class:`MappingSetDocument` to a :class`MappingSetDataFrame`.""" | ||
raise_for_bad_path(file_path) | ||
metadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) | ||
|
||
with open(file_path) as json_file: | ||
jsondoc = json.load(json_file) | ||
|
||
# Get prefix map from jsondoc and update metadata. | ||
# This takes priority over default prefix_map in case of a tie. | ||
jsondoc_prefix_map = jsondoc["@context"] | ||
|
||
# Convert keys in both maps to lower case for comparison | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this wise? I think we should not be doing all of this lowercasing. The prefix maps should be correct as they are. Doing all of these ad-hoc operations makes sssom-py insanely hard to work with (I can tell you this from first hand experience since I have tried to understand the whole history of the package) |
||
if prefix_map: | ||
lowercase_prefix_map = {k.lower(): v for k, v in prefix_map.items()} | ||
|
||
# Iterate over jsondoc_prefix_map | ||
for key, value in jsondoc_prefix_map.items(): | ||
# If lowercase key exists in lowercase_prefix_map, update the value and key | ||
if key.lower() in lowercase_prefix_map: | ||
# Remove the old key-value pair | ||
if key in prefix_map: | ||
del prefix_map[key] | ||
elif key.lower() in prefix_map: | ||
del prefix_map[key.lower()] | ||
# Add the new key-value pair | ||
prefix_map[key] = value | ||
|
||
metadata = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta) | ||
|
||
msdf = from_sssom_json(jsondoc=jsondoc, prefix_map=metadata.prefix_map, meta=metadata.metadata) | ||
# df: pd.DataFrame = msdf.df | ||
# if mapping_predicates and not df.empty(): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
#curie_map: | ||
# FBbt: "http://purl.obolibrary.org/obo/FBbt_" | ||
# ORCID: "https://orcid.org/" | ||
# UBERON: "http://purl.obolibrary.org/obo/UBERON_" | ||
#creator_id: | ||
# - "ORCID:0000-0002-6095-8718" | ||
subject_id subject_label predicate_id object_id mapping_justification | ||
FBbt:00000001 organism semapv:crossSpeciesExactMatch UBERON:0000468 semapv:ManualMappingCuration |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,11 +19,12 @@ | |
from_sssom_dataframe, | ||
from_sssom_json, | ||
from_sssom_rdf, | ||
parse_sssom_json, | ||
parse_sssom_table, | ||
) | ||
from sssom.typehints import Metadata | ||
from sssom.util import PREFIX_MAP_KEY, sort_df_rows_columns | ||
from sssom.writers import write_table | ||
from sssom.writers import to_json, write_json, write_table | ||
from tests.test_data import data_dir as test_data_dir | ||
from tests.test_data import test_out_dir | ||
|
||
|
@@ -245,3 +246,20 @@ def test_parse_obographs_merged(self): | |
) | ||
msdf = parse_sssom_table(outfile) | ||
self.assertTrue(custom_curie_map.items() <= msdf.prefix_map.items()) | ||
|
||
def test_tsv_to_json_and_back(self): | ||
"""Test converting SSSOM TSV => JSON => SSSOM TSV such that it is reproducible.""" | ||
sample_tsv = f"{test_data_dir}/sample1.sssom.tsv" | ||
json_outfile = f"{test_out_dir}/sample1.json" | ||
msdf1 = parse_sssom_table(sample_tsv) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @hrshdhgd other hints: when writing tests, make the things you're testing more explicit. The way this test is written, it doesn't really test anythign specific. Why not explicitly check that |
||
msdf1.clean_prefix_map() | ||
json_doc = to_json(msdf1) | ||
|
||
self.assertEqual(msdf1.prefix_map, json_doc["@context"]) | ||
|
||
with open(json_outfile, "w") as file: | ||
write_json(msdf1, file) | ||
|
||
msdf2 = parse_sssom_json(json_outfile) | ||
msdf2.clean_prefix_map() | ||
self.assertEqual(msdf1.prefix_map, msdf2.prefix_map) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have rewritten all of this code using
curies
already and am waiting for #426 (ready) then #401 (needs some more work) to get merged. I think it's better that you reduce effort on re-writing it again, since thecuries
solution is much more clean and elegant.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I split the code for updating this into #431