Skip to content

Commit

Permalink
Merge pull request #3 from monarch-initiative/monarch-phenotype-profi…
Browse files Browse the repository at this point in the history
…le-ingest-mappingtable

Monarch phenotype profile ingest mappingtable
  • Loading branch information
kevinschaper authored Oct 15, 2024
2 parents 434f739 + 7f972bd commit 51db960
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,7 @@

# Initiate koza app and mondo map from sssom file
koza_app = get_koza_app("hpoa_gene_to_phenotype")

try:
# For ingest (this data is unavailable for mock_koza)
mondo_map = koza_app.source.config.sssom_config.lut
except:
# For testing ingest (allows mock_koza to bring in map_cache)
mondo_map = koza_app.get_map('mondo_map')
mondo_map = koza_app.get_map('mondo_map')


while (row := koza_app.get_row()) is not None:
Expand All @@ -39,11 +33,9 @@
org_id = row["disease_id"].replace("ORPHA:", "Orphanet:")
dis_id = org_id
if dis_id in mondo_map:
if "MONDO" in mondo_map[dis_id]:
dis_id = mondo_map[dis_id]["MONDO"]

dis_id = mondo_map[dis_id]['subject_id']

# TODO: we may want to incorporate the original disease id somehow?
# TODO: add the disease_context_qualifier information here once biolink is updated

association = GeneToPhenotypicFeatureAssociation(id="uuid:" + str(uuid.uuid1()),
subject=gene_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ metadata: "src/monarch_phenotype_profile_ingest/metadata.yaml"
files:
- 'data/genes_to_phenotype.txt'

depends_on:
- 'src/monarch_phenotype_profile_ingest/mondo_sssom_config.yaml'

header: 0
format: 'csv'
delimiter: '\t'
Expand Down Expand Up @@ -41,13 +44,14 @@ edge_properties:

transform_mode: 'flat'

# For mondo disease_id mapping
# We are using koza's mapping table functionality (see depends_on) instead of the sssom_config below (we may switch to sssom_config down the road...)
# # For mondo disease_id mapping
# sssom_config:
# files:
# - "data/mondo.sssom.tsv"
# subject_target_prefixes:
# - "MONDO"
# use_match:
# subject_target_prefixes:
# - "MONDO"
# use_match:
# - "exact"


27 changes: 27 additions & 0 deletions src/monarch_phenotype_profile_ingest/mondo_sssom_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
name: 'mondo_map'

files:
- './data/mondo.sssom.tsv'

format: 'csv'
delimiter: '\t'
header: 51

filters:
- inclusion: 'include'
column: 'predicate_id'
filter_code: 'eq'
value: 'skos:exactMatch'

key: object_id

values:
- subject_id

columns:
- subject_id
- subject_label
- predicate_id
- object_id
- object_label
- mapping_justification
2 changes: 1 addition & 1 deletion tests/test_disease_mode_of_inheritance_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from biolink_model.datamodel.pydanticmodel_v2 import DiseaseOrPhenotypicFeatureToGeneticInheritanceAssociation
from koza.utils.testing_utils import mock_koza # noqa: F401

INGEST_NAME = "disease_mode_of_inheritance"
INGEST_NAME = "hpoa_disease_mode_of_inheritance"
INGEST_CODE = "./src/monarch_phenotype_profile_ingest/disease_mode_of_inheritance_transform.py"


Expand Down
2 changes: 1 addition & 1 deletion tests/test_disease_to_phenotype_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from biolink_model.datamodel.pydanticmodel_v2 import DiseaseToPhenotypicFeatureAssociation
from koza.utils.testing_utils import mock_koza # noqa: F401

INGEST_NAME = "disease_to_phenotype"
INGEST_NAME = "hpoa_disease_to_phenotype"
INGEST_CODE = "./src/monarch_phenotype_profile_ingest/disease_to_phenotype_transform.py"


Expand Down
2 changes: 1 addition & 1 deletion tests/test_gene_to_disease_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def row():
@pytest.fixture
def basic_g2d_entities(mock_koza, row):
return mock_koza(
name="gene_to_disease",
name="hpoa_gene_to_disease",
data=row,
transform_code="./src/monarch_phenotype_profile_ingest/gene_to_disease_transform.py"
)
Expand Down
12 changes: 8 additions & 4 deletions tests/test_gene_to_phenotype_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,21 @@ def map_cache():
Therefore, we supply the same information we would have supplied anyway, as a koza map instead.
This requires a minor alteration to the ingest_transform.py script itself
"""
return {"mondo_map": {"MONDO:0013588":"OMIM:614129",
"MONDO:0009341":"OMIM:235730",
"MONDO:0013212":"OMIM:613287"}}
# return {"mondo_map": {"MONDO:0013588":"OMIM:614129",
# "MONDO:0009341":"OMIM:235730",
# "MONDO:0013212":"OMIM:613287"}}

return {"mondo_map": {"MONDO:0013588":{"subject_id":"OMIM:614129"},
"MONDO:0009341":{"subject_id":"OMIM:235730"},
"MONDO:0013212":{"subject_id":"OMIM:613287"}}}


@pytest.fixture
def source_name():
"""
:return: string source name of HPOA Gene to Phenotype ingest
"""
return "gene_to_phenotype"
return "hpoa_gene_to_phenotype"


@pytest.fixture
Expand Down

0 comments on commit 51db960

Please sign in to comment.