Skip to content

Commit

Permalink
improving display
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Aug 14, 2024
1 parent 6cbcf81 commit df96a83
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 53 deletions.
2 changes: 1 addition & 1 deletion src/pyphetools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from . import validation


__version__ = "0.9.99"
__version__ = "0.9.100"


__all__ = [
Expand Down
4 changes: 3 additions & 1 deletion src/pyphetools/pp/v202/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# The private package members structure mimic the structure of the protobuf files of the `phenopacket-schema`.

from ._base import OntologyClass, ExternalReference, Evidence, Procedure
from ._base import OntologyClass, ExternalReference, Evidence, Procedure, display_time_element, time_element_to_days
# We re-export Timestamp
from ._base import GestationalAge, Age, AgeRange, TimeInterval, TimeElement, Timestamp, File
from ._biosample import Biosample
Expand Down Expand Up @@ -41,4 +41,6 @@
'SequenceLocation', 'SequenceState', 'LiteralSequenceExpression', 'DerivedSequenceExpression',
'RepeatedSequenceExpression', 'CytobandInterval', 'ChromosomeLocation', 'Allele', 'Haplotype', 'CopyNumber',
'VariationSet', 'Variation',
# functions
'display_time_element', 'time_element_to_days'
]
123 changes: 123 additions & 0 deletions src/pyphetools/pp/v202/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,129 @@
from ..parse import extract_message_scalar, extract_pb_message_scalar, extract_oneof_scalar, extract_pb_oneof_scalar



def _display_age_range_endpoint(endpoint) -> typing.Optional[str]:
    """Render one endpoint of an age range: an Age-like object directly, anything else as a TimeElement."""
    duration = getattr(endpoint, "iso8601duration", None)
    if duration is not None:
        return duration
    return display_time_element(time_element=endpoint)


def display_time_element(time_element: "TimeElement") -> typing.Optional[str]:
    """
    Generate a string representing the age of an individual intended for display in a notebook etc.

    The kind of element is determined from whichever accessor of `time_element` is set:

    * gestational age -> ``G<weeks>W<days>D`` (the day part is omitted when it is missing or zero)
    * age -> the ISO 8601 duration string of the age
    * ontology class -> ``<label> (<id>)``
    * age range -> ``range: <start>-<end>``

    Timestamp and TimeInterval are not supported because they do not represent ages (in general).

    :param time_element: the time element to render
    :returns: the display string, or `None` if `time_element` wraps no element
    :raises ValueError: if the wrapped element is not one of the supported kinds
    """
    if time_element.element is None:
        return None

    gest_age = time_element.gestational_age
    if gest_age is not None:
        days = gest_age.days
        # NOTE(review): `days` is presumably optional on GestationalAge - confirm against the class.
        # Guarding against `None` keeps the comparison from raising a TypeError.
        if days is not None and days > 0:
            return f"G{gest_age.weeks}W{days}D"
        return f"G{gest_age.weeks}W"

    age = time_element.age
    if age is not None:
        return age.iso8601duration

    oclass = time_element.ontology_class
    if oclass is not None:
        return f"{oclass.label} ({oclass.id})"

    age_range = time_element.age_range
    if age_range is not None:
        # In the Phenopacket Schema the endpoints of an AgeRange are Age messages, which have no
        # `element` accessor, so they must not be fed back into this function as if they were
        # TimeElements. Endpoints without `iso8601duration` are still handled recursively.
        start_display = _display_age_range_endpoint(age_range.start)
        end_display = _display_age_range_endpoint(age_range.end)
        return f"range: {start_display}-{end_display}"

    # If we get here, we could not identify the type of TimeElement.
    # Timestamp and TimeInterval are not supported because they do not represent ages (in general).
    raise ValueError(f"Did not recognize type of TimeElement {type(time_element.element)}")



def iso_to_days(iso_age:str) -> int:
    """
    Transform an ISO8601 age string (e.g., P3Y2M) into the corresponding number of days to facilitate sorting.

    A year counts as 365.25 days and a month as 30.436875 days; the year and month contributions are
    each truncated to whole days before being summed. Week designators (e.g., P2W) are supported.
    A time component (everything from ``T`` on) is accepted but ignored, because the result has day
    resolution. A bare ``P`` yields 0.

    :param iso_age: ISO8601 age string (e.g., P3Y2M)
    :type iso_age: str
    :returns: number of days
    :rtype: int
    :raises ValueError: if `iso_age` is not a string, or is not a duration of the form PnYnMnWnD
    """
    # Function-scope import so that this block does not rely on the module's import section.
    import re

    if not isinstance(iso_age, str):
        raise ValueError(f"Warning, did not recognize type of iso_age: {iso_age}, type={type(iso_age)}")
    # Components must appear in ISO order (years, months, weeks, days); anything else is rejected
    # rather than silently contributing 0 days to the sort key.
    match = re.fullmatch(r"P(?:(\d+)Y)?(?:(\d+)M)?(?:(\d+)W)?(?:(\d+)D)?(?:T.*)?", iso_age)
    if match is None:
        raise ValueError(f"Invalid age string: {iso_age}")
    years, months, weeks, days = (int(group) if group else 0 for group in match.groups())
    return int(365.25 * years) + int(30.436875 * months) + 7 * weeks + days

# Approximate age in days used as a sort key for each supported HPO onset label.
# Built once at import time instead of on every call.
_HPO_ONSET_LABEL_TO_DAYS = {
    "Antenatal onset": -1,
    "Embryonal onset": -7 * 40,
    "Fetal onset": -7 * 29,
    "Late first trimester onset": -7 * 29,
    "Second trimester onset": -7 * 26,
    "Third trimester onset": -7 * 22,
    "Congenital onset": 0,
    "Neonatal onset": 1,
    "Pediatric onset": 29,
    # Legacy misspelling of "Pediatric onset"; kept so that existing callers keep working.
    "Pediatrial onset": 29,
    "Infantile onset": 29,
    "Childhood onset": 365.25,
    "Juvenile onset": 5 * 365.25,
    "Adult onset": 16 * 365.25,
    "Young adult onset": 16 * 365.25,
    "Early young adult onset": 16 * 365.25,
    "Intermediate young adult onset": 19 * 365.25,
    "Late young adult onset": 25 * 365.25,
    "Middle age onset": 40 * 365.25,
    "Late onset": 60 * 365.25,
}


def ontology_class_to_days(oclass: "OntologyClass") -> float:
    """
    Map an HPO onset term to an approximate age in days so that onsets can be sorted.

    The lookup uses the label of the term, not its identifier. Prenatal onsets map to negative
    values and congenital onset to 0.

    :param oclass: ontology class whose `label` is an HPO onset label (e.g., "Childhood onset")
    :returns: approximate age in days for the onset
    :raises ValueError: if the label is not one of the supported HPO onset labels
    """
    try:
        return _HPO_ONSET_LABEL_TO_DAYS[oclass.label]
    except KeyError:
        raise ValueError(f"Did not recognize HPO Onset class: {oclass.label} ({oclass.id})") from None

def time_element_to_days(time_element: "TimeElement") -> float:
    """
    Map a time element to a number of days that can be used as a sort key.

    * no element -> -2, so that items without an age sort first
    * gestational age -> -1; all gestational ages share this key and are not ordered among
      themselves. If a study focuses specifically on gestational age, then a different sorting
      routine should be implemented.
    * age -> days computed from the ISO 8601 duration
    * ontology class -> approximate days for the HPO onset label
    * age range -> the key of the start of the range

    Timestamp and TimeInterval are not supported because they do not represent ages (in general).

    :param time_element: the time element to convert
    :returns: the sort key in days
    :raises ValueError: if the wrapped element is not one of the supported kinds
    """
    if time_element.element is None:
        return -2
    if time_element.gestational_age is not None:
        return -1

    age = time_element.age
    if age is not None:
        return iso_to_days(age.iso8601duration)

    oclass = time_element.ontology_class
    if oclass is not None:
        return ontology_class_to_days(oclass=oclass)

    age_range = time_element.age_range
    if age_range is not None:
        start = age_range.start
        # In the Phenopacket Schema the endpoints of an AgeRange are Age messages, which have no
        # `element` accessor, so an Age start is converted directly. A start without
        # `iso8601duration` is still treated as a TimeElement.
        duration = getattr(start, "iso8601duration", None)
        if duration is not None:
            return iso_to_days(duration)
        return time_element_to_days(time_element=start)

    # If we get here, we could not identify the type of TimeElement.
    # Timestamp and TimeInterval are not supported because they do not represent ages (in general).
    raise ValueError(f"Did not recognize type of TimeElement {type(time_element.element)}")








class OntologyClass(MessageMixin):
"""
`OntologyClass` represents classes (terms) from ontologies, and is used in many places throughout
Expand Down
78 changes: 33 additions & 45 deletions src/pyphetools/visualization/individual_table.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from collections import defaultdict
import phenopackets as PPKt
from typing import Dict, List, Set
import sys
import typing


from ..creation.constants import Constants
from ..creation import Individual, HpTerm, MetaData
from .simple_patient import SimplePatient
from .html_table_generator import HtmlTableGenerator
from ..pp.v202 import display_time_element, time_element_to_days
from ..pp.v202 import TimeElement as TimeElement202


class Age2Day:
Expand Down Expand Up @@ -38,8 +40,9 @@ class IndividualTable:
table = PhenopacketTable.from_individuals(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))
"""
def __init__(self, individual_list:List[Individual],
metadata:MetaData=None) -> None:
def __init__(self,
individual_list: typing.List[Individual],
metadata: MetaData=None) -> None:
"""
:param individual_list: List of Individual objects to be displayed
:type individual_list: List[Individual]
Expand Down Expand Up @@ -74,7 +77,7 @@ def __init__(self, individual_list:List[Individual],
self._caption = f"{n_phenopackets} phenopackets - {pmid_str}"


def to_html(self):
def to_html(self) -> str:
header_items = ["Individual", "Disease", "Genotype", "Phenotypic features"]
rows = []
for spat in self._spat_list:
Expand All @@ -83,7 +86,7 @@ def to_html(self):
return generator.get_html()


def _individual_to_phenopacket(self, individual, metadata):
def _individual_to_phenopacket(self, individual, metadata) -> PPKt.Phenopacket:
"""Create a phenopacket with the information from this individual
We try to get information about the publication from the Individual object first. If this is not
Expand Down Expand Up @@ -118,16 +121,23 @@ def _individual_to_phenopacket(self, individual, metadata):
metadata.external_references.append(extref)
return individual.to_ga4gh_phenopacket(metadata=metadata)

def _simple_patient_to_table_row(self, spat:SimplePatient) -> List[str]:
def _simple_patient_to_table_row(self, spat:SimplePatient) -> typing.List[str]:
"""
private method intended to create one table row that represents one individual
:param spat: An object that represents one individual
:type spat: SimplePatient
"""
row_items = []
# Patient information
age_string = spat.get_age() or "age: n/a"
pat_info = spat.get_subject_id() + " (" + spat.get_sex() + "; " + age_string + ")"
time_element = spat.get_age()
if time_element is None:
age_string = None
else:
age_string = display_time_element(time_element=time_element)
if age_string is None:
pat_info = spat.get_subject_id() + " (" + spat.get_sex() + ")"
else:
pat_info = spat.get_subject_id() + " (" + spat.get_sex() + "; " + age_string + ")"
row_items.append( pat_info)
row_items.append( spat.get_disease())
# Variant information
Expand All @@ -150,7 +160,8 @@ def _simple_patient_to_table_row(self, spat:SimplePatient) -> List[str]:
return row_items


def get_hpo_cell(self, term_by_age_dict:Dict[str,HpTerm]) -> str:
def get_hpo_cell(self,
term_by_age_dict: typing.Dict[str, HpTerm]) -> str:
"""
:param term_by_age_dict: A dictionary with key - ISO8601 string, value - list of HpTerm objects
:type term_by_age_dict: Dict[str,HpTerm]
Expand All @@ -163,55 +174,32 @@ def get_hpo_cell(self, term_by_age_dict:Dict[str,HpTerm]) -> str:
for onset in sorted_age:
hpo_list = term_by_age_dict.get(onset.key)
hpos = "; ".join([hpo.__str__() for hpo in hpo_list])
if onset.key == Constants.NOT_PROVIDED:
if onset.key.element is None:
lines.append(hpos)
else:
lines.append(f"<b>{onset.key}</b>: {hpos}")
return "<br/>".join(lines)

@staticmethod
def iso_to_days(iso_age:str) -> int:
    """
    Transform the ISO8601 age strings (e.g., P3Y2M) into the corresponding number of days to facilitate sorting.
    Note that if age is not provided we want to sort it to the end of the list so we transform to a very high number of days.

    :param iso_age: ISO8601 age string (e.g., P3Y2M)
    :type iso_age: str
    :returns: number of days
    :rtype: int
    :raises ValueError: if `iso_age` is provided but does not start with "P"
    """
    if iso_age == Constants.NOT_PROVIDED:
        return sys.maxsize
    if not iso_age.startswith("P"):
        # Report the offending input itself; the parsed remainder does not exist yet at this point.
        raise ValueError(f"Invalid age string: {iso_age}")
    days = 0
    remainder = iso_age[1:]
    # Consume the year, month and day components in order; the year and month contributions are
    # each truncated to whole days.
    for designator, days_per_unit in (("Y", 365.25), ("M", 30.436875), ("D", 1)):
        idx = remainder.find(designator)
        if idx != -1:
            days += int(days_per_unit * int(remainder[:idx]))
            remainder = remainder[idx + 1:]
    return days

@staticmethod
def get_sorted_age2data_list(ages:Set[str]) -> List[Age2Day]:
def get_sorted_age2data_list(ages: typing.Set[typing.Union[str,TimeElement202]]) -> typing.List[Age2Day]:
"""
Create a sorted list of Age2Day objects that we use to display the age in the HTML output.
Create a sorted list of Age2Day objects that we use to display the age in the HTML output. We show terms without age of onset before all other terms.
:param ages: A set of ISO 8601 age strings
:type ages: Set[str]
:returns: A list of sorted Age2Day objects
:rtype: List[Age2Day]
"""
age2day_list = list(Age2Day(age, IndividualTable.iso_to_days(age)) for age in ages)
age_to_days_dict = dict()
for age in ages:
if isinstance(age, str):
days = -2
else:
days = time_element_to_days(time_element=age)
age_to_days_dict[age] = days

age2day_list = list(Age2Day(age, age_to_days_dict.get(age)) for age in ages)
sorted_list = sorted(age2day_list, key=lambda x: x.days)
return sorted_list
12 changes: 6 additions & 6 deletions src/pyphetools/visualization/simple_patient.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,19 +170,19 @@ def get_age_in_years(self) -> typing.Optional[float]:
def get_disease(self) -> str:
    """
    :return: the value stored in `_disease`, or "n/a" if that value is empty or None
    :rtype: str
    """
    disease = self._disease
    return disease if disease else "n/a"

def get_observed_hpo_d(self):
def get_observed_hpo_d(self) -> typing.Dict[str, HpTerm]:
"""
returns map of observed phenotypic features with key (string) HP id, value, HpTerm from creation submodule
"""
return self._observed_hpo_terms

def get_excluded_hpo_d(self):
def get_excluded_hpo_d(self) -> typing.Dict[str, HpTerm]:
"""
:return: map of excluded phenotypic features with key (string) HP id, value, HpTerm from creation submodule
"""
return self._excluded_hpo_terms

def get_total_hpo_count(self):
def get_total_hpo_count(self) -> int:
"""
:return: total count of HPO terms (observed and excluded)
:rtype: int
Expand All @@ -192,10 +192,10 @@ def get_total_hpo_count(self):
def get_variant_list(self):
    """
    :return: the object stored in `_variant_list`, returned as is (no copy is made)
    """
    variants = self._variant_list
    return variants

def has_pmid(self):
def has_pmid(self) -> bool:
return self._pmid is not None

def get_pmid(self):
def get_pmid(self) -> str:
return self._pmid

def contains_observed_term_id(self, hpo_term_id) -> bool:
Expand All @@ -211,7 +211,7 @@ def get_excluded_term_by_id(self, hpo_term_id)-> typing.Optional[HpTerm]:
return self._excluded_hpo_terms.get(hpo_term_id)


def get_term_by_age_dict(self):
def get_term_by_age_dict(self) -> typing.Dict[typing.Union[str,TimeElement202], typing.List[HpTerm]]:
return self._by_age_dictionary

def is_deceased(self) -> bool:
Expand Down

0 comments on commit df96a83

Please sign in to comment.