Skip to content

Commit

Permalink
methods for working with pp-v202-classes
Browse files Browse the repository at this point in the history
  • Loading branch information
pnrobinson committed Aug 31, 2024
1 parent df96a83 commit 2e67965
Show file tree
Hide file tree
Showing 10 changed files with 537 additions and 5 deletions.
2 changes: 1 addition & 1 deletion src/pyphetools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from . import validation


__version__ = "0.9.100"
__version__ = "0.9.101"


__all__ = [
Expand Down
3 changes: 3 additions & 0 deletions src/pyphetools/creation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@
from .hp_term import HpTerm, HpTermBuilder
from .import_template import TemplateImporter
from .individual import Individual
from .measurements import Measurements
from .metadata import MetaData
from .mode_of_inheritance import Moi
from .ontology_terms import OntologyTerms
from .option_column_mapper import OptionColumnMapper
from .promoter_variant import PromoterVariant
from .pyphetools_age import PyPheToolsAge, AgeSorter, HPO_ONSET_TERMS
from .sex_column_mapper import SexColumnMapper
from .simple_column_mapper import SimpleColumnMapper
Expand Down
41 changes: 41 additions & 0 deletions src/pyphetools/creation/hgvs_variant.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import phenopackets
from .variant import Variant
from ..pp.v202 import GeneDescriptor as GeneDescriptor202
from ..pp.v202 import VariantInterpretation as VariantInterpretation202
from ..pp.v202 import VariationDescriptor as VariationDescriptor202
from ..pp.v202 import Expression as Expression202
from ..pp.v202 import MoleculeContext as MoleculeContext202
from ..pp.v202 import VcfRecord as VcfRecord202
import string
from typing import Dict
import random
Expand Down Expand Up @@ -141,3 +147,38 @@ def to_ga4gh(self, acmg=None):
vdescriptor.vcf_record.CopyFrom(vcf_record)
vinterpretation.variation_descriptor.CopyFrom(vdescriptor)
return vinterpretation

def to_variant_interpretation_202(self,
                                  acmg: str = None) -> VariantInterpretation202:
    """
    Build a pp.v202 ``VariantInterpretation`` object from this variant.

    The VCF fields, variant id and genomic molecule context are always set.
    The gene context is added only if both the HGNC id and the gene symbol
    are available; the c. and g. HGVS expressions are added when present.

    :param acmg: ACMG pathogenicity category, passed on to ``Variant._get_acmg_classification``
    :type acmg: str, optional
    :returns: the variant interpretation for this variant
    :rtype: VariantInterpretation202
    """
    record = VcfRecord202(genome_assembly=self._assembly,
                          chrom=self._chr,
                          pos=self._position,
                          ref=self._ref,
                          alt=self._alt)
    descriptor = VariationDescriptor202(id=self._variant_id,
                                        vcf_record=record,
                                        molecule_context=MoleculeContext202.genomic)
    have_gene = self._hgnc_id is not None and self._symbol is not None
    if have_gene:
        descriptor.gene_context = GeneDescriptor202(value_id=self._hgnc_id, symbol=self._symbol)
    # Coding expression first, then genomic expression; absent ones are skipped.
    for hgvs_syntax, hgvs_value in (("hgvs.c", self._hgvs), ("hgvs.g", self._g_hgvs)):
        if hgvs_value is not None:
            descriptor.expressions.append(Expression202(syntax=hgvs_syntax, value=hgvs_value))
    allelic_state = Variant._get_genotype_term(self._genotype)
    if allelic_state is None:
        # The helper returned no term for this genotype: report it and leave the allelic state unset.
        print(f"Did not recognize genotype {self._genotype}")
    else:
        descriptor.allelic_state = allelic_state
    interpretation = VariantInterpretation202(variation_descriptor=descriptor)
    classification = Variant._get_acmg_classification(acmg=acmg)
    if classification is None:
        print(f"Warning- did not recognize ACMG category {acmg}")
    else:
        interpretation.acmg_pathogenicity_classification = classification
    return interpretation
99 changes: 99 additions & 0 deletions src/pyphetools/creation/measurements.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import typing
from ..pp.v202 import Measurement as Measurement202
from ..pp.v202 import OntologyClass as OntologyClass202
from ..pp.v202 import Value as Value202
from ..pp.v202 import Quantity as Quantity202
from ..pp.v202 import ReferenceRange as ReferenceRange202


# UCUM unit terms shared by the measurement factory methods below.
# Mass concentration units
ng_per_dl = OntologyClass202(
    id="UCUM:ng/dL",
    label="nanogram per deciliter",
)
ng_per_ml = OntologyClass202(
    id="UCUM:ng/mL",
    label="nanogram per milliliter",
)
pg_per_l = OntologyClass202(
    id="UCUM:pg/L",
    label="picogram per liter",
)
pg_per_ml = OntologyClass202(
    id="UCUM:pg/mL",
    label="picogram per milliliter",
)
# Molar concentration unit
nmol_per_l = OntologyClass202(
    id="UCUM:nmol/L",
    label="nanomole per liter",
)


class Measurements:
    """
    Convenience factory methods that create pp.v202 ``Measurement`` objects
    for concentration values expressed in one of the UCUM units defined at
    the top of this module.

    Each public method takes the assay code and label, the measured
    concentration and, optionally, the lower and upper limit of the
    reference range.
    """

    @staticmethod
    def _with_reference_range(assay: OntologyClass202,
                              unit: OntologyClass202,
                              value: float,
                              low: float,
                              high: float) -> Measurement202:
        """
        Create a Measurement whose quantity carries a reference range.

        :param assay: ontology term that identifies the assay
        :param unit: unit of the value and of the reference range
        :param value: the measured value
        :param low: lower limit of the reference range
        :param high: upper limit of the reference range
        :returns: the Measurement object
        """
        refrange = ReferenceRange202(unit=unit, low=low, high=high)
        # Use the unit passed by the caller; this used to be hard-coded to ng/dL.
        val = Value202(Quantity202(unit=unit, value=value, reference_range=refrange))
        return Measurement202(assay=assay, measurement_value=val)

    @staticmethod
    def _without_reference_range(assay: OntologyClass202,
                                 unit: OntologyClass202,
                                 value: float) -> Measurement202:
        """
        Create a Measurement whose quantity has no reference range.

        :param assay: ontology term that identifies the assay
        :param unit: unit of the value
        :param value: the measured value
        :returns: the Measurement object
        """
        # Use the unit passed by the caller; this used to be hard-coded to ng/dL.
        val = Value202(Quantity202(unit=unit, value=value))
        return Measurement202(assay=assay, measurement_value=val)

    @staticmethod
    def _from_assay_and_values(assay: OntologyClass202,
                               unit: OntologyClass202,
                               value: float,
                               low: typing.Optional[float],
                               high: typing.Optional[float]) -> Measurement202:
        """
        Create a Measurement, adding a reference range only if both limits are given.

        If only one of ``low`` and ``high`` is provided, the limit is ignored and
        the Measurement is created without a reference range.

        :param assay: ontology term that identifies the assay
        :param unit: unit of the value (and of the reference range, if any)
        :param value: the measured value
        :param low: lower limit of the reference range or None
        :param high: upper limit of the reference range or None
        :returns: the Measurement object
        """
        if low is not None and high is not None:
            return Measurements._with_reference_range(assay=assay, unit=unit, value=value, low=low, high=high)
        return Measurements._without_reference_range(assay=assay, unit=unit, value=value)

    @staticmethod
    def _concentration(code: str,
                       label: str,
                       unit: OntologyClass202,
                       concentration: float,
                       low: typing.Optional[float],
                       high: typing.Optional[float]) -> Measurement202:
        """
        Shared implementation of the unit-specific factory methods: build the assay term and delegate.
        """
        assay = OntologyClass202(id=code, label=label)
        return Measurements._from_assay_and_values(assay=assay, unit=unit, value=concentration, low=low, high=high)

    @staticmethod
    def nanogram_per_deciliter(code: str,
                               label: str,
                               concentration: float,
                               low: typing.Optional[float] = None,
                               high: typing.Optional[float] = None) -> Measurement202:
        """
        Create a Measurement for a concentration in ng/dL.

        :param code: identifier of the assay term
        :param label: label of the assay term
        :param concentration: measured concentration in ng/dL
        :param low: lower limit of the reference range (optional)
        :param high: upper limit of the reference range (optional)
        :returns: the Measurement object
        """
        return Measurements._concentration(code=code, label=label, unit=ng_per_dl,
                                           concentration=concentration, low=low, high=high)

    @staticmethod
    def nanogram_per_milliliter(code: str,
                                label: str,
                                concentration: float,
                                low: typing.Optional[float] = None,
                                high: typing.Optional[float] = None) -> Measurement202:
        """
        Create a Measurement for a concentration in ng/mL.

        :param code: identifier of the assay term
        :param label: label of the assay term
        :param concentration: measured concentration in ng/mL
        :param low: lower limit of the reference range (optional)
        :param high: upper limit of the reference range (optional)
        :returns: the Measurement object
        """
        return Measurements._concentration(code=code, label=label, unit=ng_per_ml,
                                           concentration=concentration, low=low, high=high)

    @staticmethod
    def picogram_per_liter(code: str,
                           label: str,
                           concentration: float,
                           low: typing.Optional[float] = None,
                           high: typing.Optional[float] = None) -> Measurement202:
        """
        Create a Measurement for a concentration in pg/L.

        :param code: identifier of the assay term
        :param label: label of the assay term
        :param concentration: measured concentration in pg/L
        :param low: lower limit of the reference range (optional)
        :param high: upper limit of the reference range (optional)
        :returns: the Measurement object
        """
        return Measurements._concentration(code=code, label=label, unit=pg_per_l,
                                           concentration=concentration, low=low, high=high)

    @staticmethod
    def picogram_per_milliliter(code: str,
                                label: str,
                                concentration: float,
                                low: typing.Optional[float] = None,
                                high: typing.Optional[float] = None) -> Measurement202:
        """
        Create a Measurement for a concentration in pg/mL.

        :param code: identifier of the assay term
        :param label: label of the assay term
        :param concentration: measured concentration in pg/mL
        :param low: lower limit of the reference range (optional)
        :param high: upper limit of the reference range (optional)
        :returns: the Measurement object
        """
        return Measurements._concentration(code=code, label=label, unit=pg_per_ml,
                                           concentration=concentration, low=low, high=high)

    @staticmethod
    def nanomole_per_liter(code: str,
                           label: str,
                           concentration: float,
                           low: typing.Optional[float] = None,
                           high: typing.Optional[float] = None) -> Measurement202:
        """
        Create a Measurement for a concentration in nmol/L.

        :param code: identifier of the assay term
        :param label: label of the assay term
        :param concentration: measured concentration in nmol/L
        :param low: lower limit of the reference range (optional)
        :param high: upper limit of the reference range (optional)
        :returns: the Measurement object
        """
        return Measurements._concentration(code=code, label=label, unit=nmol_per_l,
                                           concentration=concentration, low=low, high=high)




38 changes: 38 additions & 0 deletions src/pyphetools/creation/ontology_terms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from ..pp.v202 import OntologyClass as OntologyClass202
import typing


class OntologyTerms:
    """
    Convenience class that offers Builders for commonly used OntologyClass objects.

    All methods are static and return a newly created OntologyClass on each call.
    """

    ## Allelic state (zygosity) terms
    @staticmethod
    def heterozygous() -> OntologyClass202:
        """
        GENO (Genotype Ontology) class for heterozygous
        """
        return OntologyClass202(id="GENO:0000135", label="heterozygous")

    @staticmethod
    def homozygous() -> OntologyClass202:
        """
        GENO (Genotype Ontology) class for homozygous
        """
        return OntologyClass202(id="GENO:0000136", label="homozygous")

    @staticmethod
    def hemizygous() -> OntologyClass202:
        """
        GENO (Genotype Ontology) class for hemizygous
        """
        return OntologyClass202(id="GENO:0000134", label="hemizygous")

    ## Onset terms
    # The decorator was missing; without it a call on an instance passes the
    # instance as an unexpected positional argument and raises TypeError.
    @staticmethod
    def congenital_onset() -> OntologyClass202:
        """
        HPO class for Congenital onset
        """
        return OntologyClass202(id="HP:0003577", label="Congenital onset")


170 changes: 170 additions & 0 deletions src/pyphetools/creation/promoter_variant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import string
import random
import typing
from ..pp.v202 import AcmgPathogenicityClassification as ACMG202
from ..pp.v202 import GeneDescriptor as GeneDescriptor202
from ..pp.v202 import VariationDescriptor as VariationDescriptor202
from ..pp.v202 import VariantInterpretation as VariantInterpretation202
from ..pp.v202 import MoleculeContext as MoleculeContext202
from ..pp.v202 import OntologyClass as OntologyClass202


from .variant import Variant


class PromoterVariant(Variant):
    """
    A variant located upstream of a gene that is represented by a free-text
    description and a Sequence Ontology (SO) term.

    Instances are normally created with one of the static factory methods
    (``two_KB_upstream_variant``, ``five_KB_upstream_variant``,
    ``upstream_transcript_variant``, ``upstream_gene_variant``), which supply
    the SO term.
    """

    def __init__(self,
                 description: str,
                 gene_symbol: str,
                 gene_id: str,
                 sequence_ontology_term: OntologyClass202,
                 genotype: OntologyClass202,
                 variant_id: str = None) -> None:
        """
        :param description: free-text description of the variant; surrounding whitespace is removed
        :type description: str
        :param gene_symbol: symbol of the affected gene, e.g., GLI3
        :type gene_symbol: str
        :param gene_id: the identifier (using HGNC) of the gene, e.g., GLI3 is HGNC:4319
        :type gene_id: str
        :param sequence_ontology_term: SO term that describes the kind of variant
        :type sequence_ontology_term: OntologyClass202
        :param genotype: ontology term for the allelic state, or None
        :type genotype: OntologyClass202
        :param variant_id: an identifier for the variant; a random identifier is generated if None
        :type variant_id: str, optional
        :raises ValueError: if description, gene_symbol or gene_id is None
        """
        # NOTE(review): Variant.__init__ is not called here (unchanged behavior) -- confirm the base class needs no initialization.
        if variant_id is None:
            self._variant_id = "var_" + "".join(random.choices(string.ascii_letters, k=25))
        else:
            self._variant_id = variant_id
        # Validate like the other mandatory arguments instead of failing with an AttributeError on None.strip()
        if description is None:
            raise ValueError("Need to pass a valid variant description!")
        self._description = description.strip()
        if gene_symbol is None:
            raise ValueError("Need to pass a valid gene symbol!")
        self._gene_symbol = gene_symbol
        if gene_id is None:
            raise ValueError("Need to pass a valid HGNC gene id!")
        self._hgnc_id = gene_id
        self._sequence_ontology_term = sequence_ontology_term
        self._genotype = genotype

    def to_ga4gh_variant_interpretation(self, *args, **kwargs):
        """
        Not supported for promoter variants; use ``to_variant_interpretation`` instead.

        Arguments are accepted and ignored so that the call always ends in
        NotImplementedError rather than in a TypeError about the signature.

        :raises NotImplementedError: always
        """
        raise NotImplementedError("This method will be deprecated")

    def to_variant_interpretation(self, acmg=None) -> VariantInterpretation202:
        """
        Transform this PromoterVariant object into a VariantInterpretation message (pp.v202 class)

        :param acmg: ACMG pathogenicity category, passed on to ``Variant._get_acmg_classification``
        :returns: the variant interpretation for this variant
        :rtype: VariantInterpretation202
        """
        gene_descriptor = GeneDescriptor202(value_id=self._hgnc_id, symbol=self._gene_symbol)
        vdescriptor = VariationDescriptor202(id=self._variant_id,
                                             molecule_context=MoleculeContext202.genomic,
                                             gene_context=gene_descriptor,
                                             label=self._description,
                                             structural_type=self._sequence_ontology_term)
        if self._genotype is not None:
            vdescriptor.allelic_state = self._genotype
        acmg_code = Variant._get_acmg_classification(acmg=acmg)
        vinterpretation = VariantInterpretation202(variation_descriptor=vdescriptor,
                                                   acmg_pathogenicity_classification=acmg_code)
        return vinterpretation

    @staticmethod
    def _from_so_term(so_id: str,
                      so_label: str,
                      description: str,
                      gene_symbol: str,
                      gene_id: str,
                      genotype: str,
                      variant_id: str) -> "PromoterVariant":
        """
        Shared implementation of the factory methods: map the genotype and create the variant with the given SO term.
        """
        gt_term = Variant._get_genotype_term(genotype=genotype)
        so_term = OntologyClass202(id=so_id, label=so_label)
        return PromoterVariant(description=description,
                               gene_symbol=gene_symbol,
                               gene_id=gene_id,
                               sequence_ontology_term=so_term,
                               genotype=gt_term,
                               variant_id=variant_id)

    @staticmethod
    def two_KB_upstream_variant(description: str,
                                gene_symbol: str,
                                gene_id: str,
                                genotype: str = None,
                                variant_id=None) -> "PromoterVariant":
        """
        A sequence variant located within 2KB 5' of a gene.
        :param description: free-text description of the variant
        :type description: str
        :param gene_symbol: the gene affected by the variant, e.g., GLI3
        :type gene_symbol: str
        :param gene_id: the identifier (using HGNC) of the gene, e.g., GLI3 is HGNC:4319
        :type gene_id: str
        :param genotype: genotype of the variant, mapped to an ontology term by ``Variant._get_genotype_term``
        :type genotype: str, optional
        :param variant_id: an identifier for the variant
        :type variant_id: str, optional
        """
        return PromoterVariant._from_so_term(so_id="SO:0001636",
                                             so_label="2KB_upstream_variant",
                                             description=description,
                                             gene_symbol=gene_symbol,
                                             gene_id=gene_id,
                                             genotype=genotype,
                                             variant_id=variant_id)

    @staticmethod
    def five_KB_upstream_variant(description: str,
                                 gene_symbol: str,
                                 gene_id: str,
                                 genotype: str = None,
                                 variant_id=None) -> "PromoterVariant":
        """
        A sequence variant located within 5KB 5' of a gene.
        :param description: free-text description of the variant
        :type description: str
        :param gene_symbol: the gene affected by the variant, e.g., GLI3
        :type gene_symbol: str
        :param gene_id: the identifier (using HGNC) of the gene, e.g., GLI3 is HGNC:4319
        :type gene_id: str
        :param genotype: genotype of the variant, mapped to an ontology term by ``Variant._get_genotype_term``
        :type genotype: str, optional
        :param variant_id: an identifier for the variant
        :type variant_id: str, optional
        """
        return PromoterVariant._from_so_term(so_id="SO:0001635",
                                             so_label="5KB_upstream_variant",
                                             description=description,
                                             gene_symbol=gene_symbol,
                                             gene_id=gene_id,
                                             genotype=genotype,
                                             variant_id=variant_id)

    @staticmethod
    def upstream_transcript_variant(description: str,
                                    gene_symbol: str,
                                    gene_id: str,
                                    genotype: str = None,
                                    variant_id=None) -> "PromoterVariant":
        """
        A feature variant, where the alteration occurs upstream of the transcript TSS.
        :param description: free-text description of the variant
        :type description: str
        :param gene_symbol: the gene affected by the variant, e.g., GLI3
        :type gene_symbol: str
        :param gene_id: the identifier (using HGNC) of the gene, e.g., GLI3 is HGNC:4319
        :type gene_id: str
        :param genotype: genotype of the variant, mapped to an ontology term by ``Variant._get_genotype_term``
        :type genotype: str, optional
        :param variant_id: an identifier for the variant
        :type variant_id: str, optional
        """
        return PromoterVariant._from_so_term(so_id="SO:0001986",
                                             so_label="upstream_transcript_variant",
                                             description=description,
                                             gene_symbol=gene_symbol,
                                             gene_id=gene_id,
                                             genotype=genotype,
                                             variant_id=variant_id)

    @staticmethod
    def upstream_gene_variant(description: str,
                              gene_symbol: str,
                              gene_id: str,
                              genotype: str = None,
                              variant_id=None) -> "PromoterVariant":
        """
        A sequence variant located 5' of a gene.
        :param description: free-text description of the variant
        :type description: str
        :param gene_symbol: the gene affected by the variant, e.g., GLI3
        :type gene_symbol: str
        :param gene_id: the identifier (using HGNC) of the gene, e.g., GLI3 is HGNC:4319
        :type gene_id: str
        :param genotype: genotype of the variant, mapped to an ontology term by ``Variant._get_genotype_term``
        :type genotype: str, optional
        :param variant_id: an identifier for the variant
        :type variant_id: str, optional
        """
        return PromoterVariant._from_so_term(so_id="SO:0001631",
                                             so_label="upstream_gene_variant",
                                             description=description,
                                             gene_symbol=gene_symbol,
                                             gene_id=gene_id,
                                             genotype=genotype,
                                             variant_id=variant_id)


Loading

0 comments on commit 2e67965

Please sign in to comment.