From 11f9318a0ae242f35edb45d60a005de6d28e9456 Mon Sep 17 00:00:00 2001 From: Sunny Sun <38218185+sunnyosun@users.noreply.github.com> Date: Fri, 12 Jan 2024 04:57:25 -0500 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=9A=20Rename=20`Bionty`=20to=20`Public?= =?UTF-8?q?Ontology`=20class=20(#536)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 🚚 Rename Bionty to PublicOntology class * 💚 Use 3.10 * 💚 Fix import * 💚 Fix import * 💚 Fix * 💚 Add quotes --- .github/workflows/build.yml | 8 +- bionty/__init__.py | 15 ++- bionty/_display_sources.py | 2 +- bionty/{_bionty.py => _public_ontology.py} | 141 +++++++++++---------- bionty/entities/_bfxpipeline.py | 4 +- bionty/entities/_biosample.py | 4 +- bionty/entities/_cellline.py | 4 +- bionty/entities/_cellmarker.py | 4 +- bionty/entities/_celltype.py | 4 +- bionty/entities/_developmentalstage.py | 4 +- bionty/entities/_disease.py | 4 +- bionty/entities/_drug.py | 4 +- bionty/entities/_ethnicity.py | 4 +- bionty/entities/_experimentalfactor.py | 9 +- bionty/entities/_gene.py | 4 +- bionty/entities/_organism.py | 4 +- bionty/entities/_pathway.py | 4 +- bionty/entities/_phenotype.py | 4 +- bionty/entities/_protein.py | 4 +- bionty/entities/_tissue.py | 4 +- docs/developer/updating_source.md | 4 +- docs/guide/concepts.md | 6 +- docs/guide/extend.md | 14 +- docs/guide/ontology.ipynb | 36 ++---- docs/guide/sources.ipynb | 18 +-- scripts/upload_new_ontologies.py | 6 +- tests/entities/test_experimentalfactor.py | 2 +- tests/test_bionty.py | 2 +- 28 files changed, 158 insertions(+), 165 deletions(-) rename bionty/{_bionty.py => _public_ontology.py} (85%) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 01e9e34e..958fd0a2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9"] + python-version: ["3.10"] group: ["bionty-unit", "bionty-docs"] timeout-minutes: 
25 @@ -61,7 +61,7 @@ jobs: aws-region: eu-central-1 - name: Lint - if: matrix.python-version == '3.9' && matrix.group == 'bionty-unit' + if: matrix.python-version == '3.10' && matrix.group == 'bionty-unit' run: | nox -s lint - name: Build @@ -75,7 +75,7 @@ jobs: path: .coverage - name: Deploy docs - if: ${{ matrix.python-version == '3.9' && matrix.group == 'bionty-docs' }} + if: ${{ matrix.python-version == '3.10' && matrix.group == 'bionty-docs' }} id: netlify uses: nwtgck/actions-netlify@v1.2 with: @@ -94,7 +94,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: "3.10" cache: "pip" cache-dependency-path: ".github/workflows/build.yml" - run: | diff --git a/bionty/__init__.py b/bionty/__init__.py index 7e2ff7dd..98e73d7c 100644 --- a/bionty/__init__.py +++ b/bionty/__init__.py @@ -31,10 +31,10 @@ .. autosummary:: :toctree: . - Bionty - BiontyField + PublicOntology + PublicOntologyField -Bionty sources: +PublicOntology sources: .. autosummary:: :toctree: . @@ -65,11 +65,11 @@ # dynamic classes from . 
import dev - -# tools -from ._bionty import Bionty, BiontyField from ._display_sources import display_available_sources, display_currently_used_sources from ._ontology import Ontology + +# tools +from ._public_ontology import PublicOntology, PublicOntologyField from ._settings import settings # sources @@ -92,6 +92,7 @@ from .entities._tissue import Tissue # backward compat -Entity = Bionty +Entity = PublicOntology +Bionty = PublicOntology Readout = ExperimentalFactor Species = Organism diff --git a/bionty/_display_sources.py b/bionty/_display_sources.py index 64106db5..accbe5f0 100644 --- a/bionty/_display_sources.py +++ b/bionty/_display_sources.py @@ -18,7 +18,7 @@ def display_available_sources() -> pd.DataFrame: return parse_sources_yaml(settings.local_sources).set_index("entity") # type: ignore -# This function naming is consistent with the `currently_used` field in BiontySource SQL table +# This function naming is consistent with the `currently_used` field in PublicSource SQL table # Do not rename! def display_currently_used_sources() -> pd.DataFrame: """Displays all currently used sources. 
diff --git a/bionty/_bionty.py b/bionty/_public_ontology.py similarity index 85% rename from bionty/_bionty.py rename to bionty/_public_ontology.py index 80387dc6..2fdff126 100644 --- a/bionty/_bionty.py +++ b/bionty/_public_ontology.py @@ -1,7 +1,6 @@ from __future__ import annotations import logging -from functools import cached_property from typing import ( TYPE_CHECKING, Iterable, @@ -26,10 +25,10 @@ def encode_filenames( - organism: str, source: str, version: str, entity: Bionty | str + organism: str, source: str, version: str, entity: PublicOntology | str ) -> tuple[str, str]: """Encode names of the cached files.""" - if isinstance(entity, Bionty): + if isinstance(entity, PublicOntology): entity_name = entity.__class__.__name__ else: entity_name = entity @@ -41,8 +40,8 @@ def encode_filenames( return parquet_filename, ontology_filename -class Bionty: - """Bionty base model.""" +class PublicOntology: + """PublicOntology object.""" def __init__( self, @@ -76,7 +75,7 @@ def __init__( f"please consider:\n" f" close your instance via `lamin close` and use Bionty stand alone\n" f" OR\n" - f" modify currently_used {self.__class__.__name__} source in `lnschema_bionty.BiontySource`" + f" modify currently_used {self.__class__.__name__} source in `lnschema_bionty.PublicSource`" ) # fmt: on @@ -105,33 +104,34 @@ def __init__( # set column names/fields as attributes for col_name in self._df.columns: try: - setattr(self, col_name, BiontyField(self, col_name)) - # Some fields of an ontology (e.g. Gene) are not Bionty class attributes and must be skipped. + setattr(self, col_name, PublicOntologyField(self, col_name)) + # Some fields of an ontology (e.g. Gene) are not PublicOntology class attributes and must be skipped. 
except AttributeError: pass def __repr__(self) -> str: # fmt: off representation = ( - f"{self.__class__.__name__}\n" + f"PublicOntology\n" + f"Entity: {self.__class__.__name__}\n" f"Organism: {self.organism}\n" f"Source: {self.source}, {self.version}\n" f"#terms: {self._df.shape[0] if hasattr(self, '_df') else ''}\n\n" - f"📖 {self.__class__.__name__}.df(): ontology reference table\n" - f"🔎 {self.__class__.__name__}.lookup(): autocompletion of terms\n" - f"🎯 {self.__class__.__name__}.search(): free text search of terms\n" - f"✅ {self.__class__.__name__}.validate(): strictly validate values\n" - f"🧐 {self.__class__.__name__}.inspect(): full inspection of values\n" - f"👽 {self.__class__.__name__}.standardize(): convert to standardized names\n" - f"🪜 {self.__class__.__name__}.diff(): difference between two versions\n" - f"🔗 {self.__class__.__name__}.ontology: Pronto.Ontology object" + f"📖 .df(): ontology reference table\n" + f"🔎 .lookup(): autocompletion of terms\n" + f"🎯 .search(): free text search of terms\n" + f"✅ .validate(): strictly validate values\n" + f"🧐 .inspect(): full inspection of values\n" + f"👽 .standardize(): convert to standardized names\n" + f"🪜 .diff(): difference between two versions\n" + f"🔗 .to_pronto(): Pronto.Ontology object" ) # fmt: on return representation @property def organism(self): - """The `name` of `Organism` Bionty.""" + """The `name` of `Organism`.""" return self._organism @property @@ -141,12 +141,12 @@ def source(self): @property def version(self): - """The `name` of `version` entity Bionty.""" + """Version of the source.""" return self._version @property def fields(self) -> set: - """All Bionty entity fields.""" + """All PublicOntology entity fields.""" blacklist = {"include_id_prefixes"} fields = { field @@ -155,23 +155,6 @@ def fields(self) -> set: } return fields - blacklist - @cached_property - def ontology(self): - """The Pronto Ontology object. 
- - See: https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html - """ - if self._local_ontology_path is None: - logger.error(f"{self.__class__.__name__} has no Pronto Ontology object!") - return - else: - self._download_ontology_file( - localpath=self._local_ontology_path, - url=self._url, - md5=self._md5, - ) - return Ontology(handle=self._local_ontology_path) - def _download_ontology_file(self, localpath: Path, url: str, md5: str = "") -> None: """Download ontology source file to _local_ontology_path.""" if not localpath.exists(): @@ -299,7 +282,7 @@ def _set_file_paths(self) -> None: else: self._local_ontology_path = settings.dynamicdir / self._ontology_filename - def _get_default_field(self, field: BiontyField | str | None = None) -> str: + def _get_default_field(self, field: PublicOntologyField | str | None = None) -> str: """Default to name field.""" if field is None: if "name" in self._df.columns: @@ -325,7 +308,7 @@ def _load_df(self) -> pd.DataFrame: # If download is not possible, write a parquet file of the ontology df if not self._url.endswith("parquet"): if not self._local_parquet_path.exists(): - df = self.ontology.to_df( + df = self.to_pronto().to_df( source=self.source, include_id_prefixes=self.include_id_prefixes ) df.to_parquet(self._local_parquet_path) @@ -334,6 +317,22 @@ def _load_df(self) -> pd.DataFrame: df = pd.read_parquet(self._local_parquet_path) return df + def to_pronto(self): + """The Pronto Ontology object. + + See: https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html + """ + if self._local_ontology_path is None: + logger.error(f"{self.__class__.__name__} has no Pronto Ontology object!") + return + else: + self._download_ontology_file( + localpath=self._local_ontology_path, + url=self._url, + md5=self._md5, + ) + return Ontology(handle=self._local_ontology_path) + def df(self) -> pd.DataFrame: """Pandas DataFrame of the ontology. 
@@ -352,7 +351,7 @@ def df(self) -> pd.DataFrame: def validate( self, values: Iterable, - field: BiontyField, + field: PublicOntologyField, *, mute: bool = False, **kwargs, @@ -375,7 +374,7 @@ def validate( def inspect( self, values: Iterable, - field: BiontyField, + field: PublicOntologyField, *, mute: bool = False, **kwargs, @@ -384,7 +383,7 @@ def inspect( Args: values: Identifiers that will be checked against the field. - field: The BiontyField of the ontology to compare against. + field: The PublicOntologyField of the ontology to compare against. Examples are 'ontology_id' to map against the source ID or 'name' to map against the ontologies field names. return_df: Whether to return a Pandas DataFrame. @@ -398,9 +397,9 @@ def inspect( Examples: >>> import bionty as bt - >>> gene_bt = bt.Gene() + >>> public = bt.Gene() >>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"] - >>> gene_bt.inspect(gene_symbols, field=gene_bt.symbol) + >>> public.inspect(gene_symbols, field=public.symbol) """ from lamin_utils._inspect import inspect @@ -419,14 +418,14 @@ def inspect( def standardize( self, values: Iterable, - field: BiontyField | str | None = None, + field: PublicOntologyField | str | None = None, *, return_field: str = None, return_mapper: bool = False, case_sensitive: bool = False, mute: bool = False, keep: Literal["first", "last", False] = "first", - synonyms_field: BiontyField | str = "synonyms", + synonyms_field: PublicOntologyField | str = "synonyms", ) -> dict[str, str] | list[str]: """Convert into standardized names. 
@@ -454,9 +453,9 @@ def standardize( Examples: >>> import bionty as bt - >>> gene_bt = bt.Gene() + >>> public = bt.Gene() >>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"] - >>> standardized_symbols = gene_bt.standardize(gene_symbols, gene_bt.symbol) + >>> standardized_symbols = public.standardize(gene_symbols, public.symbol) """ from lamin_utils._standardize import standardize as map_synonyms @@ -482,8 +481,8 @@ def map_synonyms( return_mapper: bool = False, case_sensitive: bool = False, keep: Literal["first", "last", False] = "first", - synonyms_field: BiontyField | str = "synonyms", - field: BiontyField | str | None = None, + synonyms_field: PublicOntologyField | str = "synonyms", + field: PublicOntologyField | str | None = None, ) -> dict[str, str] | list[str]: """Maps input synonyms to standardized names.""" logger.warning("`map_synonyms()` is deprecated, use `.standardize()`!'") @@ -496,8 +495,8 @@ def map_synonyms( field=field, ) - def lookup(self, field: BiontyField | str | None = None) -> tuple: - """An auto-complete object for a Bionty field. + def lookup(self, field: PublicOntologyField | str | None = None) -> tuple: + """An auto-complete object for a PublicOntology field. Args: field: The field to lookup the values for. @@ -524,16 +523,16 @@ def search( self, string: str, *, - field: BiontyField | str | None = None, + field: PublicOntologyField | str | None = None, limit: int | None = None, case_sensitive: bool = False, - synonyms_field: BiontyField | str | None = "synonyms", + synonyms_field: PublicOntologyField | str | None = "synonyms", ): - """Search a given string against a Bionty field. + """Search a given string against a PublicOntology field. Args: string: The input string to match against the field values. - field: The BiontyField of the ontology the input string is matching against. + field: The PublicOntologyField of the ontology the input string is matching against. top_hit: Return all entries ranked by matching ratios. 
If True, only return the top match. Defaults to False. @@ -548,8 +547,8 @@ def search( Examples: >>> import bionty as bt - >>> celltype_bt = bt.CellType() - >>> celltype_bt.search("gamma delta T cell") + >>> public = bt.CellType() + >>> public.search("gamma delta T cell") """ from lamin_utils._search import search @@ -562,11 +561,13 @@ def search( synonyms_field=str(synonyms_field), ) - def diff(self, compare_to: Bionty, **kwargs) -> tuple[pd.DataFrame, pd.DataFrame]: - """Determines a diff between two Bionty objects' ontologies. + def diff( + self, compare_to: PublicOntology, **kwargs + ) -> tuple[pd.DataFrame, pd.DataFrame]: + """Determines a diff between two PublicOntology objects' ontologies. Args: - compare_to: Bionty object that must be of the same class as the calling object. + compare_to: PublicOntology object that must be of the same class as the calling object. kwargs: Are passed to pd.DataFrame.compare() Returns: @@ -576,20 +577,20 @@ def diff(self, compare_to: Bionty, **kwargs) -> tuple[pd.DataFrame, pd.DataFrame Examples: >>> import bionty as bt - >>> disease_bt_1 = bt.Disease(source="mondo", version="2023-04-04") - >>> disease_bt_2 = bt.Disease(source="mondo", version="2023-04-04") - >>> new_entries, modified_entries = disease_bt_1.diff(disease_bt_2) + >>> public_1 = bt.Disease(source="mondo", version="2023-04-04") + >>> public_2 = bt.Disease(source="mondo", version="2023-04-04") + >>> new_entries, modified_entries = public_1.diff(public_2) >>> print(new_entries.head()) >>> print(modified_entries.head()) """ if type(self) is not type(compare_to): - raise ValueError("Both Bionty objects must be of the same class.") + raise ValueError("Both PublicOntology objects must be of the same class.") if not self.source == compare_to.source: - raise ValueError("Both Bionty objects must use the same source.") + raise ValueError("Both PublicOntology objects must use the same source.") if self.version == compare_to.version: - raise ValueError("The versions of the 
Bionty objects must differ.") + raise ValueError("The versions of the PublicOntology objects must differ.") # The 'parents' column (among potentially others) contain Numpy array values. # We transform them to tuples to determine the diff. @@ -623,10 +624,10 @@ def _convert_arrays_to_tuples(arr): # pragma: no cover return new_entries, modified_entries -class BiontyField: - """Field of a Bionty model.""" +class PublicOntologyField: + """Field of a PublicOntology model.""" - def __init__(self, parent: Bionty, name: str): + def __init__(self, parent: PublicOntology, name: str): self.parent = parent self.name = name diff --git a/bionty/entities/_bfxpipeline.py b/bionty/entities/_bfxpipeline.py index 8f73f438..00345318 100644 --- a/bionty/entities/_bfxpipeline.py +++ b/bionty/entities/_bfxpipeline.py @@ -4,14 +4,14 @@ import pandas as pd -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from bionty.dev._io import s3_bionty_assets from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class BFXPipeline(Bionty): +class BFXPipeline(PublicOntology): """Bioinformatics pipelines. Args: diff --git a/bionty/entities/_biosample.py b/bionty/entities/_biosample.py index 930579e1..2145418d 100644 --- a/bionty/entities/_biosample.py +++ b/bionty/entities/_biosample.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class BioSample(Bionty): +class BioSample(PublicOntology): """BioSample attributes. 1. 
NCBI BioSample Attributes diff --git a/bionty/entities/_cellline.py b/bionty/entities/_cellline.py index b5e0cc78..8e95c963 100644 --- a/bionty/entities/_cellline.py +++ b/bionty/entities/_cellline.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class CellLine(Bionty): +class CellLine(PublicOntology): """Cell line. 1. Cell Line Ontology diff --git a/bionty/entities/_cellmarker.py b/bionty/entities/_cellmarker.py index 83b7029e..6eb6a6e1 100644 --- a/bionty/entities/_cellmarker.py +++ b/bionty/entities/_cellmarker.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class CellMarker(Bionty): +class CellMarker(PublicOntology): """Cell markers. 1. Cell Marker Ontology diff --git a/bionty/entities/_celltype.py b/bionty/entities/_celltype.py index b8eb6319..b609e2d4 100644 --- a/bionty/entities/_celltype.py +++ b/bionty/entities/_celltype.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class CellType(Bionty): +class CellType(PublicOntology): """Cell type ontologies. 1. 
Cell ontology diff --git a/bionty/entities/_developmentalstage.py b/bionty/entities/_developmentalstage.py index a2dfa24c..d2a4062b 100644 --- a/bionty/entities/_developmentalstage.py +++ b/bionty/entities/_developmentalstage.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class DevelopmentalStage(Bionty): +class DevelopmentalStage(PublicOntology): """Developmental Stage. 1. Developmental Stage Ontology diff --git a/bionty/entities/_disease.py b/bionty/entities/_disease.py index a0780989..8cb4aad8 100644 --- a/bionty/entities/_disease.py +++ b/bionty/entities/_disease.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Disease(Bionty): +class Disease(PublicOntology): """Disease ontologies. 1. Mondo diff --git a/bionty/entities/_drug.py b/bionty/entities/_drug.py index 178367b6..7249d7e7 100644 --- a/bionty/entities/_drug.py +++ b/bionty/entities/_drug.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Drug(Bionty): +class Drug(PublicOntology): """Drug ontologies. 1. 
DRON diff --git a/bionty/entities/_ethnicity.py b/bionty/entities/_ethnicity.py index eb3caa74..e5eb06f9 100644 --- a/bionty/entities/_ethnicity.py +++ b/bionty/entities/_ethnicity.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Ethnicity(Bionty): +class Ethnicity(PublicOntology): """Ethnicity. 1. Human Ancestry Ontology diff --git a/bionty/entities/_experimentalfactor.py b/bionty/entities/_experimentalfactor.py index 172973e6..a1c3d35c 100644 --- a/bionty/entities/_experimentalfactor.py +++ b/bionty/entities/_experimentalfactor.py @@ -4,13 +4,13 @@ import pandas as pd from lamin_utils import logger -from bionty._bionty import Bionty from bionty._ontology import Ontology +from bionty._public_ontology import PublicOntology from bionty.entities._shared_docstrings import _doc_params, organism_removed @_doc_params(doc_entities=organism_removed) -class ExperimentalFactor(Bionty): +class ExperimentalFactor(PublicOntology): """Experimental Factor. 1. Experimental Factor Ontology @@ -20,7 +20,7 @@ class ExperimentalFactor(Bionty): Args: {doc_entities} - Also see: `bionty.Bionty `__ + Also see: `bionty.PublicOntology `__ """ def __init__( @@ -38,8 +38,7 @@ def __init__( **kwargs, ) - @cached_property - def ontology(self) -> Ontology: # type:ignore + def to_pronto(self) -> Ontology: # type:ignore """The Pronto Ontology object. 
See: https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html diff --git a/bionty/entities/_gene.py b/bionty/entities/_gene.py index cf91868b..a722ebcc 100644 --- a/bionty/entities/_gene.py +++ b/bionty/entities/_gene.py @@ -3,7 +3,7 @@ import pandas as pd from lamin_utils import logger -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from bionty._settings import settings from bionty.dev._io import s3_bionty_assets @@ -12,7 +12,7 @@ @_doc_params(doc_entities=doc_entites) -class Gene(Bionty): +class Gene(PublicOntology): """Gene. 1. Ensembl diff --git a/bionty/entities/_organism.py b/bionty/entities/_organism.py index f3532434..d4c7d5ba 100644 --- a/bionty/entities/_organism.py +++ b/bionty/entities/_organism.py @@ -2,13 +2,13 @@ import pandas as pd -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from bionty.dev._io import s3_bionty_assets from bionty.entities._shared_docstrings import _doc_params, organism_removed @_doc_params(doc_entities=organism_removed) -class Organism(Bionty): +class Organism(PublicOntology): """Organism. 1. Organism ontology diff --git a/bionty/entities/_pathway.py b/bionty/entities/_pathway.py index 302b8a57..7d090f45 100644 --- a/bionty/entities/_pathway.py +++ b/bionty/entities/_pathway.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Pathway(Bionty): +class Pathway(PublicOntology): """Pathway. 1. 
Gene Ontology diff --git a/bionty/entities/_phenotype.py b/bionty/entities/_phenotype.py index 5ff04fb5..ba7d502a 100644 --- a/bionty/entities/_phenotype.py +++ b/bionty/entities/_phenotype.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Phenotype(Bionty): +class Phenotype(PublicOntology): """Phenotype. 1. Human Phenotype Ontology diff --git a/bionty/entities/_protein.py b/bionty/entities/_protein.py index b4a25fb0..e8b91518 100644 --- a/bionty/entities/_protein.py +++ b/bionty/entities/_protein.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Protein(Bionty): +class Protein(PublicOntology): """Protein. 1. Uniprot diff --git a/bionty/entities/_tissue.py b/bionty/entities/_tissue.py index 66ed8867..58a40958 100644 --- a/bionty/entities/_tissue.py +++ b/bionty/entities/_tissue.py @@ -1,12 +1,12 @@ from typing import Literal, Optional -from bionty._bionty import Bionty +from bionty._public_ontology import PublicOntology from ._shared_docstrings import _doc_params, doc_entites @_doc_params(doc_entities=doc_entites) -class Tissue(Bionty): +class Tissue(PublicOntology): """Tissue. 1. Uberon diff --git a/docs/developer/updating_source.md b/docs/developer/updating_source.md index a2bd66c3..877d3afb 100644 --- a/docs/developer/updating_source.md +++ b/docs/developer/updating_source.md @@ -9,8 +9,8 @@ Here we document the steps to required steps to take when updating the `source.y ## Steps 1. Adapt the `sources.yaml` file as required. - Add a new top level entry if you've added a new Bionty entity or add a new source/version while confirming to the yaml structure. -2. 
If you've added a new source or version, use Bionty's `diff` function to determine whether any terms were deleted. + Add a new top level entry if you've added a new PublicOntology entity or add a new source/version while confirming to the yaml structure. +2. If you've added a new source or version, use PublicOntology's `diff` function to determine whether any terms were deleted. Consult the rest of the team if so. Ensure that your pull request contains a summary of the diff. 3. Specify the new entity or the latest version in your local `.current_source.yaml`. Run the tests. diff --git a/docs/guide/concepts.md b/docs/guide/concepts.md index aa92fc99..82ac41cf 100644 --- a/docs/guide/concepts.md +++ b/docs/guide/concepts.md @@ -9,11 +9,11 @@ Let's define a biological entity (e.g., `Organism`) to be a variable that takes 3. Terms in the vocabularies have different granularity, and are often hierarchical. 4. Typically, vocabularies are based on a given version of a public reference ontology, but contain additional β€œcustom” terms corresponding to "new knowledge" absent from reference ontologies. For example, new cell types or states, new synthetic genes, etc. -## Bionty object +## PublicOntology object -The central class {class}`~bionty.Bionty` models 3 of the 4 above-mentioned properties of biological entities: +The central class {class}`~bionty.PublicOntology` models 3 of the 4 above-mentioned properties of biological entities: -1. Every `Bionty` object comes with a table of terms in which each column corresponds to an alternative vocabulary for the entity. +1. Every `PublicOntology` object comes with a table of terms in which each column corresponds to an alternative vocabulary for the entity. 2. Every table is versioned & has a tracked reference source (typically, a public ontology). 3. Most tables have a children column that allows mapping hierarchies. 4. 
Adding user-defined records amounts to managing bioregistries, and we recommend using Bionty's SQL extension ([lnschema_bionty](https://lamin.ai/docs/lnschema-bionty)). diff --git a/docs/guide/extend.md b/docs/guide/extend.md index dcdcc69d..44ec646c 100644 --- a/docs/guide/extend.md +++ b/docs/guide/extend.md @@ -67,9 +67,9 @@ If desired, the new ontology can be set as default. See {doc}`./config` for more ## New entities -Adding new entities to Bionty requires subclassing the {class}`bionty.Bionty` and modifying the `sources_local.yaml` file. +Adding new entities to Bionty requires subclassing the {class}`bionty.PublicOntology` and modifying the `sources_local.yaml` file. -The {class}`bionty.Bionty` requires several properties to be defined: +The {class}`bionty.PublicOntology` requires several properties to be defined: ```python organism: str, @@ -77,15 +77,15 @@ source: str, version: str, ``` -These are automatically populated by either the currently used Bionty sources (see {doc}`./config`) or explicitly passed as parameters when initializing an Entity. +These are automatically populated by either the currently used PublicOntology sources (see {doc}`./config`) or explicitly passed as parameters when initializing an Entity. -Hence, a new Bionty class `MyEntity` would be defined as: +Hence, a new PublicOntology class `MyEntity` would be defined as: ```python -from bionty import Bionty +from bionty import PublicOntology -class MyEntity(Bionty): +class MyEntity(PublicOntology): """MyEntity.""" def __init__( @@ -119,4 +119,4 @@ MyEntity: md5: "md5 if available or leave out this row" ``` -Great! Now we've added a new Bionty class, which can be used with all Bionty functions! 🎉 +Great! Now we've added a new PublicOntology class, which can be used with all PublicOntology functions! 
🎉 diff --git a/docs/guide/ontology.ipynb b/docs/guide/ontology.ipynb index de3f0cf0..567871de 100644 --- a/docs/guide/ontology.ipynb +++ b/docs/guide/ontology.ipynb @@ -7,7 +7,7 @@ "source": [ "# Get Pronto ontology objects\n", "\n", - "When it comes to ontology defined vocabularies, such as cell type, tissue, disease, and phenotype, [Pronto Ontology object](https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html) can be accessed via `{entity}.ontology`\n", + "When it comes to ontology defined vocabularies, such as cell type, tissue, disease, and phenotype, [Pronto Ontology object](https://pronto.readthedocs.io/en/stable/api/pronto.Ontology.html) can be accessed via `{entity}.to_pronto()`\n", "\n" ] }, @@ -38,9 +38,9 @@ "metadata": {}, "outputs": [], "source": [ - "disease_bt = bt.Disease()\n", + "public = bt.Disease()\n", "\n", - "disease_bt" + "public" ] }, { @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "disease_bt.df().head()" + "public.df().head()" ] }, { @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "lookup = disease_bt.lookup()" + "lookup = public.lookup()" ] }, { @@ -126,7 +126,7 @@ "id": "66ba4cba", "metadata": {}, "source": [ - "## `.pronto`: Pronto Ontology\n", + "## `.to_pronto()`: Pronto Ontology\n", "\n", "More hierarchical information can be accessed from the Pronto Ontology object:" ] @@ -138,7 +138,7 @@ "metadata": {}, "outputs": [], "source": [ - "pronto_object = disease_bt.ontology" + "pronto_object = public.to_pronto()" ] }, { @@ -199,7 +199,7 @@ "metadata": {}, "outputs": [], "source": [ - "readout_bt = bt.ExperimentalFactor()" + "public = bt.ExperimentalFactor()" ] }, { @@ -209,7 +209,7 @@ "metadata": {}, "outputs": [], "source": [ - "readout_bt.df().head()" + "public.df().head()" ] }, { @@ -219,7 +219,7 @@ "metadata": {}, "outputs": [], "source": [ - "lookup = readout_bt.lookup()" + "lookup = public.lookup()" ] }, { @@ -301,7 +301,7 @@ "id": "afa16569-5f98-41ec-981c-db1b81fbbe91", "metadata": {}, 
"source": [ - "Bionty provides {func}`bionty.Bionty.diff` to determine a diff between two ontology versions." + "Bionty provides {func}`bionty.PublicOntology.diff` to determine a diff between two ontology versions." ] }, { @@ -313,10 +313,10 @@ }, "outputs": [], "source": [ - "disease_bt_old = bt.Disease(source=\"mondo\", version=\"2023-04-04\")\n", - "disease_bt_new = bt.Disease(source=\"mondo\", version=\"2023-02-06\")\n", + "disease_old = bt.Disease(source=\"mondo\", version=\"2023-04-04\")\n", + "disease_new = bt.Disease(source=\"mondo\", version=\"2023-02-06\")\n", "\n", - "new_entries, modified_entries = disease_bt_old.diff(disease_bt_new)" + "new_entries, modified_entries = disease_old.diff(disease_new)" ] }, { @@ -342,14 +342,6 @@ "source": [ "modified_entries.head()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8ff90ce1-d427-43f6-9c22-868878d59a35", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/guide/sources.ipynb b/docs/guide/sources.ipynb index 07469b6a..3772d575 100644 --- a/docs/guide/sources.ipynb +++ b/docs/guide/sources.ipynb @@ -57,7 +57,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "When initializing a Bionty class, default source is used:" + "When initializing a PublicOntology class, default source is used:" ] }, { @@ -66,9 +66,9 @@ "metadata": {}, "outputs": [], "source": [ - "celltype_bt = bt.CellType()\n", + "public = bt.CellType()\n", "\n", - "celltype_bt" + "public" ] }, { @@ -84,9 +84,9 @@ "metadata": {}, "outputs": [], "source": [ - "celltype_bt = bt.CellType(source=\"cl\", version=\"2022-08-16\")\n", + "public = bt.CellType(source=\"cl\", version=\"2022-08-16\")\n", "\n", - "celltype_bt" + "public" ] }, { @@ -102,9 +102,9 @@ "metadata": {}, "outputs": [], "source": [ - "gene_bt = bt.Gene()\n", + "public = bt.Gene()\n", "\n", - "gene_bt" + "public" ] }, { @@ -113,9 +113,9 @@ "metadata": {}, "outputs": [], "source": [ - "gene_bt = bt.Gene(organism=\"mouse\")\n", 
+ "public = bt.Gene(organism=\"mouse\")\n", "\n", - "gene_bt" + "public" ] }, { diff --git a/scripts/upload_new_ontologies.py b/scripts/upload_new_ontologies.py index 4aebcf85..db08e5fa 100644 --- a/scripts/upload_new_ontologies.py +++ b/scripts/upload_new_ontologies.py @@ -1,7 +1,7 @@ def _upload_ontology_artifacts(instance: str, lamindb_user: str, lamindb_password: str): import bionty as bt import lamindb as ln - from bionty._bionty import encode_filenames + from bionty._public_ontology import encode_filenames from bionty._settings import settings ln.setup.login(lamindb_user, password=lamindb_password) @@ -32,9 +32,9 @@ def _upload_ontology_artifacts(instance: str, lamindb_user: str, lamindb_passwor if not queryset.filter(key=ontology_filename).exists(): local_ontology_filename = settings.dynamicdir / ontology_filename if not local_ontology_filename.exists(): - getattr(bt, entity)( # noqa: B018 + getattr(bt, entity)( organism=row.organism, source=row.source, version=row.version - ).ontology + ).to_pronto() file = ln.File(local_ontology_filename, key=ontology_filename) files.append(file) if len(files) > 0: diff --git a/tests/entities/test_experimentalfactor.py b/tests/entities/test_experimentalfactor.py index f0c241c9..0e01150a 100644 --- a/tests/entities/test_experimentalfactor.py +++ b/tests/entities/test_experimentalfactor.py @@ -25,7 +25,7 @@ def test_efo_experimental_factor_inspect_ontology_id(): def test_parse_efo_term(): ro = bt.ExperimentalFactor(source="efo") - ontology = ro.ontology + ontology = ro.to_pronto() res = _parse_efo_term(term_id="EFO:0008913", ontology=ontology) assert res == { diff --git a/tests/test_bionty.py b/tests/test_bionty.py index b23b4509..c03129a1 100644 --- a/tests/test_bionty.py +++ b/tests/test_bionty.py @@ -33,7 +33,7 @@ def test_diff_successful(): def test_diff_value_errors(): - # Two different Bionty object types + # Two different PublicOntology object types disease_bt = bt.Disease() phenotype_bt = bt.Phenotype() with 
pytest.raises(ValueError):