Skip to content
This repository has been archived by the owner on Aug 7, 2024. It is now read-only.

Commit

Permalink
🔖 Stage 0.26 (#479)
Browse files Browse the repository at this point in the history
* ✨ Add diff (#468)

* ✨ Use input prompt (#467)

* 📝 Update release notes

* 📝 Build docs standalone again (#465)

* 📝 Build docs standalone again

* 📝 Fix docstring

* 📝 Consolidate readme

* 👷 Strict erroring docs build

* 💚 Fix

* 📝 Prettify

* 📝 Prettier

* 📝 Update release notes

* 🚸 Case insensitive search (#466)

* 📝 Update release notes

* 🔖 Release 0.24.2

* ✨ Use input prompt

Signed-off-by: zethson <[email protected]>

---------

Signed-off-by: zethson <[email protected]>
Co-authored-by: github-actions <[email protected]>
Co-authored-by: Alex Wolf <[email protected]>
Co-authored-by: Sunny Sun <[email protected]>

* ✨ Add diff implementation

Signed-off-by: zethson <[email protected]>

* ✅ Add more tests

Signed-off-by: zethson <[email protected]>

* ✨ Add existing entries diff

Signed-off-by: zethson <[email protected]>

* ✨ Add to repr

Signed-off-by: zethson <[email protected]>

* Add to guide

Signed-off-by: zethson <[email protected]>

---------

Signed-off-by: zethson <[email protected]>
Co-authored-by: github-actions <[email protected]>
Co-authored-by: Alex Wolf <[email protected]>
Co-authored-by: Sunny Sun <[email protected]>

* 🍱 Include obsolete terms in .df (#481)

* 🚚 Fix tests (#482)

---------

Signed-off-by: zethson <[email protected]>
Co-authored-by: Lukas Heumos <[email protected]>
Co-authored-by: github-actions <[email protected]>
Co-authored-by: Alex Wolf <[email protected]>
  • Loading branch information
4 people authored Jul 23, 2023
1 parent 77232a9 commit 1e5e8c3
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 22 deletions.
2 changes: 1 addition & 1 deletion bionty/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
__version__ = "0.25.2" # denote release candidate for 0.1.0 with 0.1rc1

# prints warning of python versions
from lamin_logger import py_version_warning
from lamin_utils import py_version_warning

py_version_warning("3.8", "3.10")

Expand Down
73 changes: 68 additions & 5 deletions bionty/_bionty.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from __future__ import annotations

import logging
import os
from functools import cached_property
from pathlib import Path
from typing import Dict, Iterable, List, Literal, Optional, Set, Tuple, Union

import numpy as np
import pandas as pd
from lamin_logger import logger
from lamin_logger._lookup import Lookup
from lamin_utils import logger
from lamin_utils._lookup import Lookup

from bionty._md5 import verify_md5

Expand Down Expand Up @@ -103,6 +105,7 @@ def __repr__(self) -> str:
f"🎯 {self.__class__.__name__}.search(): free text search of terms\n"
f"🧐 {self.__class__.__name__}.inspect(): check if identifiers are mappable\n"
f"πŸ‘½ {self.__class__.__name__}.map_synonyms(): map synonyms to standardized names\n"
f"βš– {self.__class__.__name__}.diff(): difference between two versions\n"
f"πŸ”— {self.__class__.__name__}.ontology: Pronto.Ontology object"
)
# fmt: on
Expand Down Expand Up @@ -374,7 +377,7 @@ def inspect(
>>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
>>> gene_bt.inspect(gene_symbols, field=gene_bt.symbol)
"""
from lamin_logger._inspect import inspect
from lamin_utils._inspect import inspect

return inspect(
df=self._df,
Expand Down Expand Up @@ -426,7 +429,7 @@ def map_synonyms(
>>> gene_symbols = ["A1CF", "A1BG", "FANCD1", "FANCD20"]
>>> standardized_symbols = gene_bt.map_synonyms(gene_symbols, gene_bt.symbol)
"""
from lamin_logger._map_synonyms import map_synonyms
from lamin_utils._map_synonyms import map_synonyms

return map_synonyms(
df=self._df,
Expand Down Expand Up @@ -489,7 +492,7 @@ def search(
>>> celltype_bt = bt.CellType()
>>> celltype_bt.search("gamma delta T cell")
"""
from lamin_logger._search import search
from lamin_utils._search import search

return search(
df=self._df,
Expand All @@ -500,6 +503,66 @@ def search(
synonyms_field=str(synonyms_field),
)

def diff(self, compare_to: Bionty, **kwargs) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Determine a diff between the ontologies of two Bionty objects.

    The comparison is computed on copies of both DataFrames, so neither
    `self` nor `compare_to` is modified by this call.

    Args:
        compare_to: Bionty object that must be of the same class and use the
            same source as the calling object, but a different version.
        kwargs: Are passed to pd.DataFrame.compare()

    Returns:
        A tuple of two DataFrames:
        1. New entries: all rows that occur in exactly one of the two
           DataFrames (rows that are identical in both are dropped).
        2. A pd.DataFrame.compare result which denotes all changes between
           `self` and `compare_to` for the index values they share.

    Raises:
        ValueError: If the two objects are of different classes, use
            different sources, or have the same version.

    Examples:
        >>> import bionty as bt
        >>> disease_bt_1 = bt.Disease(source="mondo", version="2023-04-04")
        >>> disease_bt_2 = bt.Disease(source="mondo", version="2023-02-06")
        >>> new_entries, modified_entries = disease_bt_1.diff(disease_bt_2)
        >>> print(new_entries.head())
        >>> print(modified_entries.head())
    """
    if type(self) is not type(compare_to):
        raise ValueError("Both Bionty objects must be of the same class.")

    if self.source != compare_to.source:
        raise ValueError("Both Bionty objects must use the same source.")

    if self.version == compare_to.version:
        raise ValueError("The versions of the Bionty objects must differ.")

    # The 'parents' column (among potentially others) contain Numpy array values.
    # Arrays are unhashable, so we transform them to tuples to determine the diff.
    def _convert_arrays_to_tuples(arr):  # pragma: no cover
        return tuple(arr) if isinstance(arr, np.ndarray) else arr

    def _with_hashable_cells(frame: pd.DataFrame) -> pd.DataFrame:
        # Work on a copy so that the DataFrame held by the Bionty object
        # keeps its original array values after the diff.
        frame = frame.copy()
        for column in frame.columns:
            if any(isinstance(val, np.ndarray) for val in frame[column]):
                frame[column] = frame[column].apply(_convert_arrays_to_tuples)
        return frame

    self_df = _with_hashable_cells(self.df())
    compare_to_df = _with_hashable_cells(compare_to.df())

    # New entries: keep=False drops every row that has an identical twin,
    # leaving only the rows that exist in exactly one of the two versions.
    new_entries = pd.concat([self_df, compare_to_df]).drop_duplicates(keep=False)

    # Changes in existing entries: DataFrame.compare requires identically
    # labeled frames, so both sides are restricted to the shared index.
    common_index = self_df.index.intersection(compare_to_df.index)
    modified_entries = self_df.loc[common_index].compare(
        compare_to_df.loc[common_index], **kwargs
    )

    logging.info("%d new entries were added.", len(new_entries))
    logging.info("%d entries were modified.", len(modified_entries))

    return new_entries, modified_entries


class BiontyField:
"""Field of a Bionty model."""
Expand Down
4 changes: 2 additions & 2 deletions bionty/_ontology.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def filter_include_id_prefixes(terms: pronto.ontology._OntologyTerms):

df_values = []
for term in filtered_terms:
# skip terms without id or name and obsolete terms
if (not term.id) or (not term.name) or term.obsolete:
# skip terms without id or name
if (not term.id) or (not term.name):
continue

# term definition text
Expand Down
3 changes: 1 addition & 2 deletions bionty/dev/_handle_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Dict, List, Literal, Union

import pandas as pd
from lamin_logger import logger
from lamin_utils import logger

from bionty._settings import settings
from bionty.dev._io import load_yaml, write_yaml
Expand Down Expand Up @@ -90,7 +90,6 @@ def parse_sources_yaml(filepath: Union[str, Path] = PUBLIC_SOURCES) -> pd.DataFr
Args:
filepath: Path to the versions yaml file.
return_df: Whether to return a Pandas DataFrame
Returns:
- entity
Expand Down
2 changes: 1 addition & 1 deletion bionty/entities/_experimentalfactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Dict, Literal, Optional

import pandas as pd
from lamin_logger import logger
from lamin_utils import logger

from bionty.entities._shared_docstrings import _doc_params, species_removed

Expand Down
74 changes: 64 additions & 10 deletions docs/guide/ontology.ipynb
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "053a464e-df2f-4386-b537-4f0a4fb46408",
"metadata": {},
Expand All @@ -23,7 +22,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1a61c7de",
"metadata": {},
Expand Down Expand Up @@ -88,7 +86,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "4cb4718b",
"metadata": {},
Expand All @@ -107,7 +104,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "58cd007f",
"metadata": {},
Expand All @@ -126,7 +122,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "66ba4cba",
"metadata": {},
Expand Down Expand Up @@ -179,7 +174,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "be4cab83",
"metadata": {},
Expand All @@ -188,7 +182,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "879230a6",
"metadata": {},
Expand Down Expand Up @@ -230,7 +223,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "689b1622",
"metadata": {},
Expand Down Expand Up @@ -279,7 +271,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "9e501882",
"metadata": {},
Expand All @@ -296,6 +287,69 @@
"source": [
"lookup.tumor_size"
]
},
{
"cell_type": "markdown",
"id": "76e2e2f9-19c0-46e2-97f6-4487428ed1c3",
"metadata": {},
"source": [
"## Comparing ontology versions"
]
},
{
"cell_type": "markdown",
"id": "afa16569-5f98-41ec-981c-db1b81fbbe91",
"metadata": {},
"source": [
"Bionty provides {func}`bionty.Bionty.diff` to determine a diff between two ontology versions."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2b7ba3c-6f25-49d2-9dc2-589fee0f0d53",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"disease_bt_old = bt.Disease(source=\"mondo\", version=\"2023-04-04\")\n",
"disease_bt_new = bt.Disease(source=\"mondo\", version=\"2023-02-06\")\n",
"\n",
"new_entries, modified_entries = disease_bt_old.diff(disease_bt_new)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f7cbe33-b7f3-4c76-b277-3b849ac2ed98",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"new_entries.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "05dfa5e3-a50c-4488-bc86-929fd198c30d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"modified_entries.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ff90ce1-d427-43f6-9c22-868878d59a35",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -317,7 +371,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.10"
},
"vscode": {
"interpreter": {
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ classifiers = [
]
dependencies = [
"pronto>=2.5.4",
"lamin_logger>=0.8.0", # don't pin here as it's pinned in lamindb
"lamin_utils>=0.9.1", # don't pin here as it's pinned in lamindb
"pyyaml",
"pandas",
"pyarrow",
Expand Down
29 changes: 29 additions & 0 deletions tests/test_bionty.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,32 @@ def test_reset_sources(monkeypatch):
CURRENT_SOURCES.unlink()
LOCAL_SOURCES.unlink()
bt.reset_sources()


def test_diff_successful():
    """Diffing two distinct MONDO releases yields the expected row counts."""
    mondo_april = bt.Disease(source="mondo", version="2023-04-04")
    mondo_february = bt.Disease(source="mondo", version="2023-02-06")

    diff_result = mondo_april.diff(mondo_february)
    added, changed = diff_result

    # Counts are pinned to these two specific releases.
    assert 819 == len(added)
    assert 249 == len(changed)


def test_diff_value_errors():
    """``diff`` raises ValueError for each kind of incompatible pair."""
    # Objects of two different Bionty classes
    disease, phenotype = bt.Disease(), bt.Phenotype()
    with pytest.raises(ValueError):
        disease.diff(phenotype)

    # Same class, but backed by different sources
    mondo, doid = bt.Disease(source="mondo"), bt.Disease(source="doid")
    with pytest.raises(ValueError):
        mondo.diff(doid)

    # Same class and source, and also the same version
    pinned = {"source": "mondo", "version": "2023-04-04"}
    first, second = bt.Disease(**pinned), bt.Disease(**pinned)
    with pytest.raises(ValueError):
        first.diff(second)

0 comments on commit 1e5e8c3

Please sign in to comment.