Skip to content

Commit

Permalink
Update dependency versions (#294)
Browse files Browse the repository at this point in the history
* remove numpy and h5py version fix

* introduced near exact assertions
  • Loading branch information
mumichae authored Mar 21, 2022
1 parent b31d53b commit c4ce6d1
Show file tree
Hide file tree
Showing 12 changed files with 38 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ testing.h5ad
.vscode/settings.json
data
.ipynb_checkpoints
build/
*.egg-info
*dist/
*cache*
Expand Down
17 changes: 6 additions & 11 deletions scib/metrics/lisi.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,15 +571,10 @@ def compute_simpson_index_graph(
return lists

# read distances and indices with nan value handling
indices = pd.read_csv(input_path + '_indices_' + str(chunk_no) + '.txt',
header=None, sep='\n')
indices = indices[0].str.split(',', expand=True)
indices.set_index(keys=0, drop=True, inplace=True) # move cell index to DF index
indices = pd.read_table(input_path + '_indices_' + str(chunk_no) + '.txt', index_col=0, header=None, sep=',')
indices = indices.T
distances = pd.read_csv(input_path + '_distances_' + str(chunk_no) + '.txt',
header=None, sep='\n')
distances = distances[0].str.split(',', expand=True)
distances.set_index(keys=0, drop=True, inplace=True) # move cell index to DF index

distances = pd.read_table(input_path + '_distances_' + str(chunk_no) + '.txt', index_col=0, header=None, sep=',')
distances = distances.T

# get cell ids
Expand All @@ -592,18 +587,18 @@ def compute_simpson_index_graph(
for i in enumerate(chunk_ids):
# get neighbors and distances
# read line i from indices matrix
get_col = indices[str(i[1])]
get_col = indices[i[1]]

if get_col.isnull().sum() > 0:
# not enough neighbors
print(str(i[1]) + " has not enough neighbors.")
print(i[1] + " has not enough neighbors.")
simpson[i[0]] = 1 # np.nan #set nan for testing
continue
else:
knn_idx = get_col.astype('int') - 1 # get 0-based indexing

# read line i from distances matrix
D_act = distances[str(i[1])].values.astype('float')
D_act = distances[i[1]].values.astype('float')

# start lisi estimation
beta = 1
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@ packages =
scib.metrics
python_requires = >=3.7
install_requires =
numpy==1.18.1
numpy
pandas
seaborn
matplotlib
numba
scanpy>=1.5
anndata>=0.7.2
h5py<3
h5py
rpy2>=3
anndata2ri
scipy
Expand Down
5 changes: 5 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import os
import warnings

warnings.filterwarnings('ignore')
import subprocess
import logging
Expand All @@ -13,6 +14,10 @@
LOGGER = logging.getLogger(__name__)


def assert_near_exact(x, y, diff=1e-5):
    """Assert that ``x`` and ``y`` differ by at most ``diff``.

    The check passes when ``abs(x - y) <= diff`` and raises otherwise.

    Parameters
    ----------
    x, y
        Values to compare; they must support subtraction and ``abs``.
    diff
        Maximum allowed absolute difference (inclusive). Defaults to ``1e-5``.

    Raises
    ------
    AssertionError
        If the absolute difference exceeds ``diff``, or if the comparison is
        false for any other reason (e.g. one of the values is NaN).
    """
    delta = abs(x - y)
    # Raise explicitly instead of using a bare ``assert``: the check then
    # survives ``python -O``, and the failure message always names the
    # offending values even when assertion introspection is unavailable.
    # ``not (delta <= diff)`` (rather than ``delta > diff``) keeps NaN failing.
    if not (delta <= diff):
        raise AssertionError(
            f"{x} and {y} differ by {delta}, which exceeds the tolerance {diff}"
        )


def create_if_missing(dir):
if not os.path.isdir(dir):
os.mkdir(dir)
Expand Down
10 changes: 5 additions & 5 deletions tests/metrics/test_beyond_label_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def test_cell_cycle(adata_paul15):
adata_int,
batch_key='batch',
organism='mouse',
#recompute_cc=True,
# recompute_cc=True,
verbose=True
)
LOGGER.info(f"score: {score}")
assert score == 1
assert_near_exact(score, 1, diff=1e-12)


def test_cell_cycle_all(adata_paul15):
Expand All @@ -28,14 +28,14 @@ def test_cell_cycle_all(adata_paul15):
adata_int,
batch_key='batch',
organism='mouse',
#recompute_cc=True,
# recompute_cc=True,
agg_func=None,
verbose=True
)
LOGGER.info(f"\nscore: {scores_df}")
assert isinstance(scores_df, pd.DataFrame)
for i in scores_df['score']:
assert i == 1
assert_near_exact(i, 1, diff=1e-12)


def test_hvg_overlap(adata):
Expand All @@ -47,4 +47,4 @@ def test_hvg_overlap(adata):
n_hvg=500
)
LOGGER.info(f"score: {score}")
assert score == 1
assert_near_exact(score, 1, diff=1e-12)
6 changes: 3 additions & 3 deletions tests/metrics/test_clisi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_clisi_full(adata):
)

LOGGER.info(f"score: {score}")
assert 0.974 <= score <= 0.975
assert_near_exact(score, 0.975, diff=1e-2)


def test_clisi_embed(adata_neighbors):
Expand All @@ -24,7 +24,7 @@ def test_clisi_embed(adata_neighbors):
type_='embed'
)
LOGGER.info(f"score: {score}")
assert 0.981 <= score <= 0.982
assert_near_exact(score, 0.982, diff=1e-2)


def test_clisi_knn(adata_neighbors):
Expand All @@ -36,4 +36,4 @@ def test_clisi_knn(adata_neighbors):
type_='graph'
)
LOGGER.info(f"score: {score}")
assert 0.981 <= score <= 0.982
assert_near_exact(score, 0.982, diff=1e-2)
9 changes: 4 additions & 5 deletions tests/metrics/test_cluster_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@

def test_nmi_trivial(adata):
score = scib.me.nmi(adata, 'celltype', 'celltype')
assert score == 1
assert_near_exact(score, 1, diff=1e-12)


def test_ari_trivial(adata):
score = scib.me.ari(adata, 'celltype', 'celltype')
assert score == 1
assert_near_exact(score, 1, diff=1e-12)


def test_nmi(adata_neighbors):

_, _, nmi_all = scib.cl.opt_louvain(
adata_neighbors,
label_key='celltype',
Expand All @@ -31,7 +30,7 @@ def test_nmi(adata_neighbors):
def test_ari(adata_clustered):
score = scib.me.ari(adata_clustered, group1='cluster', group2='celltype')
LOGGER.info(f"score: {score}")
assert 0 <= score <= 1
assert_near_exact(score, 0.7614422905830917, diff=1e-2)


def test_isolated_labels_F1(adata_neighbors):
Expand All @@ -44,4 +43,4 @@ def test_isolated_labels_F1(adata_neighbors):
verbose=True
)
LOGGER.info(f"score: {score}")
assert 0 <= score <= 1
assert_near_exact(score, 0.5581395348837209, diff=1e-12)
2 changes: 1 addition & 1 deletion tests/metrics/test_graph_connectivity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
def test_graph_connectivity(adata_neighbors):
score = scib.me.graph_connectivity(adata_neighbors, label_key='celltype')
LOGGER.info(f"score: {score}")
assert score == 0.9670013350457753
assert_near_exact(score, 0.9670013350457753, diff=1e-3)
6 changes: 3 additions & 3 deletions tests/metrics/test_ilisi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_ilisi_full(adata):
)

LOGGER.info(f"score: {score}")
assert 0.234 <= score <= 0.235
assert_near_exact(score, 0.235, diff=1e-2)


def test_ilisi_embed(adata_neighbors):
Expand All @@ -22,7 +22,7 @@ def test_ilisi_embed(adata_neighbors):
type_='embed'
)
LOGGER.info(f"score: {score}")
assert 0.237 <= score <= 0.238
assert_near_exact(score, 0.238, diff=1e-2)


def test_ilisi_knn(adata_neighbors):
Expand All @@ -33,4 +33,4 @@ def test_ilisi_knn(adata_neighbors):
type_='graph'
)
LOGGER.info(f"score: {score}")
assert 0.237 <= score <= 0.238
assert_near_exact(score, 0.238, diff=1e-2)
6 changes: 3 additions & 3 deletions tests/metrics/test_pcr_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ def test_pcr_batch(adata):
scale=True
)
LOGGER.info(f"no PCA precomputed: {score}")
assert 0 <= score < 1e-6
assert_near_exact(score, 0, diff=1e-6)


def test_pcr_batch_precomputed(adata_pca):
score = scib.me.pcr_comparison(adata_pca, adata_pca, covariate='batch', scale=True)
LOGGER.info(f"precomputed PCA: {score}")
assert 0 <= score < 1e-6
assert_near_exact(score, 0, diff=1e-6)


def test_pcr_batch_embedding(adata):
Expand All @@ -34,4 +34,4 @@ def test_pcr_batch_embedding(adata):
scale=True
)
LOGGER.info(f"using embedding: {score}")
assert 0 <= score < 1e-6
assert_near_exact(score, 0, diff=1e-6)
7 changes: 3 additions & 4 deletions tests/metrics/test_silhouette_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def test_silhouette(adata_pca):
scale=True
)
LOGGER.info(f"score: {score}")
assert 0 <= score <= 1
assert_near_exact(score, 0.5626532882452011, diff=1e-3)


def test_silhouette_batch(adata_pca):
Expand All @@ -22,7 +22,7 @@ def test_silhouette_batch(adata_pca):
verbose=False
)
LOGGER.info(f"score: {score}")
assert 0 <= score <= 1
assert_near_exact(score, 0.9014384369842835, diff=1e-3)


def test_isolated_labels_silhouette(adata_pca):
Expand All @@ -35,5 +35,4 @@ def test_isolated_labels_silhouette(adata_pca):
verbose=True
)
LOGGER.info(f"score: {score}")
assert score <= 1
assert score >= 0
assert_near_exact(score, 0.6101431176066399, diff=1e-3)
4 changes: 2 additions & 2 deletions tests/metrics/test_trajectory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_trajectory(adata_neighbors):
pseudotime_key='dpt_pseudotime'
)
LOGGER.info(f"score: {score}")
assert 0.95609 <= score <= 0.9561
assert_near_exact(score, 0.9561, diff=1e-5)


def test_trajectory_batch(adata_neighbors):
Expand All @@ -33,4 +33,4 @@ def test_trajectory_batch(adata_neighbors):
pseudotime_key='dpt_pseudotime'
)
LOGGER.info(f"score: {score}")
assert 0.96316 <= score <= 0.96317
assert_near_exact(score, 0.96317, diff=1e-5)

0 comments on commit c4ce6d1

Please sign in to comment.