Update dependency versions (#294)

* Remove the pinned version constraints on numpy and h5py
* Introduce near-exact assertions in the tests
theislab · Mar 21, 2022 · c4ce6d1 · c4ce6d1
1 parent b31d53b
commit c4ce6d1
Show file tree

Hide file tree

Showing 12 changed files with 38 additions and 39 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,7 @@ testing.h5ad
 .vscode/settings.json
 data
 .ipynb_checkpoints
+build/
 *.egg-info
 *dist/
 *cache*

diff --git a/scib/metrics/lisi.py b/scib/metrics/lisi.py
@@ -571,15 +571,10 @@ def compute_simpson_index_graph(
         return lists
 
     # read distances and indices with nan value handling
-    indices = pd.read_csv(input_path + '_indices_' + str(chunk_no) + '.txt',
-                          header=None, sep='\n')
-    indices = indices[0].str.split(',', expand=True)
-    indices.set_index(keys=0, drop=True, inplace=True)  # move cell index to DF index
+    indices = pd.read_table(input_path + '_indices_' + str(chunk_no) + '.txt', index_col=0, header=None, sep=',')
     indices = indices.T
-    distances = pd.read_csv(input_path + '_distances_' + str(chunk_no) + '.txt',
-                            header=None, sep='\n')
-    distances = distances[0].str.split(',', expand=True)
-    distances.set_index(keys=0, drop=True, inplace=True)  # move cell index to DF index
+
+    distances = pd.read_table(input_path + '_distances_' + str(chunk_no) + '.txt', index_col=0, header=None, sep=',')
     distances = distances.T
 
     # get cell ids
@@ -592,18 +587,18 @@ def compute_simpson_index_graph(
     for i in enumerate(chunk_ids):
         # get neighbors and distances
         # read line i from indices matrix
-        get_col = indices[str(i[1])]
+        get_col = indices[i[1]]
 
         if get_col.isnull().sum() > 0:
             # not enough neighbors
-            print(str(i[1]) + " has not enough neighbors.")
+            print(i[1] + " has not enough neighbors.")
             simpson[i[0]] = 1  # np.nan #set nan for testing
             continue
         else:
             knn_idx = get_col.astype('int') - 1  # get 0-based indexing
 
         # read line i from distances matrix
-        D_act = distances[str(i[1])].values.astype('float')
+        D_act = distances[i[1]].values.astype('float')
 
         # start lisi estimation
         beta = 1

diff --git a/setup.cfg b/setup.cfg
@@ -44,14 +44,14 @@ packages =
 	scib.metrics
 python_requires = >=3.7
 install_requires = 
-	numpy==1.18.1
+	numpy
 	pandas
 	seaborn
 	matplotlib
 	numba
 	scanpy>=1.5
 	anndata>=0.7.2
-	h5py<3
+	h5py
 	rpy2>=3
 	anndata2ri
 	scipy

diff --git a/tests/common.py b/tests/common.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import os
 import warnings
+
 warnings.filterwarnings('ignore')
 import subprocess
 import logging
@@ -13,6 +14,10 @@
 LOGGER = logging.getLogger(__name__)
 
 
+def assert_near_exact(x, y, diff=1e-5):
+    assert abs(x - y) <= diff
+
+
 def create_if_missing(dir):
     if not os.path.isdir(dir):
         os.mkdir(dir)

diff --git a/tests/metrics/test_beyond_label_metrics.py b/tests/metrics/test_beyond_label_metrics.py
@@ -11,11 +11,11 @@ def test_cell_cycle(adata_paul15):
         adata_int,
         batch_key='batch',
         organism='mouse',
-        #recompute_cc=True,
+        # recompute_cc=True,
         verbose=True
     )
     LOGGER.info(f"score: {score}")
-    assert score == 1
+    assert_near_exact(score, 1, diff=1e-12)
 
 
 def test_cell_cycle_all(adata_paul15):
@@ -28,14 +28,14 @@ def test_cell_cycle_all(adata_paul15):
         adata_int,
         batch_key='batch',
         organism='mouse',
-        #recompute_cc=True,
+        # recompute_cc=True,
         agg_func=None,
         verbose=True
     )
     LOGGER.info(f"\nscore: {scores_df}")
     assert isinstance(scores_df, pd.DataFrame)
     for i in scores_df['score']:
-        assert i == 1
+        assert_near_exact(i, 1, diff=1e-12)
 
 
 def test_hvg_overlap(adata):
@@ -47,4 +47,4 @@ def test_hvg_overlap(adata):
         n_hvg=500
     )
     LOGGER.info(f"score: {score}")
-    assert score == 1
+    assert_near_exact(score, 1, diff=1e-12)
diff --git a/tests/metrics/test_clisi.py b/tests/metrics/test_clisi.py
@@ -11,7 +11,7 @@ def test_clisi_full(adata):
     )
 
     LOGGER.info(f"score: {score}")
-    assert 0.974 <= score <= 0.975
+    assert_near_exact(score, 0.975, diff=1e-2)
 
 
 def test_clisi_embed(adata_neighbors):
@@ -24,7 +24,7 @@ def test_clisi_embed(adata_neighbors):
         type_='embed'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.981 <= score <= 0.982
+    assert_near_exact(score, 0.982, diff=1e-2)
 
 
 def test_clisi_knn(adata_neighbors):
@@ -36,4 +36,4 @@ def test_clisi_knn(adata_neighbors):
         type_='graph'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.981 <= score <= 0.982
+    assert_near_exact(score, 0.982, diff=1e-2)
diff --git a/tests/metrics/test_cluster_metrics.py b/tests/metrics/test_cluster_metrics.py
@@ -3,16 +3,15 @@
 
 def test_nmi_trivial(adata):
     score = scib.me.nmi(adata, 'celltype', 'celltype')
-    assert score == 1
+    assert_near_exact(score, 1, diff=1e-12)
 
 
 def test_ari_trivial(adata):
     score = scib.me.ari(adata, 'celltype', 'celltype')
-    assert score == 1
+    assert_near_exact(score, 1, diff=1e-12)
 
 
 def test_nmi(adata_neighbors):
-
     _, _, nmi_all = scib.cl.opt_louvain(
         adata_neighbors,
         label_key='celltype',
@@ -31,7 +30,7 @@ def test_nmi(adata_neighbors):
 def test_ari(adata_clustered):
     score = scib.me.ari(adata_clustered, group1='cluster', group2='celltype')
     LOGGER.info(f"score: {score}")
-    assert 0 <= score <= 1
+    assert_near_exact(score, 0.7614422905830917, diff=1e-2)
 
 
 def test_isolated_labels_F1(adata_neighbors):
@@ -44,4 +43,4 @@ def test_isolated_labels_F1(adata_neighbors):
         verbose=True
     )
     LOGGER.info(f"score: {score}")
-    assert 0 <= score <= 1
+    assert_near_exact(score, 0.5581395348837209, diff=1e-12)
diff --git a/tests/metrics/test_graph_connectivity.py b/tests/metrics/test_graph_connectivity.py
@@ -4,4 +4,4 @@
 def test_graph_connectivity(adata_neighbors):
     score = scib.me.graph_connectivity(adata_neighbors, label_key='celltype')
     LOGGER.info(f"score: {score}")
-    assert score == 0.9670013350457753
+    assert_near_exact(score, 0.9670013350457753, diff=1e-3)
diff --git a/tests/metrics/test_ilisi.py b/tests/metrics/test_ilisi.py
@@ -10,7 +10,7 @@ def test_ilisi_full(adata):
     )
 
     LOGGER.info(f"score: {score}")
-    assert 0.234 <= score <= 0.235
+    assert_near_exact(score, 0.235, diff=1e-2)
 
 
 def test_ilisi_embed(adata_neighbors):
@@ -22,7 +22,7 @@ def test_ilisi_embed(adata_neighbors):
         type_='embed'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.237 <= score <= 0.238
+    assert_near_exact(score, 0.238, diff=1e-2)
 
 
 def test_ilisi_knn(adata_neighbors):
@@ -33,4 +33,4 @@ def test_ilisi_knn(adata_neighbors):
         type_='graph'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.237 <= score <= 0.238
+    assert_near_exact(score, 0.238, diff=1e-2)
diff --git a/tests/metrics/test_pcr_metrics.py b/tests/metrics/test_pcr_metrics.py
@@ -14,13 +14,13 @@ def test_pcr_batch(adata):
         scale=True
     )
     LOGGER.info(f"no PCA precomputed: {score}")
-    assert 0 <= score < 1e-6
+    assert_near_exact(score, 0, diff=1e-6)
 
 
 def test_pcr_batch_precomputed(adata_pca):
     score = scib.me.pcr_comparison(adata_pca, adata_pca, covariate='batch', scale=True)
     LOGGER.info(f"precomputed PCA: {score}")
-    assert 0 <= score < 1e-6
+    assert_near_exact(score, 0, diff=1e-6)
 
 
 def test_pcr_batch_embedding(adata):
@@ -34,4 +34,4 @@ def test_pcr_batch_embedding(adata):
         scale=True
     )
     LOGGER.info(f"using embedding: {score}")
-    assert 0 <= score < 1e-6
+    assert_near_exact(score, 0, diff=1e-6)
diff --git a/tests/metrics/test_silhouette_metrics.py b/tests/metrics/test_silhouette_metrics.py
@@ -9,7 +9,7 @@ def test_silhouette(adata_pca):
         scale=True
     )
     LOGGER.info(f"score: {score}")
-    assert 0 <= score <= 1
+    assert_near_exact(score, 0.5626532882452011, diff=1e-3)
 
 
 def test_silhouette_batch(adata_pca):
@@ -22,7 +22,7 @@ def test_silhouette_batch(adata_pca):
         verbose=False
     )
     LOGGER.info(f"score: {score}")
-    assert 0 <= score <= 1
+    assert_near_exact(score, 0.9014384369842835, diff=1e-3)
 
 
 def test_isolated_labels_silhouette(adata_pca):
@@ -35,5 +35,4 @@ def test_isolated_labels_silhouette(adata_pca):
         verbose=True
     )
     LOGGER.info(f"score: {score}")
-    assert score <= 1
-    assert score >= 0
+    assert_near_exact(score, 0.6101431176066399, diff=1e-3)
diff --git a/tests/metrics/test_trajectory.py b/tests/metrics/test_trajectory.py
@@ -15,7 +15,7 @@ def test_trajectory(adata_neighbors):
         pseudotime_key='dpt_pseudotime'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.95609 <= score <= 0.9561
+    assert_near_exact(score, 0.9561, diff=1e-5)
 
 
 def test_trajectory_batch(adata_neighbors):
@@ -33,4 +33,4 @@ def test_trajectory_batch(adata_neighbors):
         pseudotime_key='dpt_pseudotime'
     )
     LOGGER.info(f"score: {score}")
-    assert 0.96316 <= score <= 0.96317
+    assert_near_exact(score, 0.96317, diff=1e-5)