From 95c68523809130b989efd6e2106d96e0a4377c9b Mon Sep 17 00:00:00 2001
From: Daniel Obraczka <obraczka@informatik.uni-leipzig.de>
Date: Fri, 22 Dec 2023 10:05:43 +0100
Subject: [PATCH] Major refactor (#33)

* Do not call kcandidates if no hr is used

* Adapted changelog

* Early return for no hubness reduction

* Avoid NotFittedError

* Fixed index order

* Improve no hubness

* Remove float transformation

* Started refactoring

* Fixed some inconsistencies

* More detailed analysis

* Fix import

* Set only fit target flat

* Major refactor and simplification

* Fixed some doc stuff
---
 kiez/analysis/estimation.py                   |  11 +-
 kiez/hubness_reduction/__init__.py            |   4 -
 kiez/hubness_reduction/base.py                |  98 ++++++--
 kiez/hubness_reduction/csls.py                |  81 +------
 kiez/hubness_reduction/dis_sim.py             |  94 ++------
 kiez/hubness_reduction/local_scaling.py       |  73 +-----
 kiez/hubness_reduction/mutual_proximity.py    |  49 +---
 kiez/io/temp_file_handling.py                 |   2 +-
 kiez/kiez.py                                  | 143 +++---------
 kiez/neighbors/approximate/faiss.py           | 131 ++---------
 .../exact/sklearn_nearest_neighbors.py        |   3 +-
 kiez/neighbors/neighbor_algorithm_base.py     |  20 +-
 kiez/neighbors/util.py                        |   6 +-
 noxfile.py                                    |  50 +++-
 pyproject.toml                                |   2 +-
 tests/conftest.py                             |  11 +
 .../test_mutual_proximity.py                  |  25 --
 tests/hubness_reduction/test_wrong_inputs.py  |  21 ++
 tests/neighbors/test_alignment.py             | 178 ---------------
 tests/neighbors/test_annoy.py                 |  47 ++--
 tests/neighbors/test_base.py                  |  11 +-
 tests/neighbors/test_faiss.py                 |  12 +-
 tests/neighbors/test_hnsw.py                  |  42 +---
 tests/neighbors/test_nng.py                   |  60 ++---
 tests/neighbors/test_sklearn.py               |   4 +-
 tests/test_kiez.py                            | 216 +++++++-----------
 26 files changed, 433 insertions(+), 961 deletions(-)
 create mode 100644 tests/conftest.py
 delete mode 100644 tests/hubness_reduction/test_mutual_proximity.py
 create mode 100644 tests/hubness_reduction/test_wrong_inputs.py
 delete mode 100644 tests/neighbors/test_alignment.py

diff --git a/kiez/analysis/estimation.py b/kiez/analysis/estimation.py
index b75d41c..22f92d9 100644
--- a/kiez/analysis/estimation.py
+++ b/kiez/analysis/estimation.py
@@ -45,6 +45,7 @@ def _calc_skewness_truncnorm(k_occurrence: np.ndarray) -> float:
     ----------
     k_occurrence: ndarray
         Reverse nearest neighbor count for each object.
+
     Returns
     -------
     skew_truncnorm
@@ -62,7 +63,8 @@ def _calc_skewness_truncnorm(k_occurrence: np.ndarray) -> float:
 def _calc_gini_index(
     k_occurrence: np.ndarray, limiting="memory", verbose: int = 0
 ) -> float:
-    """Hubness measure; Gini index
+    """Hubness measure; Gini index.
+
     Parameters
     ----------
     k_occurrence: ndarray
@@ -73,6 +75,7 @@ def _calc_gini_index(
         otherwise use naive implementation (slow, low memory usage)
     verbose: int
         control verbosity
+
     Returns
     -------
     gini_index
@@ -103,6 +106,7 @@ def _calc_robinhood_index(k_occurrence: np.ndarray) -> float:
     ----------
     k_occurrence: ndarray
         Reverse nearest neighbor count for each object.
+
     Returns
     -------
     robinhood_index
@@ -135,6 +139,7 @@ def _calc_atkinson_index(k_occurrence: np.ndarray, eps: float = 0.5) -> float:
         Reverse nearest neighbor count for each object.
     eps: float
         'Income' weight. Turns the index into a normative measure.
+
     Returns
     -------
     atkinson_index
@@ -156,6 +161,7 @@ def _calc_antihub_occurrence(k_occurrence: np.ndarray) -> Tuple[np.ndarray, floa
     ----------
     k_occurrence: ndarray
         Reverse nearest neighbor count for each object.
+
     Returns
     -------
     antihubs, antihub_occurrence
@@ -180,6 +186,7 @@ def _calc_hub_occurrence(
         Number of queries (or objects in a test set)
     hub_size: float
         Factor to determine hubs
+
     Returns
     -------
     hubs, hub_occurrence
@@ -201,7 +208,7 @@ def hubness_score(
     return_value: str = "all_but_gini",
     store_k_occurrence: bool = False,
 ) -> Union[float, dict]:
-    """Calculates hubness scores from given neighbor indices
+    """Calculate hubness scores from given neighbor indices.
 
     Utilizes findings from [1]_ and [2]_.
 
diff --git a/kiez/hubness_reduction/__init__.py b/kiez/hubness_reduction/__init__.py
index bc90944..1740fba 100644
--- a/kiez/hubness_reduction/__init__.py
+++ b/kiez/hubness_reduction/__init__.py
@@ -1,7 +1,3 @@
-# -*- coding: utf-8 -*-
-# SPDX-License-Identifier: BSD-3-Clause
-# adapted from skhubness: https://github.com/VarIr/scikit-hubness/
-
 from class_resolver import ClassResolver
 
 from .base import HubnessReduction, NoHubnessReduction
diff --git a/kiez/hubness_reduction/base.py b/kiez/hubness_reduction/base.py
index 87c81b7..9040384 100644
--- a/kiez/hubness_reduction/base.py
+++ b/kiez/hubness_reduction/base.py
@@ -1,41 +1,97 @@
-# -*- coding: utf-8 -*-
-# adapted from skhubness
-# SPDX-License-Identifier: BSD-3-Clause
-
+import warnings
 from abc import ABC, abstractmethod
+from typing import Optional, Tuple
+
+import numpy as np
+
+from ..neighbors import NNAlgorithm
 
 
 class HubnessReduction(ABC):
     """Base class for hubness reduction."""
 
-    @abstractmethod
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, nn_algo: NNAlgorithm, verbose: int = 0, **kwargs):
+        self.nn_algo = nn_algo
+        self.verbose = verbose
+        if nn_algo.n_candidates == 1:
+            raise ValueError(
+                "Cannot perform hubness reduction with a single candidate per query!"
+            )
 
     @abstractmethod
-    def fit(
-        self, neigh_dist, neigh_ind, source, target, assume_sorted, *args, **kwargs
-    ):
+    def _fit(self, neigh_dist, neigh_ind, source, target):
         pass  # pragma: no cover
 
+    def fit(self, source, target=None):
+        self.nn_algo.fit(source, target)
+        if target is None:
+            target = source
+        neigh_dist_t_to_s, neigh_ind_t_to_s = self.nn_algo.kneighbors(
+            k=self.nn_algo.n_candidates,
+            query=target,
+            s_to_t=False,
+            return_distance=True,
+        )
+        self._fit(
+            neigh_dist_t_to_s,
+            neigh_ind_t_to_s,
+            source,
+            target,
+        )
+
     @abstractmethod
-    def transform(self, neigh_dist, neigh_ind, query, assume_sorted, *args, **kwargs):
+    def transform(self, neigh_dist, neigh_ind, query) -> Tuple[np.ndarray, np.ndarray]:
         pass  # pragma: no cover
 
+    def _set_k_if_needed(self, k: Optional[int] = None) -> int:
+        if k is None:
+            warnings.warn(
+                f"No k supplied, setting to n_candidates = {self.nn_algo.n_candidates}"
+            )
+            return self.nn_algo.n_candidates
+        if k > self.nn_algo.n_candidates:
+            warnings.warn(
+                "k > n_candidates supplied! Setting to n_candidates ="
+                f" {self.nn_algo.n_candidates}"
+            )
+            return self.nn_algo.n_candidates
+        return k
 
-class NoHubnessReduction(HubnessReduction):
-    """Compatibility class for neighbor search without hubness reduction."""
+    def kneighbors(self, k: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
+        n_neighbors = self._set_k_if_needed(k)
+        # First obtain candidate neighbors
+        query_dist, query_ind = self.nn_algo.kneighbors(
+            query=None, k=self.nn_algo.n_candidates, return_distance=True
+        )
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+        # Second, reduce hubness
+        hubness_reduced_query_dist, query_ind = self.transform(
+            query_dist,
+            query_ind,
+            self.nn_algo.source_,
+        )
+        # Third, sort hubness reduced candidate neighbors to get the final k neighbors
+        kth = np.arange(n_neighbors)
+        mask = np.argpartition(hubness_reduced_query_dist, kth=kth)[:, :n_neighbors]
+        hubness_reduced_query_dist = np.take_along_axis(
+            hubness_reduced_query_dist, mask, axis=1
+        )
+        query_ind = np.take_along_axis(query_ind, mask, axis=1)
+        return hubness_reduced_query_dist, query_ind
 
-    def fit(self, *args, **kwargs):
+
+class NoHubnessReduction(HubnessReduction):
+    """Compatibility class for neighbor search without hubness reduction."""
+
+    def _fit(self, neigh_dist, neigh_ind, source, target):
         pass  # pragma: no cover
 
-    def __repr__(self):
-        return "NoHubnessReduction"
+    def fit(self, source, target=None):
+        self.nn_algo.fit(source, target, only_fit_target=True)
 
-    def transform(
-        self, neigh_dist, neigh_ind, query, assume_sorted=True, *args, **kwargs
-    ):
+    def transform(self, neigh_dist, neigh_ind, query) -> Tuple[np.ndarray, np.ndarray]:
         return neigh_dist, neigh_ind
+
+    def kneighbors(self, k: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
+        n_neighbors = self._set_k_if_needed(k)
+        return self.nn_algo.kneighbors(query=None, k=n_neighbors, return_distance=True)
diff --git a/kiez/hubness_reduction/csls.py b/kiez/hubness_reduction/csls.py
index eeb6eea..c252f77 100644
--- a/kiez/hubness_reduction/csls.py
+++ b/kiez/hubness_reduction/csls.py
@@ -15,13 +15,6 @@ class CSLS(HubnessReduction):
 
     Uses the formula presented in [1]_.
 
-    Parameters
-    ----------
-    k: int, default = 5
-        Number of neighbors to consider for mean distance of k-nearest neighbors
-    verbose: int, default= 0
-        Verbosity level
-
     References
     ----------
     .. [1] Lample, G., Conneau, A., Ranzato, M., Denoyer, L., & Jégou, H. (2018)
@@ -31,23 +24,15 @@ class CSLS(HubnessReduction):
            https://openreview.net/forum?id=H196sainb
     """
 
-    def __init__(self, k: int = 5, verbose: int = 0, *args, **kwargs):
-        super().__init__(**kwargs)
-        self.k = k
-        self.verbose = verbose
-
     def __repr__(self):
-        return f"{self.__class__.__name__}(k={self.k}, verbose = {self.verbose})"
+        return f"{self.__class__.__name__}(verbose = {self.verbose})"
 
-    def fit(
+    def _fit(
         self,
         neigh_dist,
         neigh_ind,
         source=None,
         target=None,
-        assume_sorted=None,
-        *args,
-        **kwargs,
     ) -> CSLS:
         """Fit the model using target, neigh_dist, and neigh_ind as training data.
 
@@ -62,44 +47,14 @@ def fit(
             ignored
         target
             ignored
-        assume_sorted: bool, default=True #noqa: DAR103
-            Assume input matrices are sorted according to neigh_dist.
-            If False, these are sorted here.
-        *args
-            Ignored
-        **kwargs
-            Ignored
+
         Returns
         -------
         CSLS
             Fitted CSLS
-        Raises
-        ------
-        ValueError
-            If self.k < 0
-        TypeError
-            If self.k not int
         """
-        # Check equal number of rows and columns
-        check_consistent_length(neigh_ind, neigh_dist)
-        check_consistent_length(neigh_ind.T, neigh_dist.T)
-        try:
-            if self.k <= 0:
-                raise ValueError(f"Expected k > 0. Got {self.k}")
-        except TypeError as exc:
-            raise TypeError(f"Expected k: int > 0. Got {self.k}") from exc
-
-        # increment to include the k-th element in slicing
-        k = self.k + 1
-
-        if assume_sorted:
-            self.r_dist_train_ = neigh_dist[:, :k]
-            self.r_ind_train_ = neigh_ind[:, :k]
-        else:
-            kth = np.arange(self.k)
-            mask = np.argpartition(neigh_dist, kth=kth)[:, :k]
-            self.r_dist_train_ = np.take_along_axis(neigh_dist, mask, axis=1)
-            self.r_ind_train_ = np.take_along_axis(neigh_ind, mask, axis=1)
+        self.r_dist_train_ = neigh_dist
+        self.r_ind_train_ = neigh_ind
         return self
 
     def transform(
@@ -107,9 +62,6 @@ def transform(
         neigh_dist,
         neigh_ind,
         query,
-        assume_sorted: bool = True,
-        *args,
-        **kwargs,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Transform distance between test and training data with CSLS.
 
@@ -122,17 +74,12 @@ def transform(
             Neighbor indices corresponding to the values in neigh_dist
         query
             Ignored
-        assume_sorted: bool
-            ignored
-        *args
-            Ignored
-        **kwargs
-            Ignored
 
         Returns
         -------
         hub_reduced_dist, neigh_ind
             CSLS distances, and corresponding neighbor indices
+
         Notes
         -----
         The returned distances are NOT sorted! If you use this class directly,
@@ -142,22 +89,8 @@ def transform(
 
         n_test, n_indexed = neigh_dist.shape
 
-        if n_indexed == 1:
-            warnings.warn(
-                "Cannot perform hubness reduction with a single neighbor per query. "
-                "Skipping hubness reduction, and returning untransformed distances."
-            )
-            return neigh_dist, neigh_ind
-
-        k = self.k
-
         # Find average distances to the k nearest neighbors
-        if assume_sorted:
-            r_dist_test = neigh_dist[:, :k]
-        else:
-            kth = np.arange(self.k)
-            mask = np.argpartition(neigh_dist, kth=kth)[:, :k]
-            r_dist_test = np.take_along_axis(neigh_dist, mask, axis=1)
+        r_dist_test = neigh_dist
 
         hub_reduced_dist = np.empty_like(neigh_dist)
 
diff --git a/kiez/hubness_reduction/dis_sim.py b/kiez/hubness_reduction/dis_sim.py
index ce8240d..49849ba 100644
--- a/kiez/hubness_reduction/dis_sim.py
+++ b/kiez/hubness_reduction/dis_sim.py
@@ -22,12 +22,11 @@ class DisSimLocal(HubnessReduction):
 
     Parameters
     ----------
-    k: int, default = 5
-        Number of neighbors to consider for the local centroids
     squared: bool, default = True
         DisSimLocal operates on squared Euclidean distances.
         If True, return (quasi) squared Euclidean distances;
         if False, return (quasi) Eucldean distances.
+
     References
     ----------
     .. [1] Hara K, Suzuki I, Kobayashi K, Fukumizu K, Radovanović M (2016)
@@ -36,23 +35,35 @@ class DisSimLocal(HubnessReduction):
            https://www.aaai.org/ocs/index.php/AAAI/AAAI16/paper/viewPaper/12055
     """
 
-    def __init__(self, k: int = 5, squared: bool = True, *args, **kwargs):
-        super().__init__()
-        self.k = k
+    def __init__(self, squared: bool = True, **kwargs):
+        super().__init__(**kwargs)
         self.squared = squared
+        if self.nn_algo.metric in ["euclidean", "minkowski"]:
+            self.squared = False
+            if hasattr(self.nn_algo, "p"):
+                if self.nn_algo.p != 2:
+                    raise ValueError(
+                        "DisSimLocal only supports squared Euclidean distances. If"
+                        " the provided NNAlgorithm has a `p` parameter it must be"
+                        f" set to p=2. Now it is p={self.nn_algo.p}"
+                    )
+        elif self.nn_algo.metric in ["sqeuclidean"]:
+            self.squared = True
+        else:
+            raise ValueError(
+                "DisSimLocal only supports squared Euclidean distances, not"
+                f" metric={self.nn_algo.metric}."
+            )
 
     def __repr__(self):
-        return f"{self.__class__.__name__}(k={self.k}, squared = {self.squared})"
+        return f"{self.__class__.__name__}(squared = {self.squared})"
 
-    def fit(
+    def _fit(
         self,
         neigh_dist: np.ndarray,
         neigh_ind: np.ndarray,
         source: np.ndarray,
         target: np.ndarray,
-        assume_sorted: bool = True,
-        *args,
-        **kwargs,
     ) -> DisSimLocal:
         """Fit the model using target, neigh_dist, and neigh_ind as training data.
 
@@ -69,47 +80,14 @@ def fit(
         target: np.ndarray, shape (n_samples, n_features)
             Target embedding, where n_samples is the number of vectors,
             and n_features their dimensionality (number of features).
-        assume_sorted: bool, default=True #noqa: DAR103
-            Assume input matrices are sorted according to neigh_dist.
-            If False, these are sorted here.
-        *args: Ignored
-            Ignored
-        **kwargs: Ignored
-            Ignored
+
         Returns
         -------
         DisSimLocal
             Fitted DisSimLocal
-        Raises
-        ------
-        ValueError
-            If self.k < 0
-        TypeError
-            If self.k not int
         """
-        # Check equal number of rows and columns
-        check_consistent_length(neigh_ind, neigh_dist)
-        check_consistent_length(neigh_ind.T, neigh_dist.T)
-        try:
-            if self.k <= 0:
-                raise ValueError(f"Expected k > 0. Got {self.k}")
-        except TypeError as exc:
-            raise TypeError(f"Expected k: int > 0. Got {self.k}") from exc
-
-        k = self.k
-        if k > neigh_ind.shape[1]:
-            warnings.warn(
-                "Neighborhood parameter k larger than provided neighbors in"
-                f" neigh_dist, neigh_ind. Will reduce to k={neigh_ind.shape[1]}."
-            )
-            k = neigh_ind.shape[1]
-
         # Calculate local neighborhood centroids among the training points
-        if assume_sorted:
-            knn = neigh_ind[:, :k]
-        else:
-            mask = np.argpartition(neigh_dist, kth=k - 1)[:, :k]
-            knn = np.take_along_axis(neigh_ind, mask, axis=1)
+        knn = neigh_ind
         centroids = source[knn].mean(axis=1)
         dist_to_cent = row_norms(target - centroids, squared=True)
 
@@ -117,7 +95,6 @@ def fit(
         self.target_ = target
         self.target_centroids_ = centroids
         self.target_dist_to_centroids_ = dist_to_cent
-
         return self
 
     def transform(
@@ -125,9 +102,6 @@ def transform(
         neigh_dist: np.ndarray,
         neigh_ind: np.ndarray,
         query: np.ndarray,
-        assume_sorted: bool = True,
-        *args,
-        **kwargs,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Transform distance between test and training data with DisSimLocal.
 
@@ -141,13 +115,12 @@ def transform(
         query: np.ndarray, shape (n_query, n_features)
             Query entities that were used to obtain neighbors
             If none is provided use source that was provided in fit step
-        assume_sorted: bool
-            ignored
 
         Returns
         -------
         hub_reduced_dist, neigh_ind
             DisSimLocal distances, and corresponding neighbor indices
+
         Notes
         -----
         The returned distances are NOT sorted! If you use this class directly,
@@ -157,27 +130,10 @@ def transform(
             self,
             ["target_", "target_centroids_", "target_dist_to_centroids_"],
         )
-        if query is None:
-            query = self.source_
-
         n_test, n_indexed = neigh_dist.shape
 
-        if n_indexed == 1:
-            warnings.warn(
-                "Cannot perform hubness reduction with a single neighbor per query. "
-                "Skipping hubness reduction, and returning untransformed distances."
-            )
-            return neigh_dist, neigh_ind
-
-        k = self.k
-        if k > neigh_ind.shape[1]:
-            warnings.warn(
-                "Neighborhood parameter k larger than provided neighbors in"
-                f" neigh_dist, neigh_ind. Will reduce to k={neigh_ind.shape[1]}."
-            )
-            k = neigh_ind.shape[1]
-
         # Calculate local neighborhood centroids for source objects among target objects
+        k = neigh_ind.shape[1]
         mask = np.argpartition(neigh_dist, kth=k - 1)
         for i, ind in enumerate(neigh_ind):
             neigh_dist[i, :] = euclidean_distances(
diff --git a/kiez/hubness_reduction/local_scaling.py b/kiez/hubness_reduction/local_scaling.py
index b4a28a8..03fb7b2 100644
--- a/kiez/hubness_reduction/local_scaling.py
+++ b/kiez/hubness_reduction/local_scaling.py
@@ -29,6 +29,7 @@ class LocalScaling(HubnessReduction):
         - 'nicdm' rescales distances using a statistic over distances to k neighbors
     verbose: int, default = 0
         If verbose > 0, show progress bar.
+
     References
     ----------
     .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
@@ -36,33 +37,26 @@ class LocalScaling(HubnessReduction):
            Learning Research, 13(1), 2871–2902.
     """
 
-    def __init__(
-        self, k: int = 5, method: str = "standard", verbose: int = 0, **kwargs
-    ):
+    def __init__(self, method: str = "standard", **kwargs):
         super().__init__(**kwargs)
-        self.k = k
         self.method = method.lower()
         if self.method not in ["ls", "standard", "nicdm"]:
             raise ValueError(
                 f"Internal: Invalid method {self.method}. Try 'ls' or 'nicdm'."
             )
-        self.verbose = verbose
 
     def __repr__(self):
         return (
-            f"{self.__class__.__name__}(k={self.k}, method = {self.method}, verbose ="
+            f"{self.__class__.__name__}(method = {self.method}, verbose ="
             f" {self.verbose})"
         )
 
-    def fit(
+    def _fit(
         self,
         neigh_dist,
         neigh_ind,
         source,
         target,
-        assume_sorted: bool = True,
-        *args,
-        **kwargs,
     ) -> LocalScaling:
         """Fit the model using neigh_dist and neigh_ind as training data.
 
@@ -77,34 +71,14 @@ def fit(
             Ignored
         target
             Ignored
-        assume_sorted: bool, default = True #noqa: DAR103
-            Assume input matrices are sorted according to neigh_dist.
-            If False, these are sorted here.
-        *args
-            Ignored
-        **kwargs
-            Ignored
+
         Returns
         -------
         LocalScaling
             Fitted LocalScaling
         """
-        # Check equal number of rows and columns
-        check_consistent_length(neigh_ind, neigh_dist)
-        check_consistent_length(neigh_ind.T, neigh_dist.T)
-
-        # increment to include the k-th element in slicing
-        k = self.k + 1
-
-        # Find distances to the k-th neighbor (standard LS) or the k neighbors (NICDM)
-        if assume_sorted:
-            self.r_dist_t_to_s_ = neigh_dist[:, :k]
-            self.r_ind_t_to_s_ = neigh_ind[:, :k]
-        else:
-            kth = np.arange(self.k)
-            mask = np.argpartition(neigh_dist, kth=kth)[:, :k]
-            self.r_dist_t_to_s_ = np.take_along_axis(neigh_dist, mask, axis=1)
-            self.r_ind_t_to_s_ = np.take_along_axis(neigh_ind, mask, axis=1)
+        self.r_dist_t_to_s_ = neigh_dist
+        self.r_ind_t_to_s_ = neigh_ind
         return self
 
     def transform(
@@ -112,9 +86,6 @@ def transform(
         neigh_dist,
         neigh_ind,
         query=None,
-        assume_sorted: bool = True,
-        *args,
-        **kwargs,
     ) -> Tuple[np.ndarray, np.ndarray]:
         """Transform distance between test and training data with Mutual Proximity.
 
@@ -127,18 +98,17 @@ def transform(
             Neighbor indices corresponding to the values in neigh_dist
         query
             Ignored
-        assume_sorted: bool, default = True #noqa: DAR103
-            Assume input matrices are sorted according to neigh_dist.
-            If False, these are partitioned here.
-            NOTE: The returned matrices are never sorted.
+
         Returns
         -------
         hub_reduced_dist, neigh_ind
             Local scaling distances, and corresponding neighbor indices
+
         Raises
         ------
         ValueError
             If wrong self.method was supplied
+
         Notes
         -----
         The returned distances are NOT sorted! If you use this class directly,
@@ -148,23 +118,8 @@ def transform(
 
         n_test, n_indexed = neigh_dist.shape
 
-        if n_indexed == 1:
-            warnings.warn(
-                "Cannot perform hubness reduction with a single neighbor per query. "
-                "Skipping hubness reduction, and returning untransformed distances."
-            )
-            return neigh_dist, neigh_ind
-
-        # increment to include the k-th element in slicing
-        k = self.k + 1
-
         # Find distances to the k-th neighbor (standard LS) or the k neighbors (NICDM)
-        if assume_sorted:
-            r_dist_s_to_t = neigh_dist[:, :k]
-        else:
-            kth = np.arange(self.k)
-            mask = np.argpartition(neigh_dist, kth=kth)[:, :k]
-            r_dist_s_to_t = np.take_along_axis(neigh_dist, mask, axis=1)
+        r_dist_s_to_t = neigh_dist
 
         # Calculate LS or NICDM
         hub_reduced_dist = np.empty_like(neigh_dist)
@@ -178,11 +133,7 @@ def transform(
         )
 
         # Perform standard local scaling...
-        if self.method not in ["ls", "standard", "nicdm"]:
-            raise ValueError(
-                f"Internal: Invalid method {self.method}. Try 'ls' or 'nicdm'."
-            )
-        elif self.method in ["ls", "standard"]:
+        if self.method in ["ls", "standard"]:
             r_t_to_s = self.r_dist_t_to_s_[:, -1]
             r_s_to_t = r_dist_s_to_t[:, -1]
             for i in range_n_test:
diff --git a/kiez/hubness_reduction/mutual_proximity.py b/kiez/hubness_reduction/mutual_proximity.py
index be3f950..358b474 100644
--- a/kiez/hubness_reduction/mutual_proximity.py
+++ b/kiez/hubness_reduction/mutual_proximity.py
@@ -31,6 +31,7 @@ class MutualProximity(HubnessReduction):
         - 'empiric' or 'exact' model distances with the empiric distributions (slow)
     verbose: int, default = 0
         If verbose > 0, show progress bar.
+
     References
     ----------
     .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012).
@@ -38,18 +39,17 @@ class MutualProximity(HubnessReduction):
            Learning Research, 13(1), 2871–2902.
     """
 
-    def __init__(self, method: str = "normal", verbose: int = 0, **kwargs):
+    def __init__(self, method: str = "normal", **kwargs):
         super().__init__(**kwargs)
         if method not in ["exact", "empiric", "normal", "gaussi"]:
             raise ValueError(
                 f'Mutual proximity method "{method}" not recognized. Try "normal"'
                 ' or "empiric".'
             )
-        if method in ["exact", "empiric"]:
+        elif method in ["exact", "empiric"]:
             self.method = "empiric"
         elif method in ["normal", "gaussi"]:
             self.method = "normal"
-        self.verbose = verbose
 
     def __repr__(self):
         return (
@@ -57,15 +57,12 @@ def __repr__(self):
             f" {self.verbose})"
         )
 
-    def fit(
+    def _fit(
         self,
         neigh_dist,
         neigh_ind,
         source,
         target,
-        assume_sorted=None,
-        *args,
-        **kwargs,
     ) -> MutualProximity:
         """Fit the model using neigh_dist and neigh_ind as training data.
 
@@ -80,10 +77,9 @@ def fit(
             Ignored
         target
             Ignored
-        assume_sorted
-            Ignored
+
         Returns
-        ------
+        -------
         MutualProximity
 
         Raises
@@ -91,17 +87,9 @@ def fit(
         ValueError
             If self.method is unknown
         """
-        # Check equal number of rows and columns
-        check_consistent_length(neigh_ind, neigh_dist)
-        check_consistent_length(neigh_ind.T, neigh_dist.T)
-        check_array(neigh_dist, force_all_finite=False)
-        check_array(neigh_ind)
-
         self.n_train = neigh_dist.shape[0]
 
-        if self.method not in ["normal", "empiric"]:
-            raise ValueError(f"Internal: Invalid method {self.method}.")
-        elif self.method == "empiric":
+        if self.method == "empiric":
             self.neigh_dist_t_to_s_ = neigh_dist
             self.neigh_ind_t_to_s_ = neigh_ind
         elif self.method == "normal":
@@ -109,9 +97,7 @@ def fit(
             self.sd_t_to_s_ = np.nanstd(neigh_dist, axis=1, ddof=0)
         return self
 
-    def transform(
-        self, neigh_dist, neigh_ind, query, assume_sorted=None, *args, **kwargs
-    ):
+    def transform(self, neigh_dist, neigh_ind, query):
         """Transform distance between test and training data with Mutual Proximity.
 
         Parameters
@@ -123,16 +109,17 @@ def transform(
             Neighbor indices corresponding to the values in neigh_dist
         query
             Ignored
-        assume_sorted
-            Ignored
+
         Returns
         -------
         hub_reduced_dist, neigh_ind
             Mutual Proximity distances, and corresponding neighbor indices
+
         Raises
         ------
         ValueError
             if self.method is unknown
+
         Notes
         -----
         The returned distances are NOT sorted! If you use this class directly,
@@ -148,18 +135,8 @@ def transform(
             ],
             all_or_any=any,
         )
-        check_array(neigh_dist, force_all_finite="allow-nan")
-        check_array(neigh_ind)
-
         n_test, n_indexed = neigh_dist.shape
 
-        if n_indexed == 1:
-            warnings.warn(
-                "Cannot perform hubness reduction with a single neighbor per query. "
-                "Skipping hubness reduction, and returning untransformed distances."
-            )
-            return neigh_dist, neigh_ind
-
         hub_reduced_dist = np.empty_like(neigh_dist)
 
         # Show progress in hubness reduction loop
@@ -171,9 +148,7 @@ def transform(
         )
 
         # Calculate MP with independent Gaussians
-        if self.method not in ["normal", "empiric"]:
-            raise ValueError(f"Internal: Invalid method {self.method}.")
-        elif self.method == "normal":
+        if self.method == "normal":
             mu_t_to_s = self.mu_t_to_s_
             sd_t_to_s_ = self.sd_t_to_s_
             for i in range_n_test:
diff --git a/kiez/io/temp_file_handling.py b/kiez/io/temp_file_handling.py
index bc3f883..83829d3 100644
--- a/kiez/io/temp_file_handling.py
+++ b/kiez/io/temp_file_handling.py
@@ -19,7 +19,7 @@ def create_tempfile_preferably_in_dir(
     For example, this is useful to try to save into /dev/shm.
 
     Parameters
-    ---------
+    ----------
     suffix: str
         suffix of tempfile
     prefix: str
diff --git a/kiez/kiez.py b/kiez/kiez.py
index 08dacc5..9e23f2d 100644
--- a/kiez/kiez.py
+++ b/kiez/kiez.py
@@ -1,21 +1,20 @@
-# -*- coding: utf-8 -*-
-# SPDX-License-Identifier: BSD-3-Clause
 from __future__ import annotations
 
 import json
+import warnings
 from pathlib import Path
 from typing import Any, Dict, Optional, Tuple, Union
 
 import numpy as np
 from class_resolver import HintOrType
 
-from kiez.hubness_reduction import DisSimLocal, hubness_reduction_resolver
+from kiez.hubness_reduction import hubness_reduction_resolver
 from kiez.hubness_reduction.base import HubnessReduction, NoHubnessReduction
 from kiez.neighbors import NNAlgorithm, nn_algorithm_resolver
 
 
 class Kiez:
-    """Performs hubness reduced nearest neighbor search for entity alignment
+    """Performs hubness reduced nearest neighbor search for entity alignment.
 
     Use the given algorithm to :meth:`fit` the data and calculate the
     :meth:`kneighbors`.
@@ -79,38 +78,47 @@ class Kiez:
 
     def __init__(
         self,
-        n_neighbors: int = 5,
+        n_candidates: int = 10,
         algorithm: HintOrType[NNAlgorithm] = None,
         algorithm_kwargs: Optional[Dict[str, Any]] = None,
         hubness: HintOrType[HubnessReduction] = None,
         hubness_kwargs: Optional[Dict[str, Any]] = None,
     ):
-        if not np.issubdtype(type(n_neighbors), np.integer):
+        if not np.issubdtype(type(n_candidates), np.integer):
             raise TypeError(
-                f"n_neighbors does not take {type(n_neighbors)} value, enter"
+                f"n_candidates does not take {type(n_candidates)} value, enter"
                 " integer value"
             )
-        elif n_neighbors <= 0:
-            raise ValueError(f"Expected n_neighbors > 0. Got {n_neighbors}")
-        self.n_neighbors = n_neighbors
-        if algorithm is None and algorithm_kwargs is None:
-            algorithm_kwargs = {"n_candidates": n_neighbors}
+        elif n_candidates <= 0:
+            raise ValueError(f"Expected n_candidates > 0. Got {n_candidates}")
+        if algorithm_kwargs is None:
+            algorithm_kwargs = {"n_candidates": n_candidates}
+        elif "n_candidates" not in algorithm_kwargs:
+            algorithm_kwargs["n_candidates"] = n_candidates
         if algorithm is None:
             try:
-                self.algorithm = nn_algorithm_resolver.make("Faiss", algorithm_kwargs)
+                algorithm = nn_algorithm_resolver.make("Faiss", algorithm_kwargs)
             except ImportError:
-                self.algorithm = nn_algorithm_resolver.make(
-                    "SklearnNN", algorithm_kwargs
-                )
+                algorithm = nn_algorithm_resolver.make("SklearnNN", algorithm_kwargs)
         else:
-            self.algorithm = nn_algorithm_resolver.make(algorithm, algorithm_kwargs)
-        assert self.algorithm
+            algorithm = nn_algorithm_resolver.make(algorithm, algorithm_kwargs)
+        assert algorithm
+        if hubness_kwargs is None:
+            hubness_kwargs = dict()
+        hubness_kwargs["nn_algo"] = algorithm
         self.hubness = hubness_reduction_resolver.make(hubness, hubness_kwargs)
-        self._check_algorithm_hubness_compatibility()
+
+    @property
+    def algorithm(self):
+        return self.hubness.nn_algo
+
+    @algorithm.setter
+    def algorithm(self, value):
+        self.hubness.nn_algo = value
 
     def __repr__(self):
         return (
-            f"Kiez(n_neighbors: {self.n_neighbors}, algorithm: {self.algorithm},"
+            f"Kiez(algorithm: {self.algorithm},"
             f" hubness: {self.hubness})"
             f" {self.algorithm._describe_source_target_fitted()}"
         )
@@ -121,43 +129,8 @@ def from_path(cls, path: Union[str, Path]) -> Kiez:
         with open(path) as file:
             return cls(**json.load(file))
 
-    def _kcandidates(
-        self, query_points, *, s_to_t=True, k=None, return_distance=True
-    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
-        if k is None:
-            k = self.algorithm.n_candidates
-
-        # The number of candidates must not be less than the number of neighbors used downstream
-        if k < self.n_neighbors:
-            k = self.n_neighbors
-        return self.algorithm.kneighbors(
-            k=k,
-            query=query_points,
-            s_to_t=s_to_t,
-            return_distance=return_distance,
-        )
-
-    def _check_algorithm_hubness_compatibility(self):
-        if isinstance(self.hubness, DisSimLocal):
-            if self.algorithm.metric in ["euclidean", "minkowski"]:
-                self.hubness.squared = False
-                if hasattr(self.algorithm, "p"):
-                    if self.algorithm.p != 2:
-                        raise ValueError(
-                            "DisSimLocal only supports squared Euclidean distances. If"
-                            " the provided NNAlgorithm has a `p` parameter it must be"
-                            f" set to p=2. Now it is p={self.algorithm.p}"
-                        )
-            elif self.algorithm.metric in ["sqeuclidean"]:
-                self.hubness.squared = True
-            else:
-                raise ValueError(
-                    "DisSimLocal only supports squared Euclidean distances, not"
-                    f" metric={self.algorithm.metric}."
-                )
-
     def fit(self, source, target=None) -> Kiez:
-        """Fits the algorithm and hubness reduction method
+        """Fits the algorithm and hubness reduction method.
 
         Parameters
         ----------
@@ -171,40 +144,20 @@ def fit(self, source, target=None) -> Kiez:
         Kiez
             Fitted kiez instance
         """
-        self.algorithm.fit(source, target)
-        if target is None:
-            target = source
-        if not isinstance(self.hubness, NoHubnessReduction):
-            neigh_dist_t_to_s, neigh_ind_t_to_s = self._kcandidates(
-                target,
-                s_to_t=False,
-                k=self.algorithm.n_candidates,
-                return_distance=True,
-            )
-            self.hubness.fit(
-                neigh_dist_t_to_s,
-                neigh_ind_t_to_s,
-                source,
-                target,
-                assume_sorted=False,
-            )
+        self.hubness.fit(source, target)
         return self
 
     def kneighbors(
         self,
-        source_query_points=None,
-        k=None,
+        k: Optional[int] = None,
         return_distance=True,
     ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
-        """Retrieves the k-nearest neighbors using the supplied nearest neighbor algorithm and hubness reduction method.
+        """Retrieve the k-nearest neighbors using the supplied nearest neighbor algorithm and hubness reduction method.
 
         Parameters
         ----------
-        source_query_points : matrix of shape (n_samples, n_features), default = None
-            subset of source entity embeddings
-            if `None` all source entities are used for querying
-        k : int, default = None
-            number of nearest neighbors to search for
+        k : Optional[int], default = None
+            Number of nearest neighbors to return; if None, n_candidates is used
         return_distance : bool, default = True
             Whether to return distances
             If `False` only indices are returned
@@ -217,33 +170,7 @@ def kneighbors(
         neigh_ind : ndarray of shape (n_queries, n_neighbors)
             Indices of the nearest points in the population matrix.
         """
-        # function loosely adapted from skhubness: https://github.com/VarIr/scikit-hubness
-
-        if k is None:
-            n_neighbors = self.n_neighbors
-        else:
-            n_neighbors = k
-        # First obtain candidate neighbors
-        query_dist, query_ind = self._kcandidates(
-            source_query_points, return_distance=True
-        )
-        query_dist = np.atleast_2d(query_dist)
-        query_ind = np.atleast_2d(query_ind)
-
-        # Second, reduce hubness
-        hubness_reduced_query_dist, query_ind = self.hubness.transform(
-            query_dist,
-            query_ind,
-            source_query_points,
-            assume_sorted=True,
-        )
-        # Third, sort hubness reduced candidate neighbors to get the final k neighbors
-        kth = np.arange(n_neighbors)
-        mask = np.argpartition(hubness_reduced_query_dist, kth=kth)[:, :n_neighbors]
-        hubness_reduced_query_dist = np.take_along_axis(
-            hubness_reduced_query_dist, mask, axis=1
-        )
-        query_ind = np.take_along_axis(query_ind, mask, axis=1)
+        hubness_reduced_query_dist, query_ind = self.hubness.kneighbors(k)
 
         if return_distance:
             result = hubness_reduced_query_dist, query_ind
diff --git a/kiez/neighbors/approximate/faiss.py b/kiez/neighbors/approximate/faiss.py
index f95e1fa..72d0be9 100644
--- a/kiez/neighbors/approximate/faiss.py
+++ b/kiez/neighbors/approximate/faiss.py
@@ -10,19 +10,12 @@
     import faiss
 except ImportError:  # pragma: no cover
     faiss = None
-try:
-    import autofaiss
-except ImportError:  # pragma: no cover
-    autofaiss = None
 
 
 class Faiss(NNAlgorithm):
     """Wrapper for `faiss` library.
 
     Faiss implements a number of (A)NN algorithms and enables the use of GPUs.
-    If it is installed and you let it, kiez utilizes the `autofaiss` package to
-    find the appropriate indexing structure and optimizes the hyperparameters of
-    the algorithm
 
     Parameters
     ----------
@@ -51,14 +44,8 @@ class Faiss(NNAlgorithm):
     >>> k_inst = Kiez(algorithm="Faiss")
     >>> k_inst.fit(source, target)
 
-    get info about selected indices
-
-    >>> k_inst.algorithm.source_index_infos["index_key"]
-    'HNSW15'
-
     >>> k_inst = Kiez(algorithm="Faiss",algorithm_kwargs={"metric":"euclidean","index_key":"Flat"})
 
-
     supply hyperparameters for indexing algorithm
 
     >>> k_inst = Kiez(algorithm="Faiss",algorithm_kwargs={"index_key":"HNSW32","index_param":"efSearch=16383"})
@@ -66,7 +53,6 @@ class Faiss(NNAlgorithm):
     Notes
     -----
     For details about configuring faiss consult their wiki: https://github.com/facebookresearch/faiss/wiki
-    For details about autofaiss see their documentation: https://criteo.github.io/autofaiss/
     """
 
     valid_metrics = ["l2", "euclidean"]
@@ -76,7 +62,7 @@ def __init__(
         self,
         n_candidates: int = 5,
         metric: str = "l2",
-        index_key: Optional[str] = None,
+        index_key: str = "Flat",
         index_param: Optional[str] = None,
         use_gpu: bool = False,
         verbose: int = logging.WARNING,
@@ -97,118 +83,37 @@ def __init__(
         else:
             self.space = metric
         super().__init__(n_candidates=n_candidates, metric=metric, n_jobs=None)
-        use_auto_tune = autofaiss is not None
-        # check index string
-        if index_key:
-            try:
-                faiss.index_factory(1, index_key)
-            except RuntimeError as exc:
-                raise ValueError(
-                    f'Could not parse index "{index_key}".\n Please consult the faiss'
-                    " wiki to create a correct instruction:"
-                    " https://github.com/facebookresearch/faiss/wiki/The-index-factory"
-                ) from exc
-            # user seems to know what they want so no tuning
-            if index_param or index_key == "Flat":
-                use_auto_tune = False
-        elif index_param:
-            warnings.warn(
-                "Index key not set but hyperparameter given. Are you sure this is"
-                " intended?"
-            )
-        else:
-            # no index and no hyperparams so check
-            # if autofaiss is available
-            if autofaiss is None:  # pragma: no cover
-                warnings.warn(
-                    "Please install the `autofaiss` package, to enable automatic index"
-                    " selection.\nYou can install `autofaiss` via: pip install"
-                    " autofaiss\n Will use `Flat` index for now, but there are probably"
-                    " better choices..."
-                )
-                use_auto_tune = False
         self.index_key = index_key
         self.index_param = index_param
-        self.use_auto_tune = use_auto_tune
         self.use_gpu = use_gpu
-        self.index_infos = None
         self.verbose = verbose
 
-    def _source_target_repr(self, is_source: bool):
-        ret_str = f"{self.__class__.__name__}(n_candidates={self.n_candidates},metric={self.metric},"
-        if is_source:
-            ret_str += (
-                f"index_key={self.source_index_key},"
-                f" index_param={{{self.source_index_param}}},"
-            )
-        else:
-            ret_str += (
-                f"index_key={self.target_index_key},"
-                f" index_param={{{self.target_index_param}}},"
-            )
-        ret_str += f"use_auto_tune={self.use_auto_tune}, use_gpu={self.use_gpu})"
-        return ret_str
-
     def __repr__(self):
-        if hasattr(self, "source_index_key") and hasattr(self, "target_index_key"):
-            ret_str = (
-                f"Source: {self._source_target_repr(True)}, "
-                f"Target: {self._source_target_repr(False)}"
-            )
-        elif hasattr(self, "source_index_key"):
-            ret_str = f"{self._source_target_repr(True)}"
-        else:
-            ret_str = (
-                f"{self.__class__.__name__}(n_candidates={self.n_candidates},"
-                + f"metric={self.metric},"
-                + f"index_key={self.index_key},"
-                + f"index_param={{{self.index_param}}},"
-                + f"use_auto_tune={self.use_auto_tune},"
-                + f"use_gpu={self.use_gpu})"
-            )
-        return ret_str
-
-    def _to_float32(self, data):
-        if not data.dtype == "float32":
-            return data.astype("float32")
-        return data
+        return (
+            f"{self.__class__.__name__}(n_candidates={self.n_candidates},"
+            + f"metric={self.metric},"
+            + f"index_key={self.index_key},"
+            + f"index_param={{{self.index_param}}},"
+            + f"use_gpu={self.use_gpu})"
+        )
 
     def _fit(self, data, is_source: bool):
         dim = data.shape[1]
-        if self.use_auto_tune:
-            index, index_infos = autofaiss.build_index(
-                self._to_float32(data),
-                index_key=self.index_key,
-                index_param=self.index_param,
-                metric_type=self.space,
-                save_on_disk=False,
-                use_gpu=self.use_gpu,
-                verbose=self.verbose,
-            )
-            if is_source:
-                self.source_index_key = index_infos["index_key"]
-                self.source_index_param = index_infos["index_param"]
-                self.source_index_infos = index_infos
-            else:
-                self.target_index_key = index_infos["index_key"]
-                self.target_index_param = index_infos["index_param"]
-                self.target_index_infos = index_infos
-        else:
-            index = faiss.index_factory(dim, self.index_key)
-            params = faiss.ParameterSpace()
-            if self.use_gpu:
-                index = faiss.index_cpu_to_all_gpus(index)
-                params = faiss.GpuParameterSpace()
-            if self.index_param is not None:
-                params.set_index_parameters(index, self.index_param)
-            index.add(self._to_float32(data))
+        index = faiss.index_factory(dim, self.index_key)
+        params = faiss.ParameterSpace()
+        if self.use_gpu:
+            index = faiss.index_cpu_to_all_gpus(index)
+            params = faiss.GpuParameterSpace()
+        if self.index_param is not None:
+            params.set_index_parameters(index, self.index_param)
+        index.add(data)
         return index
 
     def _kneighbors(self, k, query, index, return_distance, is_self_querying):
         if is_self_querying:
-            dist, ind = index.search(self._to_float32(self.source_), k)
+            dist, ind = index.search(self.source_, k)
         else:
-            dist, ind = index.search(self._to_float32(query), k)
+            dist, ind = index.search(query, k)
         if return_distance:
             if self.metric == "euclidean":
                 dist = np.sqrt(dist)
diff --git a/kiez/neighbors/exact/sklearn_nearest_neighbors.py b/kiez/neighbors/exact/sklearn_nearest_neighbors.py
index 55378c8..86a1c5b 100644
--- a/kiez/neighbors/exact/sklearn_nearest_neighbors.py
+++ b/kiez/neighbors/exact/sklearn_nearest_neighbors.py
@@ -5,7 +5,7 @@
 
 
 class SklearnNN(NNAlgorithm):
-    """Wrapper for scikit learn's NearestNeighbors class
+    """Wrapper for scikit learn's NearestNeighbors class.
 
     Parameters
     ----------
@@ -40,6 +40,7 @@ class SklearnNN(NNAlgorithm):
         The number of parallel jobs to run for neighbors search.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors.
+
     Notes
     -----
     See also scikit learn's guide: https://scikit-learn.org/stable/modules/neighbors.html#unsupervised-neighbors
diff --git a/kiez/neighbors/neighbor_algorithm_base.py b/kiez/neighbors/neighbor_algorithm_base.py
index 6f59e53..0e5f397 100644
--- a/kiez/neighbors/neighbor_algorithm_base.py
+++ b/kiez/neighbors/neighbor_algorithm_base.py
@@ -32,7 +32,12 @@ def valid_metrics(self):
     def _fit(self, data, is_source: bool):
         pass  # pragma: no cover
 
-    def fit(self, source: np.ndarray, target: np.ndarray = None):
+    def fit(
+        self,
+        source: np.ndarray,
+        target: np.ndarray = None,
+        only_fit_target: bool = False,
+    ):
         """Indexes the given data using the underlying algorithm
 
         Parameters
@@ -41,6 +46,10 @@ def fit(self, source: np.ndarray, target: np.ndarray = None):
             embeddings of source entities
         target : matrix of shape (m_samples, n_features)
             embeddings of target entities or None in a single-source use case
+        only_fit_target : bool
+            If True, only the target is indexed. This will lead to problems
+            later with many hubness reduction methods and should mainly be
+            used for search without hubness reduction.
 
         Raises
         ------
@@ -59,8 +68,11 @@ def fit(self, source: np.ndarray, target: np.ndarray = None):
                     f" but got source.shape: {source.shape} and target.shape:"
                     f" {target.shape}"
                 )
-            self.source_index = self._fit(source, True)
-            self.target_index = self._fit(target, False)
+            if only_fit_target:
+                self.target_index = self._fit(target, True)
+            else:
+                self.source_index = self._fit(source, True)
+                self.target_index = self._fit(target, False)
         self.source_ = source
         self.target_ = target
 
@@ -82,7 +94,7 @@ def _kneighbors(self, query, k, index, return_distance, is_self_querying):
         pass  # pragma: no cover
 
     def kneighbors(self, query=None, k=None, s_to_t=True, return_distance=True):
-        check_is_fitted(self, ["source_index", "target_index"])
+        check_is_fitted(self, ["source_index", "target_index"], all_or_any=any)
         k = self.n_candidates if k is None else k
         is_self_querying = query is None and self.source_equals_target
 
diff --git a/kiez/neighbors/util.py b/kiez/neighbors/util.py
index cbd34cd..56ce333 100644
--- a/kiez/neighbors/util.py
+++ b/kiez/neighbors/util.py
@@ -3,14 +3,14 @@
 from kiez.neighbors import NNAlgorithm, nn_algorithm_resolver
 
 
-def available_ann_algorithms() -> List[Type[NNAlgorithm]]:
-    """Get available approximate nearest neighbor algorithms
+def available_nn_algorithms() -> List[Type[NNAlgorithm]]:
+    """Get available (approximate) nearest neighbor algorithms
     Returns
     -------
     algorithms: List[Type[NNAlgorithm]]
         A tuple of available algorithms
     """
-    possible = ["NMSLIB", "NNG", "Annoy", "Faiss"]
+    possible = ["NMSLIB", "NNG", "Annoy", "Faiss", "SklearnNN"]
     available = []
     for ann in possible:
         try:
diff --git a/noxfile.py b/noxfile.py
index 2efcff3..3884ff6 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -8,7 +8,15 @@ def tests(session: Session) -> None:
     session.install(".")
     session.install("pytest")
     session.install("pytest-cov")
-    session.run("pytest", *args)
+    session.run(
+        "coverage",
+        "run",
+        "--source=kiez",
+        "--data-file=.coverage.base",
+        "-m",
+        "pytest",
+        *args
+    )
 
 
 @nox_session(python="3.10", venv_backend="conda")
@@ -21,7 +29,15 @@ def test_faiss(session: Session) -> None:
     session.install("autofaiss")
     session.install("pytest")
     session.install("pytest-cov")
-    session.run("pytest", *args)
+    session.run(
+        "coverage",
+        "run",
+        "--source=kiez",
+        "--data-file=.coverage.faiss",
+        "-m",
+        "pytest",
+        *args
+    )
 
 
 @session(python="3.10")
@@ -30,7 +46,15 @@ def test_ngt(session: Session) -> None:
     session.install(".[ngt]")
     session.install("pytest")
     session.install("pytest-cov")
-    session.run("pytest", *args)
+    session.run(
+        "coverage",
+        "run",
+        "--source=kiez",
+        "--data-file=.coverage.ngt",
+        "-m",
+        "pytest",
+        *args
+    )
 
 
 @session(python="3.10")
@@ -39,7 +63,15 @@ def test_nmslib(session: Session) -> None:
     session.install(".[nmslib]")
     session.install("pytest")
     session.install("pytest-cov")
-    session.run("pytest", *args)
+    session.run(
+        "coverage",
+        "run",
+        "--source=kiez",
+        "--data-file=.coverage.nmslib",
+        "-m",
+        "pytest",
+        *args
+    )
 
 
 @session(python="3.10")
@@ -48,7 +80,15 @@ def test_annoy(session: Session) -> None:
     session.install(".[annoy]")
     session.install("pytest")
     session.install("pytest-cov")
-    session.run("pytest", *args)
+    session.run(
+        "coverage",
+        "run",
+        "--source=kiez",
+        "--data-file=.coverage.annoy",
+        "-m",
+        "pytest",
+        *args
+    )
 
 
 locations = ["kiez", "tests", "noxfile.py"]
diff --git a/pyproject.toml b/pyproject.toml
index 28c708f..0706cf4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -66,7 +66,7 @@ requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.flake8]
-ignore = "E203, E266, E501, W503, F403, F401, B950, B905"
+ignore = "E203, E266, E501, W503, F403, F401, B950, B905, C408"
 max-line-length = 88
 max-complexity = 18
 select = "B,C,E,F,W,T4,B9"
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..5c7cfc3
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,11 @@
+import numpy as np
+import pytest
+
+
+@pytest.fixture(scope="session", autouse=True)
+def source_target(request):
+    rng = np.random.RandomState(42)
+    n_samples = 20
+    n_samples2 = 50
+    n_features = 5
+    return rng.rand(n_samples, n_features), rng.rand(n_samples2, n_features)
diff --git a/tests/hubness_reduction/test_mutual_proximity.py b/tests/hubness_reduction/test_mutual_proximity.py
deleted file mode 100644
index bf1eb75..0000000
--- a/tests/hubness_reduction/test_mutual_proximity.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import numpy as np
-import pytest
-from numpy.testing import assert_array_equal
-
-from kiez import Kiez
-from kiez.hubness_reduction import MutualProximity
-
-rng = np.random.RandomState(2)
-
-
-def test_wrong_input():
-    with pytest.raises(ValueError) as exc_info:
-        MutualProximity(method="wrong")
-    assert "not recognized" in str(exc_info.value)
-
-
-def test_sqeuclidean(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
-    k_inst = Kiez(hubness=MutualProximity())
-    k_inst.fit(source, target)
-    ndist, nind = k_inst.kneighbors(k=1)
-    out_dist, out_nind = k_inst.hubness.transform(ndist, nind, None)
-    assert_array_equal(ndist, out_dist)
-    assert_array_equal(nind, out_nind)
diff --git a/tests/hubness_reduction/test_wrong_inputs.py b/tests/hubness_reduction/test_wrong_inputs.py
new file mode 100644
index 0000000..f88a908
--- /dev/null
+++ b/tests/hubness_reduction/test_wrong_inputs.py
@@ -0,0 +1,21 @@
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+
+from kiez import Kiez
+from kiez.hubness_reduction import LocalScaling, MutualProximity
+from kiez.neighbors import SklearnNN
+
+rng = np.random.RandomState(2)
+
+
+def test_wrong_input_mp():
+    with pytest.raises(ValueError) as exc_info:
+        MutualProximity(nn_algo=SklearnNN(), method="wrong")
+    assert "not recognized" in str(exc_info.value)
+
+
+def test_wrong_input_ls():
+    with pytest.raises(ValueError) as exc_info:
+        LocalScaling(nn_algo=SklearnNN(), method="wrong")
+    assert "Invalid" in str(exc_info.value)
diff --git a/tests/neighbors/test_alignment.py b/tests/neighbors/test_alignment.py
deleted file mode 100644
index fe6ac14..0000000
--- a/tests/neighbors/test_alignment.py
+++ /dev/null
@@ -1,178 +0,0 @@
-import numpy as np
-import pytest
-from numpy.testing import assert_array_almost_equal, assert_array_equal
-
-from kiez import Kiez
-from kiez.hubness_reduction import CSLS, DisSimLocal, LocalScaling, MutualProximity
-from kiez.neighbors import Annoy, Faiss, SklearnNN
-from kiez.neighbors.util import available_ann_algorithms
-
-P = (1, 3, 4, np.inf, 2)  # Euclidean last, for tests against approx NN
-rng = np.random.RandomState(2)
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-
-MP = [MutualProximity(method=method) for method in ["normal", "empiric"]]
-LS = [LocalScaling(method=method) for method in ["standard", "nicdm"]]
-DSL = [DisSimLocal(squared=val) for val in [True, False]]
-HUBNESS = [None, CSLS(), *MP, *LS, *DSL]
-
-
-@pytest.mark.parametrize("hubness", HUBNESS)
-def test_alignment_source_equals_target(
-    hubness,
-    n_samples=20,
-    n_features=5,
-    n_query_pts=10,
-    n_neighbors=5,
-):
-    source = rng.rand(n_samples, n_features)
-    query = rng.rand(n_query_pts, n_features)
-    exactalgos = [
-        SklearnNN(n_candidates=n_neighbors, algorithm=algo)
-        for algo in ["auto", "kd_tree", "ball_tree", "brute"]
-    ]
-
-    if Faiss in APPROXIMATE_ALGORITHMS:
-        exactalgos.append(
-            Faiss(n_candidates=n_neighbors, metric="euclidean", index_key="Flat")
-        )
-
-    for p in P:
-        results = []
-        results_nodist = []
-
-        for algo in exactalgos:
-            if hubness == "dsl" and p != 2:
-                with pytest.raises(ValueError):
-                    align = Kiez(
-                        n_neighbors=n_neighbors, algorithm=algo, hubness=hubness
-                    )
-                    continue
-            align = Kiez(n_neighbors=n_neighbors, algorithm=algo, hubness=hubness)
-            align.fit(source)
-            results.append(
-                align.kneighbors(source_query_points=query, return_distance=True)
-            )
-            results_nodist.append(
-                align.kneighbors(source_query_points=query, return_distance=False)
-            )
-        for i in range(len(results) - 1):
-            assert_array_almost_equal(results_nodist[i], results[i][1])
-            assert_array_almost_equal(results[i][0], results[i + 1][0], decimal=3)
-            assert_array_almost_equal(results[i][1], results[i + 1][1])
-    # Test approximate NN against exact NN with Euclidean distances
-    assert p == 2, f"Internal: last parameter p={p}, should have been 2"
-
-    ann_algos = [
-        algo_cls(n_candidates=n_neighbors, metric="euclidean")
-        for algo_cls in APPROXIMATE_ALGORITHMS
-    ]
-    for algo in ann_algos:
-        align = Kiez(
-            n_neighbors=n_neighbors,
-            algorithm=algo,
-            hubness=hubness,
-        )
-        align.fit(source)
-        results_approx = align.kneighbors(
-            source_query_points=query, return_distance=True
-        )
-        results_approx_nodist = align.kneighbors(
-            source_query_points=query, return_distance=False
-        )
-        assert_array_equal(results_approx_nodist, results_approx[1])
-        if isinstance(algo, Annoy):  # quite imprecise
-            assert_array_almost_equal(results_approx[0], results[1][0], decimal=0)
-            for i in range(len(results_approx[1])):
-                assert np.intersect1d(results_approx[1][i], results[1][1][i]).size >= 1
-        else:
-            assert_array_almost_equal(results_approx[0], results[1][0], decimal=3)
-            for ra, r in zip(results_approx[1], results[1][1]):
-                assert set(ra) == set(r)
-
-
-@pytest.mark.parametrize("hubness", HUBNESS)
-def test_alignment(
-    hubness,
-    n_samples=20,
-    n_features=5,
-    n_query_pts=10,
-    n_neighbors=5,
-):
-    source = rng.rand(n_query_pts, n_features)
-    target = rng.rand(n_samples, n_features)
-
-    exactalgos = [
-        SklearnNN(n_candidates=n_neighbors, algorithm=algo)
-        for algo in ["auto", "kd_tree", "ball_tree", "brute"]
-    ]
-    if Faiss in APPROXIMATE_ALGORITHMS:
-        exactalgos.append(
-            Faiss(n_candidates=n_neighbors, metric="euclidean", index_key="Flat")
-        )
-
-    for p in P:
-        results = []
-        results_nodist = []
-
-        for algo in exactalgos:
-            if hubness == "dsl" and p != 2:
-                with pytest.raises(ValueError):
-                    align = Kiez(
-                        n_neighbors=n_neighbors, algorithm=algo, hubness=hubness
-                    )
-                    continue
-            align = Kiez(n_neighbors=n_neighbors, algorithm=algo, hubness=hubness)
-            align.fit(source, target)
-            results.append(align.kneighbors(return_distance=True))
-            results_nodist.append(align.kneighbors(return_distance=False))
-        for i in range(len(results) - 1):
-            try:
-                assert_array_almost_equal(results_nodist[i], results[i][1])
-                assert_array_almost_equal(results[i][0], results[i + 1][0])
-                assert_array_almost_equal(results[i][1], results[i + 1][1])
-            except AssertionError as error:
-                # empiric mp with ball tree can give slightly different results
-                # because slight differences in distance provided by ball_tree
-                if not (
-                    isinstance(hubness, MutualProximity) and hubness.method == "empiric"
-                ):
-                    raise error
-    # Test approximate NN against exact NN with Euclidean distances
-    assert p == 2, f"Internal: last parameter p={p}, should have been 2"
-    ann_algos = [
-        algo_cls(n_candidates=n_neighbors, metric="euclidean")
-        for algo_cls in APPROXIMATE_ALGORITHMS
-    ]
-    for algo in ann_algos:
-        align = Kiez(
-            n_neighbors=n_neighbors,
-            algorithm=algo,
-            hubness=hubness,
-        )
-        align.fit(source, target)
-        results_approx = align.kneighbors(
-            source_query_points=source, return_distance=True
-        )
-        results_approx_nodist = align.kneighbors(
-            source_query_points=source, return_distance=False
-        )
-        assert_array_equal(results_approx_nodist, results_approx[1])
-        if isinstance(algo, Annoy):  # quite imprecise
-            assert_array_almost_equal(results_approx[0], results[1][0], decimal=0)
-            for i in range(len(results_approx[1])):
-                try:
-                    assert (
-                        np.intersect1d(results_approx[1][i], results[1][1][i]).size >= 1
-                    ), f"{algo} failed with {hubness}"
-                except AssertionError as error:
-                    # empiric mp with ball tree can give slightly different results
-                    # because slight differences in distance provided by ball_tree
-                    if not (
-                        isinstance(hubness, MutualProximity)
-                        and hubness.method == "empiric"
-                    ):
-                        raise error
-        else:
-            for ra, r in zip(results_approx[1], results[1][1]):
-                assert set(ra) == set(r), f"{algo} failed with {hubness}"
diff --git a/tests/neighbors/test_annoy.py b/tests/neighbors/test_annoy.py
index af86564..76511ad 100644
--- a/tests/neighbors/test_annoy.py
+++ b/tests/neighbors/test_annoy.py
@@ -3,10 +3,10 @@
 from numpy.testing import assert_array_equal
 
 from kiez.neighbors import Annoy
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-if Annoy not in APPROXIMATE_ALGORITHMS:
+NN_ALGORITHMS = available_nn_algorithms()
+if Annoy not in NN_ALGORITHMS:
     skip = True
 else:
     skip = False
@@ -30,8 +30,8 @@ def test_minkowski_metric():
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_self_query(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
+def test_self_query(source_target, n_neighbors=5):
+    source, _ = source_target
     annoy = Annoy(n_candidates=n_neighbors)
     annoy.fit(source, source)
     d, i = annoy.kneighbors()
@@ -40,49 +40,28 @@ def test_self_query(n_samples=20, n_features=5, n_neighbors=5):
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_query(tmp_path, n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_query(tmp_path, source_target, n_neighbors=5):
+    source, target = source_target
     annoy = Annoy(n_candidates=n_neighbors, metric="euclidean")
     annoy.fit(source, target)
-    d, i = annoy.kneighbors(
-        query=source[
-            :5,
-        ]
-    )
+    d, i = annoy.kneighbors()
     annoy2 = Annoy(n_candidates=n_neighbors, metric="minkowski")
     annoy2.fit(source, target)
-    i2 = annoy2.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i2 = annoy2.kneighbors(return_distance=False)
     assert_array_equal(i, i2)
     annoy3 = Annoy(n_candidates=n_neighbors, mmap_dir=str(tmp_path))
     annoy3.fit(source, target)
-    i3 = annoy3.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i3 = annoy3.kneighbors(return_distance=False)
     assert_array_equal(i, i3)
     annoy4 = Annoy(n_candidates=n_neighbors, mmap_dir=None)
     annoy4.fit(source, target)
-    i4 = annoy4.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i4 = annoy4.kneighbors(return_distance=False)
     assert_array_equal(i, i4)
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_inner_kneighbors(tmp_path, n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_inner_kneighbors(tmp_path, source_target, n_neighbors=5):
+    source, target = source_target
     annoy = Annoy(n_candidates=n_neighbors)
     annoy.fit(source, target)
     with pytest.raises(AssertionError) as exc_info:
diff --git a/tests/neighbors/test_base.py b/tests/neighbors/test_base.py
index 4ca55ff..7812b8b 100644
--- a/tests/neighbors/test_base.py
+++ b/tests/neighbors/test_base.py
@@ -2,17 +2,16 @@
 import pytest
 
 from kiez.neighbors import NMSLIB, NNG, Annoy, Faiss, SklearnNN
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-ALGORITHMS = [*APPROXIMATE_ALGORITHMS, SklearnNN]
+NN_ALGORITHMS = available_nn_algorithms()
 
 rng = np.random.RandomState(2)
 
 
-@pytest.mark.parametrize("algo_cls", ALGORITHMS)
-def test_str_rep(algo_cls, n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
+@pytest.mark.parametrize("algo_cls", NN_ALGORITHMS)
+def test_str_rep(algo_cls, source_target):
+    source, _ = source_target
     algo = algo_cls()
     assert "is unfitted" in str(algo._describe_source_target_fitted())
     algo.fit(source, source)
diff --git a/tests/neighbors/test_faiss.py b/tests/neighbors/test_faiss.py
index 417ac6a..014f1ac 100644
--- a/tests/neighbors/test_faiss.py
+++ b/tests/neighbors/test_faiss.py
@@ -3,10 +3,10 @@
 from numpy.testing import assert_array_equal
 
 from kiez.neighbors import Faiss
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-if Faiss not in APPROXIMATE_ALGORITHMS:
+NN_ALGORITHMS = available_nn_algorithms()
+if Faiss not in NN_ALGORITHMS:
     skip = True
 else:
     skip = False
@@ -14,10 +14,8 @@
 
 @pytest.mark.skipif(skip, reason="Faiss not installed")
 @pytest.mark.parametrize("single_source", [True, False])
-def test_different_instantiations(single_source):
-    rng = np.random.RandomState(2)
-    source = rng.rand(50, 100)
-    target = rng.rand(50, 100)
+def test_different_instantiations(single_source, source_target):
+    source, target = source_target
     for same_config in [
         (
             {"metric": "l2"},
diff --git a/tests/neighbors/test_hnsw.py b/tests/neighbors/test_hnsw.py
index 8b7768d..539ddcf 100644
--- a/tests/neighbors/test_hnsw.py
+++ b/tests/neighbors/test_hnsw.py
@@ -3,10 +3,10 @@
 from numpy.testing import assert_array_equal
 
 from kiez.neighbors import NMSLIB
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-if NMSLIB not in APPROXIMATE_ALGORITHMS:
+NN_ALGORITHMS = available_nn_algorithms()
+if NMSLIB not in NN_ALGORITHMS:
     skip = True
 else:
     skip = False
@@ -23,44 +23,24 @@ def test_wrong_metric():
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_sqeuclidean(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_sqeuclidean(source_target, n_neighbors=5):
+    source, target = source_target
     hnsw1 = NMSLIB(n_candidates=n_neighbors, metric="sqeuclidean")
     hnsw1.fit(source, target)
-    d, i = hnsw1.kneighbors(
-        query=source[
-            :5,
-        ]
-    )
+    d, i = hnsw1.kneighbors()
     hnsw2 = NMSLIB(n_candidates=n_neighbors)
     hnsw2.fit(source, target)
-    i2 = hnsw2.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i2 = hnsw2.kneighbors(return_distance=False)
     assert_array_equal(i, i2)
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_cosine(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_cosine(source_target, n_neighbors=5):
+    source, target = source_target
     hnsw1 = NMSLIB(n_candidates=n_neighbors, metric="cosine")
     hnsw1.fit(source, target)
-    d, i = hnsw1.kneighbors(
-        query=source[
-            :5,
-        ]
-    )
+    d, i = hnsw1.kneighbors()
     hnsw2 = NMSLIB(n_candidates=n_neighbors, metric="cosinesimil")
     hnsw2.fit(source, target)
-    i2 = hnsw2.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i2 = hnsw2.kneighbors(return_distance=False)
     assert_array_equal(i, i2)
diff --git a/tests/neighbors/test_nng.py b/tests/neighbors/test_nng.py
index bc65ac4..1cba671 100644
--- a/tests/neighbors/test_nng.py
+++ b/tests/neighbors/test_nng.py
@@ -3,10 +3,10 @@
 from numpy.testing import assert_array_equal
 
 from kiez.neighbors import NNG
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
-if NNG not in APPROXIMATE_ALGORITHMS:
+NN_ALGORITHMS = available_nn_algorithms()
+if NNG not in NN_ALGORITHMS:
     skip = True
 else:
     skip = False
@@ -24,8 +24,8 @@ def test_wrong_metric():
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_wrong_dir(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
+def test_wrong_dir(source_target):
+    source, _ = source_target
     with pytest.raises(TypeError) as exc_info:
         nng = NNG(index_dir=1)
         nng.fit(source)
@@ -33,26 +33,24 @@ def test_wrong_dir(n_samples=20, n_features=5):
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_right_dir(tmp_path, n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_right_dir(tmp_path, source_target):
+    source, target = source_target
     nng = NNG(index_dir=str(tmp_path))
     nng.fit(source, target)
     assert nng is not None
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_none_dir(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_none_dir(source_target):
+    source, target = source_target
     nng = NNG(index_dir=None)
     nng.fit(source, target)
     assert nng is not None
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_self_query(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
+def test_self_query(source_target, n_neighbors=5):
+    source, _ = source_target
     nng = NNG(index_dir=None, n_candidates=n_neighbors, epsilon=0.00001)
     nng.fit(source, source)
     d, i = nng.kneighbors()
@@ -61,42 +59,22 @@ def test_self_query(n_samples=20, n_features=5, n_neighbors=5):
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_query(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_query(source_target, n_neighbors=5):
+    source, target = source_target
     nng = NNG(index_dir=None, n_candidates=n_neighbors, epsilon=0.00001)
     nng.fit(source, target)
-    d, i = nng.kneighbors(
-        query=source[
-            :5,
-        ]
-    )
-    i2 = nng.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    d, i = nng.kneighbors()
+    i2 = nng.kneighbors(return_distance=False)
     assert_array_equal(i, i2)
 
 
 @pytest.mark.skipif(skip, reason=skip_reason)
-def test_sqeuclidean(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
+def test_sqeuclidean(source_target, n_neighbors=5):
+    source, target = source_target
     nng1 = NNG(index_dir=None, n_candidates=n_neighbors, metric="sqeuclidean")
     nng1.fit(source, target)
-    d, i = nng1.kneighbors(
-        query=source[
-            :5,
-        ]
-    )
+    d, i = nng1.kneighbors()
     nng2 = NNG(index_dir=None, n_candidates=n_neighbors)
     nng2.fit(source, target)
-    i2 = nng2.kneighbors(
-        query=source[
-            :5,
-        ],
-        return_distance=False,
-    )
+    i2 = nng2.kneighbors(return_distance=False)
     assert_array_equal(i, i2)
diff --git a/tests/neighbors/test_sklearn.py b/tests/neighbors/test_sklearn.py
index 8154736..8aec387 100644
--- a/tests/neighbors/test_sklearn.py
+++ b/tests/neighbors/test_sklearn.py
@@ -6,8 +6,8 @@
 rng = np.random.RandomState(2)
 
 
-def test_self_query(n_samples=20, n_features=5, n_neighbors=5):
-    source = rng.rand(n_samples, n_features)
+def test_self_query(source_target, n_neighbors=5):
+    source, _ = source_target
     sklearnnn = SklearnNN()
     sklearnnn.fit(source, source)
     d, i = sklearnnn.kneighbors()
diff --git a/tests/test_kiez.py b/tests/test_kiez.py
index 367e20e..78bafe2 100644
--- a/tests/test_kiez.py
+++ b/tests/test_kiez.py
@@ -1,174 +1,124 @@
 import pathlib
 from unittest import mock
 
-import numpy as np
 import pytest
-from numpy.testing import assert_array_equal
-from sklearn.neighbors import NearestNeighbors
 
 from kiez import Kiez
-from kiez.hubness_reduction import (
-    DisSimLocal,
-    HubnessReduction,
-    LocalScaling,
-    NoHubnessReduction,
-)
+from kiez.hubness_reduction import HubnessReduction, LocalScaling
 from kiez.neighbors import NMSLIB, NNAlgorithm, SklearnNN
-from kiez.neighbors.util import available_ann_algorithms
+from kiez.neighbors.util import available_nn_algorithms
+
+NN_ALGORITHMS = available_nn_algorithms()
+
+MP = [("MutualProximity", dict(method=method)) for method in ["normal", "empiric"]]
+LS = [("LocalScaling", dict(method=method)) for method in ["standard", "nicdm"]]
+DSL = [("DisSimLocal", dict(squared=val)) for val in [True, False]]
+HUBNESS_AND_KWARGS = [(None, {}), ("CSLS", {}), *MP, *LS, *DSL]
 
-APPROXIMATE_ALGORITHMS = available_ann_algorithms()
 
 HERE = pathlib.Path(__file__).parent.resolve()
-rng = np.random.RandomState(2)
-
-
-class CustomHubness(HubnessReduction):
-    """Test class to make sure user created classes work"""
-
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-
-    def fit(self, *args, **kwargs):
-        pass  # pragma: no cover
-
-    def __repr__(self):
-        return "NoHubnessReduction"
-
-    def transform(
-        self,
-        neigh_dist,
-        neigh_ind,
-        query,
-        assume_sorted=True,
-        return_distance=True,
-        *args,
-        **kwargs,
-    ):
-        if return_distance:
-            return neigh_dist, neigh_ind
-        else:
-            return neigh_ind
-
-
-class CustomAlgorithm(NNAlgorithm):
-    """Test class to make sure user created classes work"""
-
-    valid_metrics = ["minkowski"]
-
-    def __init__(
-        self,
-        n_candidates=5,
-        algorithm="auto",
-        leaf_size=30,
-        metric="minkowski",
-        p=2,
-        metric_params=None,
-        n_jobs=None,
-    ):
-        super().__init__(n_candidates=n_candidates, metric=metric, n_jobs=n_jobs)
-        self.algorithm = algorithm
-        self.leaf_size = leaf_size
-        self.p = p
-        self.metric_params = metric_params
-
-    def _fit(self, data, is_source: bool):
-        nn = NearestNeighbors(
-            n_neighbors=self.n_candidates,
-            algorithm=self.algorithm,
-            leaf_size=self.leaf_size,
-            metric=self.metric,
-            p=self.p,
-            metric_params=self.metric_params,
-            n_jobs=self.n_jobs,
-        )
-        nn.fit(data)
-        return nn
-
-    def _kneighbors(self, k, query, index, return_distance, is_self_querying):
-        if is_self_querying:
-            return index.kneighbors(
-                X=None, n_neighbors=k, return_distance=return_distance
-            )
-        return index.kneighbors(X=query, n_neighbors=k, return_distance=return_distance)
-
-
-def test_hubness_resolver(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
-    res = []
-    for algo in [
-        SklearnNN(),
-        SklearnNN,
-        "SklearnNN",
-        CustomAlgorithm,
-        CustomAlgorithm(),
-    ]:
-        for hub in [
-            NoHubnessReduction(),
-            NoHubnessReduction,
-            None,
-            "NoHubnessReduction",
-            CustomHubness,
-            CustomHubness(),
-        ]:
-            k_inst = Kiez(algorithm=algo, hubness=hub)
-            k_inst.fit(source, target)
-            res.append(k_inst.kneighbors(source, k=1))
-    for i in range(len(res) - 1):
-        assert_array_equal(res[i][0], res[i + 1][0])
-        assert_array_equal(res[i][1], res[i + 1][1])
-
-
-def test_wrong_kcandidates(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
-    k_inst = Kiez()
-    k_inst.fit(source, target)
-    nn_ind = k_inst._kcandidates(source, k=1, return_distance=False)
-    assert nn_ind.shape == (20, 5)
 
 
-def test_non_default_kneighbors(n_samples=20, n_features=5):
-    source = rng.rand(n_samples, n_features)
-    target = rng.rand(n_samples, n_features)
-    k_inst = Kiez()
+def test_no_hub(source_target):
+    source, target = source_target
+    n_cand = 10
+    k_inst = Kiez(n_candidates=n_cand)
     k_inst.fit(source, target)
-    nn_ind = k_inst.kneighbors(source, k=1, return_distance=False)
-    assert nn_ind.shape == (20, 1)
+    # Without hubness reduction only the target index should be built.
+    assert not hasattr(k_inst.algorithm, "source_index")
+    k_inst.algorithm = SklearnNN()
+    assert f"{k_inst}"  # string form must still render after swapping the algorithm
+    assert (
+        Kiez(
+            n_candidates=n_cand,
+            algorithm="SklearnNN",
+            algorithm_kwargs=dict(metric="minkowski"),
+        ).algorithm.n_candidates
+        == n_cand
+    )
+
+
+def assert_different_neighbors(k_inst, n_cand):
+    dist, neigh = k_inst.kneighbors()
+    assert neigh.shape[1] == n_cand
+    assert dist.shape[1] == n_cand
+
+    neigh = k_inst.kneighbors(return_distance=False)
+    assert neigh.shape[1] == n_cand
+
+    dist, neigh = k_inst.kneighbors(k=1)
+    assert neigh.shape[1] == 1
+    assert dist.shape[1] == 1
+
+    dist, neigh = k_inst.kneighbors(k=20)
+    assert neigh.shape[1] == n_cand
+    assert dist.shape[1] == n_cand
+
+
+@pytest.mark.parametrize("algo", NN_ALGORITHMS)
+def test_algo_resolver(source_target, algo, n_cand=5):
+    source, target = source_target
+    k_inst = Kiez(algorithm=algo, n_candidates=n_cand)
+    k_inst.fit(source, target)
+    assert_different_neighbors(k_inst, n_cand)
+
+
+@pytest.mark.parametrize("hub,hubkwargs", HUBNESS_AND_KWARGS)
+def test_hubness_resolver(hub, hubkwargs, source_target, n_cand=5):
+    source, target = source_target
+    k_inst = Kiez(
+        algorithm="SklearnNN",
+        n_candidates=n_cand,
+        hubness=hub,
+        hubness_kwargs=hubkwargs,
+    )
+    assert f"{k_inst}" is not None
+    k_inst.fit(source, target)
+    assert_different_neighbors(k_inst, n_cand)
+    k_inst.fit(source, None)
+    assert_different_neighbors(k_inst, n_cand)
+    with pytest.raises(ValueError) as exc_info:
+        k_inst = Kiez(
+            algorithm="SklearnNN",
+            n_candidates=1,
+            hubness=hub,
+            hubness_kwargs=hubkwargs,
+        )
+    assert "Cannot" in str(exc_info.value)
 
 
-def test_n_neighbors_wrong():
+def test_n_candidates_wrong():
     with pytest.raises(ValueError) as exc_info:
-        Kiez(n_neighbors=-1)
+        Kiez(n_candidates=-1)
     assert "Expected" in str(exc_info.value)
 
 
-def test_n_neighbors_wrong_type():
+def test_n_candidates_wrong_type():
     with pytest.raises(TypeError) as exc_info:
-        Kiez(n_neighbors="1")
+        Kiez(n_candidates="1")
     assert "does not" in str(exc_info.value)
 
 
 def test_dis_sim_local_wrong():
     with pytest.raises(ValueError) as exc_info:
-        Kiez(algorithm=SklearnNN(p=1), hubness=DisSimLocal())
+        Kiez(algorithm=SklearnNN(p=1), hubness="DisSimLocal")
     assert "only supports" in str(exc_info.value)
 
 
 def test_dis_sim_local_wrong_metric():
     with pytest.raises(ValueError) as exc_info:
-        Kiez(algorithm=SklearnNN(metric="cosine"), hubness=DisSimLocal())
+        Kiez(algorithm=SklearnNN(metric="cosine"), hubness="DisSimLocal")
     assert "only supports" in str(exc_info.value)
 
 
 def test_dis_sim_local_squaring():
-    if NMSLIB in APPROXIMATE_ALGORITHMS:
-        k_inst = Kiez(algorithm=NMSLIB(metric="sqeuclidean"), hubness=DisSimLocal())
+    if NMSLIB in NN_ALGORITHMS:
+        k_inst = Kiez(algorithm=NMSLIB(metric="sqeuclidean"), hubness="DisSimLocal")
         assert k_inst.hubness.squared
 
 
 def test_from_config():
-    if NMSLIB in APPROXIMATE_ALGORITHMS:
+    if NMSLIB in NN_ALGORITHMS:
         path = HERE.joinpath("example_conf.json")
         kiez = Kiez.from_path(path)
         assert kiez.hubness is not None