Skip to content

Commit

Permalink
Enable torch (#35)
Browse files Browse the repository at this point in the history
* Enable torch

* Move to device

* Change skip reason

* Set use_gpu flag to true

* Add torch_utils import

* Add float conversion

* Fix float conversion in test

* Remove for loop in CSLS

* Remove more for loops

* Enable torch sorting and adapt test

* Use tolerance

* Adapt tolerance

* Adapt local scaling for torch

* Forgot variable rename

* Adapt Mutual proximity

* Use indices not dist

* Add print

* Adapted fit in MP

* Use std for torch

* Remove ddof

* Adapt tolerance

* Add torch support for dissimlocal

* Change metric in test

* Change metric everywhere

* Use torch for euclidean if needed for faiss

* Use torch row_norm

* Remove unnecessary variable switch

* Set knn

* Add prints

* Fixed DisSimLocal Torch bug

* Refactor code deduplication DisSimLocal

* Fix MP and cleanup

* Cleanup incompatible type hints for py3.8 and remove autofaiss mentions

* Added possibility to show available NN and HR

* Adapted Changelog
  • Loading branch information
dobraczka authored Jan 11, 2024
1 parent e97e28e commit 23f2667
Show file tree
Hide file tree
Showing 25 changed files with 697 additions and 171 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

## [0.5.0] - 2024-01-11

### Added

- Support for torch when using Faiss
- More metrics for Faiss

### Changed

- Simplified the `kneighbors` API: queries can no longer be supplied, since they need to come from the source anyway

### Removed

- Autofaiss support was removed

### Fixed

- Several efficiency problems when not using Hubness Reduction were addressed

## [0.4.4] - 2023-10-18

### Changed
Expand Down
31 changes: 25 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ If you have a GPU you can make kiez faster by installing [faiss](https://github.
conda env create -n kiez-faiss python=3.10
conda activate kiez-faiss
conda install -c pytorch -c nvidia faiss-gpu=1.7.4 mkl=2021 blas=1.0=mkl
pip install autofaiss
pip install kiez
```

Expand Down Expand Up @@ -66,16 +65,36 @@ rng = np.random.RandomState(0)
source = rng.rand(100,50)
target = rng.rand(100,50)
# prepare algorithm and hubness reduction
from kiez.neighbors import Faiss
faiss = Faiss(n_candidates=10)
from kiez.hubness_reduction import CSLS
hr = CSLS()
algo_kwargs = {"n_candidates": 10}
k_inst = Kiez(n_neighbors=5, algorithm="Faiss", algorithm_kwargs=algo_kwargs, hubness="CSLS")
# fit and get neighbors
k_inst = Kiez(n_neighbors=5, algorithm=faiss, hubness=hr)
k_inst.fit(source, target)
nn_dist, nn_ind = k_inst.kneighbors()
```

## Torch Support
Beginning with version 0.5.0, torch can be used when `Faiss` is the NN library:

```python

from kiez import Kiez
import torch
source = torch.randn((100,10))
target = torch.randn((200,10))
k_inst = Kiez(algorithm="Faiss", hubness="CSLS")
k_inst.fit(source, target)
nn_dist, nn_ind = k_inst.kneighbors()
```

You can also use tensors on the GPU:

```python

k_inst = Kiez(algorithm="Faiss", algorithm_kwargs={"use_gpu":True}, hubness="CSLS")
k_inst.fit(source.cuda(), target.cuda())
nn_dist, nn_ind = k_inst.kneighbors()
```

## Documentation
You can find more documentation on [readthedocs](https://kiez.readthedocs.io)

Expand Down
1 change: 0 additions & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ If you have a GPU you can make kiez faster by installing `faiss <https://github.
conda env create -n kiez-faiss python=3.10
conda activate kiez-faiss
conda install -c pytorch -c nvidia faiss-gpu=1.7.4 mkl=2021 blas=1.0=mkl
pip install autofaiss
pip install kiez
For more information see their `installation instructions <https://github.com/facebookresearch/faiss/blob/main/INSTALL.md>`_.
Expand Down
1 change: 0 additions & 1 deletion docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ If you have a GPU you can make kiez faster by installing `faiss <https://github.
conda env create -n kiez-faiss python=3.10
conda activate kiez-faiss
conda install -c pytorch -c nvidia faiss-gpu=1.7.4 mkl=2021 blas=1.0=mkl
pip install autofaiss
pip install kiez
For more information see their `installation instructions <https://github.com/facebookresearch/faiss/blob/main/INSTALL.md>`_.
Expand Down
49 changes: 36 additions & 13 deletions kiez/hubness_reduction/base.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,36 @@
import warnings
from abc import ABC, abstractmethod
from typing import Optional, Tuple
from typing import Optional, Tuple, TypeVar

import numpy as np

from ..neighbors import NNAlgorithm

try:
import torch
except ImportError:
torch = None

T = TypeVar("T")


class HubnessReduction(ABC):
"""Base class for hubness reduction."""

def __init__(self, nn_algo: NNAlgorithm, verbose: int = 0, **kwargs):
self.nn_algo = nn_algo
self.verbose = verbose
self._use_torch = False
if nn_algo.n_candidates == 1:
raise ValueError(
"Cannot perform hubness reduction with a single candidate per query!"
)

@abstractmethod
def _fit(self, neigh_dist, neigh_ind, source, target):
def _fit(self, neigh_dist: T, neigh_ind: T, source: T, target: T):
pass # pragma: no cover

def fit(self, source, target=None):
def fit(self, source: T, target: Optional[T] = None):
self.nn_algo.fit(source, target)
if target is None:
target = source
Expand All @@ -32,6 +40,8 @@ def fit(self, source, target=None):
s_to_t=False,
return_distance=True,
)
if torch and isinstance(neigh_dist_t_to_s, torch.Tensor):
self._use_torch = True
self._fit(
neigh_dist_t_to_s,
neigh_ind_t_to_s,
Expand All @@ -40,7 +50,7 @@ def fit(self, source, target=None):
)

@abstractmethod
def transform(self, neigh_dist, neigh_ind, query) -> Tuple[np.ndarray, np.ndarray]:
def transform(self, neigh_dist, neigh_ind, query) -> Tuple[T, T]:
pass # pragma: no cover

def _set_k_if_needed(self, k: Optional[int] = None) -> int:
Expand All @@ -59,7 +69,24 @@ def _set_k_if_needed(self, k: Optional[int] = None) -> int:
return self.nn_algo.n_candidates
return k

def kneighbors(self, k: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
@staticmethod
def _sort(hubness_reduced_query_dist, query_ind, n_neighbors: int) -> Tuple[T, T]:
    """Keep the ``n_neighbors`` smallest hubness-reduced distances per row.

    Works on NumPy arrays and, when torch is importable, on torch tensors;
    the backend is chosen from the type of ``hubness_reduced_query_dist``.

    Parameters
    ----------
    hubness_reduced_query_dist
        2D array/tensor of hubness-reduced distances, one row per query and
        one column per candidate neighbor.
    query_ind
        Candidate indices laid out like ``hubness_reduced_query_dist``.
    n_neighbors
        Number of neighbors to keep per row. If it exceeds the number of
        candidate columns, all candidates are returned.

    Returns
    -------
    Tuple
        ``(distances, indices)`` restricted to the selected columns, with
        each row ordered by ascending distance.
    """
    # Clamp once so both backends agree: slicing would silently truncate in
    # the torch branch, whereas ``np.argpartition`` raises ``ValueError``
    # when ``kth`` contains a position beyond the last column.
    n_keep = min(n_neighbors, hubness_reduced_query_dist.shape[1])
    if torch is not None and isinstance(hubness_reduced_query_dist, torch.Tensor):
        mask = torch.argsort(hubness_reduced_query_dist)[:, :n_keep]
        hubness_reduced_query_dist = torch.take_along_dim(
            hubness_reduced_query_dist, mask, dim=1
        )
        query_ind = torch.take_along_dim(query_ind, mask, dim=1)
    else:
        # Passing every position 0..n_keep-1 as ``kth`` puts each of those
        # positions in its final sorted place, so the leading ``n_keep``
        # columns come out in ascending order without a full sort.
        kth = np.arange(n_keep)
        mask = np.argpartition(hubness_reduced_query_dist, kth=kth)[:, :n_keep]
        hubness_reduced_query_dist = np.take_along_axis(
            hubness_reduced_query_dist, mask, axis=1
        )
        query_ind = np.take_along_axis(query_ind, mask, axis=1)
    return hubness_reduced_query_dist, query_ind

def kneighbors(self, k: Optional[int] = None) -> Tuple[T, T]:
n_neighbors = self._set_k_if_needed(k)
# First obtain candidate neighbors
query_dist, query_ind = self.nn_algo.kneighbors(
Expand All @@ -73,13 +100,9 @@ def kneighbors(self, k: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
self.nn_algo.source_,
)
# Third, sort hubness reduced candidate neighbors to get the final k neighbors
kth = np.arange(n_neighbors)
mask = np.argpartition(hubness_reduced_query_dist, kth=kth)[:, :n_neighbors]
hubness_reduced_query_dist = np.take_along_axis(
hubness_reduced_query_dist, mask, axis=1
return HubnessReduction._sort(
hubness_reduced_query_dist, query_ind, n_neighbors
)
query_ind = np.take_along_axis(query_ind, mask, axis=1)
return hubness_reduced_query_dist, query_ind


class NoHubnessReduction(HubnessReduction):
Expand All @@ -91,9 +114,9 @@ def _fit(self, neigh_dist, neigh_ind, source, target):
def fit(self, source, target=None):
self.nn_algo.fit(source, target, only_fit_target=True)

def transform(self, neigh_dist, neigh_ind, query) -> Tuple[np.ndarray, np.ndarray]:
def transform(self, neigh_dist, neigh_ind, query) -> Tuple[T, T]:
return neigh_dist, neigh_ind

def kneighbors(self, k: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray]:
def kneighbors(self, k: Optional[int] = None) -> Tuple[T, T]:
n_neighbors = self._set_k_if_needed(k)
return self.nn_algo.kneighbors(query=None, k=n_neighbors, return_distance=True)
30 changes: 8 additions & 22 deletions kiez/hubness_reduction/csls.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations
from typing import Tuple, TypeVar

import numpy as np
from sklearn.utils.validation import check_is_fitted
from tqdm.auto import tqdm

from .base import HubnessReduction

T = TypeVar("T")


class CSLS(HubnessReduction):
"""Hubness reduction with Cross-domain similarity local scaling.
Expand All @@ -30,7 +30,7 @@ def _fit(
neigh_ind,
source=None,
target=None,
) -> CSLS:
) -> "CSLS":
"""Fit the model using target, neigh_dist, and neigh_ind as training data.
Parameters
Expand Down Expand Up @@ -59,7 +59,7 @@ def transform(
neigh_dist,
neigh_ind,
query,
) -> tuple[np.ndarray, np.ndarray]:
) -> Tuple[T, T]:
"""Transform distance between test and training data with CSLS.
Parameters
Expand All @@ -84,27 +84,13 @@ def transform(
"""
check_is_fitted(self, "r_dist_train_")

n_test, n_indexed = neigh_dist.shape

# Find average distances to the k nearest neighbors
r_dist_test = neigh_dist

hub_reduced_dist = np.empty_like(neigh_dist)

# Optionally show progress of local scaling loop
disable_tqdm = not self.verbose
range_n_test = tqdm(
range(n_test),
desc="CSLS",
disable=disable_tqdm,
)

r_train = self.r_dist_train_.mean(axis=1)
r_test = r_dist_test.mean(axis=1)
for i in range_n_test:
hub_reduced_dist[i, :] = (
2 * neigh_dist[i] - r_test[i] - r_train[neigh_ind[i]]
)
r_test = r_dist_test.mean(axis=1).reshape(-1, 1)

hub_reduced_dist = 2 * neigh_dist - r_test - r_train[neigh_ind]
# Return the hubness reduced distances
# These must be sorted downstream
return hub_reduced_dist, neigh_ind
Loading

0 comments on commit 23f2667

Please sign in to comment.