Skip to content

Commit

Permalink
Merge branch 'release-2.5.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
VarIr committed Jan 29, 2019
2 parents 3c31a32 + f9b5878 commit 54a5c68
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 52 deletions.
2 changes: 1 addition & 1 deletion hub_toolbox/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Contact: <[email protected]>
"""

__version__ = '2.5'
__version__ = '2.5.2'

try:
import numpy
Expand Down
111 changes: 60 additions & 51 deletions hub_toolbox/hubness_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,51 +29,56 @@

CITATION = \
"""
Feldbauer, R., Flexer, A.: Centering Versus Scaling for Hubness Reduction.
In: Villa, E.P.A., Masulli, P., Pons Rivero, J.A. (eds.) ICANN 2016, part I.
LNCS, vol. 9886, pp. 175–183. Springer International Publishing, Cham (2016).
(tech report available at http://www.ofai.at/cgi-bin/tr-online?number+2016-05)
R. Feldbauer, M. Leodolter, C. Plant and A. Flexer,
"Fast Approximate Hubness Reduction for Large High-Dimensional Data",
2018 IEEE International Conference on Big Knowledge (ICBK), Singapore, 2018,
pp. 358-367. doi: 10.1109/ICBK.2018.00055
(tech report available at http://www.ofai.at/cgi-bin/tr-online?number+2018-02)
or
Schnitzer, D., Flexer, A., Schedl, M., Widmer, G.: Local and global scaling
reduce hubs in space. J. Mach. Learn. Res. 13(1), 2871–2902 (2012).
(full paper available at http://www.jmlr.org/papers/v13/schnitzer12a.html)
R. Feldbauer, A. Flexer, "A comprehensive empirical comparison of hubness reduction in high-dimensional spaces"
Knowledge and Information Systems, 2018, https://doi.org/10.1007/s10115-018-1205-y
"""

def _primary_distance(D:np.ndarray, metric):

def _primary_distance(D: np.ndarray, metric):
"""Return `D`, identical. (Dummy function.)"""
return D


# New types of hubness reduction methods must be added here
SEC_DIST = {'mp' : mutual_proximity_empiric,
'mp_gaussi' : mutual_proximity_gaussi,
'mp_gammai' : mutual_proximity_gammai,
'ls' : local_scaling,
'nicdm' : nicdm,
'snn' : shared_nearest_neighbors,
'cent' : centering,
'wcent' : weighted_centering,
'lcent' : localized_centering,
'dsg' : dis_sim_global,
'dsl' : dis_sim_local,
'orig' : _primary_distance # a dummy function
}

class HubnessAnalysis():
SEC_DIST = {'mp': mutual_proximity_empiric,
'mp_gaussi': mutual_proximity_gaussi,
'mp_gammai': mutual_proximity_gammai,
'ls': local_scaling,
'nicdm': nicdm,
'snn': shared_nearest_neighbors,
'cent': centering,
'wcent': weighted_centering,
'lcent': localized_centering,
'dsg': dis_sim_global,
'dsl': dis_sim_local,
'orig': _primary_distance # a dummy function
}


class HubnessAnalysis:
"""The main hubness analysis class.
For more detailed analyses (optimizing parameters, using similarity data,
etc.) please use the individual modules.
Examples
--------
>>> from hub_toolbox.HubnessAnalysis import HubnessAnalysis
>>> from hub_toolbox.hubness_analysis import HubnessAnalysis
>>> hub = HubnessAnalysis()
>>> hub.analyse_hubness()
>>> hub.analyze_hubness()
>>> hub = HubnessAnalysis(D, classes, vectors)
>>> hub.analyse_hubness()
>>> from hub_toolbox.io import load_dexter
>>> D, y, X = load_dexter()
>>> hub = HubnessAnalysis(D, classes=y, vectors=X)
>>> hub.analyze_hubness()
Notes
-----
Expand All @@ -87,11 +92,11 @@ class HubnessAnalysis():
See also
--------
analyse_hubness : additional parameters (e.g. k-occurence, k-NN)
analyze_hubness : additional parameters (e.g. k-occurrence, k-NN)
"""

def __init__(self, D:np.ndarray=None, classes:np.ndarray=None,
vectors:np.ndarray=None, metric:str='distance'):
def __init__(self, D: np.ndarray = None, classes: np.ndarray = None,
vectors: np.ndarray = None, metric: str = 'distance'):
"""Initialize a quick hubness analysis.
Parameters
Expand Down Expand Up @@ -143,18 +148,18 @@ def __init__(self, D:np.ndarray=None, classes:np.ndarray=None,

@property
def _header(self):
return {'mp' : "MUTUAL PROXIMITY (Empiric)",
'mp_gaussi' : "MUTUAL PROXIMITY (Independent Gaussians)",
'mp_gammai' : "MUTUAL PROXIMITY (Independent Gamma)",
'ls' : "LOCAL SCALING (original)",
'nicdm' : "LOCAL SCALING (NICDM)",
'snn' : "SHARED NEAREST NEIGHBORS",
'cent' : "CENTERING",
'wcent' : "WEIGHTED CENTERING",
'lcent' : "LOCALIZED CENTERING",
'dsg' : "DISSIM GLOBAL",
'dsl' : "DISSIM LOCAL",
'orig' : "ORIGINAL DATA"}
return {'mp': "MUTUAL PROXIMITY (Empiric)",
'mp_gaussi': "MUTUAL PROXIMITY (Independent Gaussians)",
'mp_gammai': "MUTUAL PROXIMITY (Independent Gamma)",
'ls': "LOCAL SCALING (original)",
'nicdm': "LOCAL SCALING (NICDM)",
'snn': "SHARED NEAREST NEIGHBORS",
'cent': "CENTERING",
'wcent': "WEIGHTED CENTERING",
'lcent': "LOCALIZED CENTERING",
'dsg': "DISSIM GLOBAL",
'dsl': "DISSIM LOCAL",
'orig': "ORIGINAL DATA"}

def _calc_intrinsic_dim(self):
"""Calculate intrinsic dimension estimate."""
Expand All @@ -163,7 +168,7 @@ def _calc_intrinsic_dim(self):

def analyze_hubness(self, experiments="orig,mp,mp_gaussi,nicdm,cent,dsg",
hubness_k=(5, 10), knn_k=(1, 5, 20),
print_results=True, verbose:int=0):
print_results=True, verbose: int = 0):
"""Analyse hubness in original data and rescaled distances.
Parameters
Expand Down Expand Up @@ -211,8 +216,10 @@ def analyze_hubness(self, experiments="orig,mp,mp_gaussi,nicdm,cent,dsg",
print("Experiment {}/{} ({})".
format(i+1, len(experiments), exp_type), end="\r")
experiment = HubnessExperiment(D=self.D,
secondary_distance_type=exp_type, metric=self.metric,
classes=self.classes, vectors=self.vectors)
secondary_distance_type=exp_type,
metric=self.metric,
classes=self.classes,
vectors=self.vectors)
if self.D is not None:
experiment._calc_secondary_distance()
for k in hubness_k:
Expand Down Expand Up @@ -311,12 +318,13 @@ def print_analysis_report(self, experiment=None, report_nr:int=0):
print()
return

class HubnessExperiment():

class HubnessExperiment:
"""Perform a single hubness experiment"""

def __init__(self, D:np.ndarray, secondary_distance_type:str,
metric:str='distance', classes:np.ndarray=None,
vectors:np.ndarray=None):
def __init__(self, D: np.ndarray, secondary_distance_type: str,
metric: str = 'distance', classes: np.ndarray = None,
vectors: np.ndarray = None):
"""Initialize a hubness experiment"""

io.check_distance_matrix_shape(D)
Expand Down Expand Up @@ -363,7 +371,7 @@ def _calc_secondary_distance(self):
format(self.secondary_distance_type))
return self

def _calc_hubness(self, k:int=5):
def _calc_hubness(self, k: int = 5):
"""Calculate hubness (skewness of `k`-occurrence).
Also calculate percentage of anti hubs (`k`-occurrence == 0) and
Expand All @@ -376,7 +384,7 @@ def _calc_hubness(self, k:int=5):
self.max_hub_k_occurence[k] = 100 * N_k.max() / self.n
return self

def _calc_knn_accuracy(self, k:int=5):
def _calc_knn_accuracy(self, k: int = 5):
"""Calculate `k`-NN accuracy."""
acc, _, _ = score(D=self.secondary_distance, target=self.classes,
k=k, metric=self.metric)
Expand All @@ -390,6 +398,7 @@ def _calc_gk_index(self):
metric=self.metric)
return self


if __name__ == "__main__":
hub = HubnessAnalysis()
hub.analyze_hubness()

0 comments on commit 54a5c68

Please sign in to comment.