forked from bghojogh/MDS-SammonMapping-Isomap
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Sammon_mapping.py
147 lines (128 loc) · 5.75 KB
/
Sammon_mapping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# taken from: https://github.com/tompollard/sammon
# example: https://data-farmers.github.io/2019-06-10-sammon-mapping/
def sammon(x, n, display = 2, inputdist = 'raw', maxhalves = 20, maxiter = 500, tolfun = 1e-9, init = 'default'):
import numpy as np
from scipy.spatial.distance import cdist
"""Perform Sammon mapping on dataset x
y = sammon(x) applies the Sammon nonlinear mapping procedure on
multivariate data x, where each row represents a pattern and each column
represents a feature. On completion, y contains the corresponding
co-ordinates of each point on the map. By default, a two-dimensional
map is created. Note if x contains any duplicated rows, SAMMON will
fail (ungracefully).
[y,E] = sammon(x) also returns the value of the cost function in E (i.e.
the stress of the mapping).
An N-dimensional output map is generated by y = sammon(x,n) .
A set of optimisation options can be specified using optional
arguments, y = sammon(x,n,[OPTS]):
maxiter - maximum number of iterations
tolfun - relative tolerance on objective function
maxhalves - maximum number of step halvings
input - {'raw','distance'} if set to 'distance', X is
interpreted as a matrix of pairwise distances.
display - 0 to 2. 0 least verbose, 2 max verbose.
init - {'pca', 'cmdscale', random', 'default'}
default is 'pca' if input is 'raw',
'msdcale' if input is 'distance'
The default options are retrieved by calling sammon(x) with no
parameters.
File : sammon.py
Date : 18 April 2014
Authors : Tom J. Pollard ([email protected])
: Ported from MATLAB implementation by
Gavin C. Cawley and Nicola L. C. Talbot
Description : Simple python implementation of Sammon's non-linear
mapping algorithm [1].
References : [1] Sammon, John W. Jr., "A Nonlinear Mapping for Data
Structure Analysis", IEEE Transactions on Computers,
vol. C-18, no. 5, pp 401-409, May 1969.
Copyright : (c) Dr Gavin C. Cawley, November 2007.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
# Create distance matrix unless given by parameters
if inputdist == 'distance':
D = x
if init == 'default':
init = 'cmdscale'
else:
D = cdist(x, x)
if init == 'default':
init = 'pca'
if inputdist == 'distance' and init == 'pca':
raise ValueError("Cannot use init == 'pca' when inputdist == 'distance'")
if np.count_nonzero(np.diagonal(D)) > 0:
raise ValueError("The diagonal of the dissimilarity matrix must be zero")
# Remaining initialisation
N = x.shape[0]
scale = 0.5 / D.sum()
D = D + np.eye(N)
if np.count_nonzero(D<=0) > 0:
raise ValueError("Off-diagonal dissimilarities must be strictly positive")
Dinv = 1 / D
if init == 'pca':
[UU,DD,_] = np.linalg.svd(x)
y = UU[:,:n]*DD[:n]
elif init == 'cmdscale':
from cmdscale import cmdscale
y,e = cmdscale(D)
y = y[:,:n]
else:
y = np.random.normal(0.0,1.0,[N,n])
one = np.ones([N,n])
d = cdist(y,y) + np.eye(N)
dinv = 1. / d
delta = D-d
E = ((delta**2)*Dinv).sum()
# Get on with it
for i in range(maxiter):
# Compute gradient, Hessian and search direction (note it is actually
# 1/4 of the gradient and Hessian, but the step size is just the ratio
# of the gradient and the diagonal of the Hessian so it doesn't
# matter).
delta = dinv - Dinv
deltaone = np.dot(delta,one)
g = np.dot(delta,y) - (y * deltaone)
dinv3 = dinv ** 3
y2 = y ** 2
H = np.dot(dinv3,y2) - deltaone - np.dot(2,y) * np.dot(dinv3,y) + y2 * np.dot(dinv3,one)
s = -g.flatten(order='F') / np.abs(H.flatten(order='F'))
y_old = y
# Use step-halving procedure to ensure progress is made
for j in range(maxhalves):
s_reshape = np.reshape(s, (-1,n),order='F')
y = y_old + s_reshape
d = cdist(y, y) + np.eye(N)
dinv = 1 / d
delta = D - d
E_new = ((delta**2)*Dinv).sum()
if E_new < E:
break
else:
s = 0.5*s
# Bomb out if too many halving steps are required
if j == maxhalves-1:
print('Warning: maxhalves exceeded. Sammon mapping may not converge...')
# Evaluate termination criterion
if abs((E - E_new) / E) < tolfun:
if display:
print('TolFun exceeded: Optimisation terminated')
break
# Report progress
E = E_new
if display > 1:
print('epoch = %d : E = %12.10f'% (i+1, E * scale))
if i == maxiter-1:
print('Warning: maxiter exceeded. Sammon mapping may not have converged...')
# Fiddle stress to match the original Sammon paper
E = E * scale
return [y,E]