// Copyright ©2016 The Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package mixent implements routines for estimating the entropy of a mixture
// distribution. See
//  Estimating Mixture Entropy with Pairwise Distances by A. Kolchinsky and
//  B. Tracey
// for more information.
//
// Documentation notation: the mixture distribution is
//  p(x) = \sum_{i=1}^N w_i p_i(x)
// The symbol X will be used to represent the mixture distribution p(x), and
// X_i will represent p_i(x), that is, the distribution of the i^th mixture
// component. \sum_i is used as shorthand for \sum_{i=1}^N.
//
// Please note that some of the types implemented here work for all mixture
// distributions, while others only work for special cases, such as mixtures
// of Gaussians.
package mixent

import (
	"math"

	"gonum.org/v1/gonum/floats"
	"gonum.org/v1/gonum/mat"
	"gonum.org/v1/gonum/stat"
	"gonum.org/v1/gonum/stat/distmv"
)

var (
	dimensionMismatch = "mixent: component dimension mismatch"
)

// Component is an individual component in a mixture distribution.
type Component interface {
	Entropy() float64
}

// Estimator is a type that can estimate the entropy of a mixture of
// Components.
type Estimator interface {
	MixtureEntropy(components []Component, weights []float64) float64
}

// Distancer is a type that can compute the distance between two components.
// The distance returned must be greater than or equal to zero, and must equal
// zero if a and b are identical distributions.
type Distancer interface {
	Distance(a, b Component) float64
}

// DistNormaler is a type that can compute the distance between two Normal
// distributions.
type DistNormaler interface {
	DistNormal(l, r *distmv.Normal) float64
}

// NormalDistance wraps a DistNormaler for use with components.
type NormalDistance struct {
	DistNormaler
}

// Distance computes the distance between two Normal components. Distance
// panics if the underlying type of the components is not *distmv.Normal or
// if the dimensions are unequal.
func (n NormalDistance) Distance(l, r Component) float64 {
	l1 := l.(*distmv.Normal)
	l2 := r.(*distmv.Normal)
	return n.DistNormal(l1, l2)
}

// DistUniformer is a type that can compute the distance between two Uniform
// distributions.
type DistUniformer interface {
	DistUniform(l, r *distmv.Uniform) float64
}

// UniformDistance wraps a DistUniformer for use with components.
type UniformDistance struct {
	DistUniformer
}

// Distance computes the distance between two Uniform components. Distance
// panics if the underlying type of the components is not *distmv.Uniform or
// if the dimensions are unequal.
func (u UniformDistance) Distance(l, r Component) float64 {
	l1 := l.(*distmv.Uniform)
	l2 := r.(*distmv.Uniform)
	return u.DistUniform(l1, l2)
}

// unifLogVolOverlap computes the log of the volume of the hyper-rectangle
// where both uniform distributions have positive probability.
func unifLogVolOverlap(b1, b2 []distmv.Bound) float64 {
	var logVolOverlap float64
	for dim, v1 := range b1 {
		v2 := b2[dim]
		// If the hyper-rectangles do not overlap in this dimension, the
		// overlap volume is 0 and its log is -∞.
		if v1.Max <= v2.Min || v2.Max <= v1.Min {
			return math.Inf(-1)
		}
		vol := math.Min(v1.Max, v2.Max) - math.Max(v1.Min, v2.Min)
		logVolOverlap += math.Log(vol)
	}
	return logVolOverlap
}
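
// A minimal sanity-check sketch (not part of the original API) for
// unifLogVolOverlap: two unit squares offset by 0.5 in each dimension
// overlap on a 0.5×0.5 square, so the log overlap volume is log(0.25).
// The bounds below are made-up illustrative values.
func exampleUnifLogVolOverlap() float64 {
	b1 := []distmv.Bound{{Min: 0, Max: 1}, {Min: 0, Max: 1}}
	b2 := []distmv.Bound{{Min: 0.5, Max: 1.5}, {Min: 0.5, Max: 1.5}}
	return unifLogVolOverlap(b1, b2) // math.Log(0.25)
}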

// ELK implements the Expected Likelihood Kernel,
//  ELK(X_i, X_j) = \int_x p_i(x) p_j(x) dx
// The Expected Likelihood Kernel can be used to find a lower bound on the
// mixture entropy. See the MixtureEntropy method for more information.
type ELK struct{}

// LogKernelNormal computes the log of the Expected Likelihood Kernel for two
// Gaussians,
//  ELK(X_i, X_j) = 𝒩(μ_i; μ_j, Σ_i+Σ_j)
func (ELK) LogKernelNormal(l, r *distmv.Normal) float64 {
	if l.Dim() != r.Dim() {
		panic("mixent: normal dimension mismatch")
	}
	covl := l.CovarianceMatrix(nil)
	covr := r.CovarianceMatrix(nil)
	covSum := mat.NewSymDense(l.Dim(), nil)
	covSum.AddSym(covl, covr)
	norm, ok := distmv.NewNormal(r.Mean(nil), covSum, nil)
	if !ok {
		// This should be impossible because the sum of PSD matrices is PSD.
		panic("mixent: covariance sum not positive definite")
	}
	return norm.LogProb(l.Mean(nil))
}

// LogKernelUniform computes the log of the Expected Likelihood Kernel for two
// uniform distributions.
func (ELK) LogKernelUniform(l, r *distmv.Uniform) float64 {
	// ELK is \int_x p_i(x) p_j(x) dx. The integrand is constant over the
	// region of overlap and zero elsewhere, so the integral is
	//  volume * p(x) * q(x)
	// and its log is
	//  log(vol) + log(p(x)) + log(q(x))
	bl := l.Bounds(nil)
	br := r.Bounds(nil)
	overlap := unifLogVolOverlap(bl, br)
	// For a uniform distribution, log(p(x)) over its support equals -H(X).
	logPl := -l.Entropy()
	logPr := -r.Entropy()
	return overlap + logPl + logPr
}

// MixtureEntropy computes a lower bound on the mixture entropy using the
// Expected Likelihood Kernel,
//  H(X) ≥ -\sum_i w_i ln(\sum_j w_j ELK(X_i, X_j))
// Currently MixtureEntropy only works when the components are all
// *distmv.Normal or all *distmv.Uniform, and panics otherwise.
func (elk ELK) MixtureEntropy(components []Component, weights []float64) float64 {
	n := len(components)
	if weights == nil {
		weights = make([]float64, n)
		for i := range weights {
			weights[i] = 1 / float64(n)
		}
	}
	if len(weights) != len(components) {
		panic("mixent: length mismatch")
	}
	inner := make([]float64, n)
	var ent float64
	for i, a := range components {
		for j, b := range components {
			var logKernel float64
			switch t := a.(type) {
			case *distmv.Normal:
				logKernel = elk.LogKernelNormal(t, b.(*distmv.Normal))
			case *distmv.Uniform:
				logKernel = elk.LogKernelUniform(t, b.(*distmv.Uniform))
			default:
				panic("mixent: component type not supported by ELK")
			}
			inner[j] = logKernel + math.Log(weights[j])
		}
		ent -= weights[i] * floats.LogSumExp(inner)
	}
	return ent
}
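
// A minimal usage sketch (not part of the original API) of the ELK lower
// bound on a two-component Gaussian mixture; the means and covariances are
// made-up illustrative values.
func exampleELKMixture() float64 {
	normA, okA := distmv.NewNormal([]float64{0, 0}, mat.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
	normB, okB := distmv.NewNormal([]float64{1, 1}, mat.NewSymDense(2, []float64{2, 0, 0, 2}), nil)
	if !okA || !okB {
		panic("bad covariance")
	}
	components := []Component{normA, normB}
	// nil weights means the components are weighted equally.
	return ELK{}.MixtureEntropy(components, nil)
}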

// ELKDist is a distance metric based on the Expected Likelihood Kernel. It is
// equal to
//  -ln(ELK(X_i, X_j) / sqrt(ELK(X_i, X_i) * ELK(X_j, X_j)))
type ELKDist struct{}

// DistNormal computes the ELK distance between two Normal distributions.
func (ELKDist) DistNormal(l, r *distmv.Normal) float64 {
	return -ELK{}.LogKernelNormal(l, r) + 0.5*ELK{}.LogKernelNormal(l, l) + 0.5*ELK{}.LogKernelNormal(r, r)
}

// DistUniform computes the ELK distance between two Uniform distributions.
func (ELKDist) DistUniform(l, r *distmv.Uniform) float64 {
	return -ELK{}.LogKernelUniform(l, r) + 0.5*ELK{}.LogKernelUniform(l, l) + 0.5*ELK{}.LogKernelUniform(r, r)
}
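
// A minimal sketch (not part of the original API) checking that ELKDist is
// zero for identical components, as required of a Distancer. The mean and
// covariance values are made-up illustrative numbers.
func exampleELKDistSelf() float64 {
	norm, ok := distmv.NewNormal([]float64{0, 0}, mat.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
	if !ok {
		panic("bad covariance")
	}
	// -ln ELK(l,l) + 0.5 ln ELK(l,l) + 0.5 ln ELK(l,l) = 0.
	return ELKDist{}.DistNormal(norm, norm)
}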

// JointEntropy provides an upper bound on the mixture entropy using the joint
// entropy of the mixture and the weights. See the MixtureEntropy method for
// more information.
type JointEntropy struct{}

// MixtureEntropy computes an upper bound on the mixture entropy for arbitrary
// mixture components.
//
// The joint entropy between the mixture distribution and the weight
// distribution is an upper bound on the entropy of the mixture:
//  H(X) ≤ H(X, W) = H(X|W) + H(W) = \sum_i w_i H(X_i) + H(W)
//
// If weights is nil, the components are assumed to have equal weights.
func (JointEntropy) MixtureEntropy(components []Component, weights []float64) float64 {
	avgEnt := AvgEnt{}.MixtureEntropy(components, weights)
	if weights == nil {
		return avgEnt + math.Log(float64(len(components)))
	}
	avgEnt += stat.Entropy(weights)
	return avgEnt
}

// AvgEnt provides a lower bound on the mixture entropy using the average
// entropy of the components. See the MixtureEntropy method for more
// information.
type AvgEnt struct{}

// MixtureEntropy computes the entropy of the mixture conditional on the
// mixture weight, which is a lower bound on the true entropy of the mixture
// distribution,
//  H(X) ≥ H(X|W) = \sum_i w_i H(X_i)
// that is, the average entropy of the components.
//
// If weights is nil, the components are assumed to have equal weights.
func (AvgEnt) MixtureEntropy(components []Component, weights []float64) float64 {
	if weights == nil {
		var avgEnt float64
		nf := float64(len(components))
		for _, v := range components {
			avgEnt += v.Entropy() / nf
		}
		return avgEnt
	}
	var avgEnt float64
	for i, v := range components {
		avgEnt += weights[i] * v.Entropy()
	}
	return avgEnt
}
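
// A minimal sketch (not part of the original API) showing how AvgEnt and
// JointEntropy bracket the true mixture entropy from below and above. The
// two Gaussian components use made-up illustrative parameters.
func exampleEntropyBounds() (lower, upper float64) {
	normA, okA := distmv.NewNormal([]float64{0, 0}, mat.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
	normB, okB := distmv.NewNormal([]float64{3, -1}, mat.NewSymDense(2, []float64{2, 0.5, 0.5, 2}), nil)
	if !okA || !okB {
		panic("bad covariance")
	}
	components := []Component{normA, normB}
	weights := []float64{0.7, 0.3}
	lower = AvgEnt{}.MixtureEntropy(components, weights)       // H(X|W)
	upper = JointEntropy{}.MixtureEntropy(components, weights) // H(X|W) + H(W)
	return lower, upper
}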

// PairwiseDistance estimates the entropy using a pairwise distance metric.
// See the MixtureEntropy method for more information.
type PairwiseDistance struct {
	Distancer Distancer
}

// MixtureEntropy estimates the entropy using a distance metric between each
// pair of distributions,
//  H(X) ≈ \sum_i w_i H(X_i) - \sum_i w_i ln(\sum_j w_j exp(-D(X_i || X_j)))
// As shown in
//  Estimating Mixture Entropy with Pairwise Distances by A. Kolchinsky and
//  B. Tracey
// this estimator has several nice properties (see the paper for the necessary
// conditions on D for the following to hold).
//
// 1) The estimate returned is always larger than the AvgEnt estimate and
// smaller than the JointEntropy estimate. Those two estimators differ by
// H(W), so this provides a bound on the error.
//
// 2) The pairwise estimator becomes exact as the mixture components become
// clustered. The number of clusters is arbitrary, so this estimator is exact
// when all mixture components are identical, when all are very far apart from
// one another, and when they are grouped into k far-apart clusters.
//
// 3) Using the Bhattacharyya distance yields a lower bound on the mixture
// entropy, and using the Kullback-Leibler divergence yields an upper bound.
// Any distance metric with D_Bhattacharyya ≤ D ≤ D_KL provides an estimate
// between these two bounds.
//
// If weights is nil, the components are assumed to have equal weights.
func (p PairwiseDistance) MixtureEntropy(components []Component, weights []float64) float64 {
	n := len(components)
	if weights == nil {
		weights = make([]float64, n)
		for i := range weights {
			weights[i] = 1 / float64(n)
		}
	}
	var avgEntropy float64
	for i, v := range components {
		avgEntropy += weights[i] * v.Entropy()
	}
	var outer float64
	inner := make([]float64, n) // Store the inner sums for numerical stability.
	for i, xi := range components {
		for j, xj := range components {
			inner[j] = math.Log(weights[j]) - p.Distancer.Distance(xi, xj)
		}
		outer += weights[i] * floats.LogSumExp(inner)
	}
	return avgEntropy - outer
}
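
// A minimal usage sketch (not part of the original API): bracketing the
// mixture entropy with the pairwise estimator, using the Bhattacharyya and
// Kullback-Leibler distances from gonum's stat/distmv package as the
// Distancer. The component parameters are made-up illustrative values.
func examplePairwiseDistance() (lower, upper float64) {
	normA, okA := distmv.NewNormal([]float64{0, 0}, mat.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
	normB, okB := distmv.NewNormal([]float64{5, 5}, mat.NewSymDense(2, []float64{1, 0, 0, 1}), nil)
	if !okA || !okB {
		panic("bad covariance")
	}
	components := []Component{normA, normB}
	// Bhattacharyya gives a lower bound, Kullback-Leibler an upper bound.
	lower = PairwiseDistance{NormalDistance{distmv.Bhattacharyya{}}}.MixtureEntropy(components, nil)
	upper = PairwiseDistance{NormalDistance{distmv.KullbackLeibler{}}}.MixtureEntropy(components, nil)
	return lower, upper
}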

// ComponentCenters estimates the entropy based on the probability at the
// component centers. See the MixtureEntropy method for more information.
type ComponentCenters struct{}

// gaussianCenters collects the component means into the rows of a matrix.
func (ComponentCenters) gaussianCenters(components []Component) *mat.Dense {
	r := len(components)
	c := components[0].(*distmv.Normal).Dim()
	centers := mat.NewDense(r, c, nil)
	for i := 0; i < r; i++ {
		centers.SetRow(i, components[i].(*distmv.Normal).Mean(nil))
	}
	return centers
}

// uniformCenters collects the midpoints of the component bounds into the rows
// of a matrix.
func (ComponentCenters) uniformCenters(components []Component) *mat.Dense {
	r := len(components)
	c := components[0].(*distmv.Uniform).Dim()
	centers := mat.NewDense(r, c, nil)
	for i := 0; i < r; i++ {
		b := components[i].(*distmv.Uniform).Bounds(nil)
		for j := range b {
			centers.Set(i, j, (b[j].Max+b[j].Min)/2)
		}
	}
	return centers
}

// MixtureEntropy computes the estimate of the entropy based on the average
// probability of the component centers,
//  H(X) ≈ -\sum_i w_i ln(\sum_j w_j p_j(μ_i))
// If weights is nil, the components are assumed to have equal weights.
//
// Currently MixtureEntropy is only coded for Gaussian and Uniform components.
func (comp ComponentCenters) MixtureEntropy(components []Component, weights []float64) float64 {
	var centers *mat.Dense
	switch components[0].(type) {
	default:
		panic("componentcenters: unknown mixture type")
	case *distmv.Normal:
		centers = comp.gaussianCenters(components)
	case *distmv.Uniform:
		centers = comp.uniformCenters(components)
	}
	n := len(components)
	if weights == nil {
		weights = make([]float64, n)
		for i := range weights {
			weights[i] = 1 / float64(n)
		}
	}
	if len(weights) != len(components) {
		panic("mixent: length of components does not match weights")
	}
	// Compute the probability of each center under the full mixture.
	var ent float64
	inner := make([]float64, n)
	for i := range components {
		center := centers.RawRowView(i)
		for j, c := range components {
			lp := c.(distmv.LogProber)
			inner[j] = math.Log(weights[j]) + lp.LogProb(center)
		}
		ent += weights[i] * floats.LogSumExp(inner)
	}
	return -ent
}
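
// A minimal usage sketch (not part of the original API) of the
// ComponentCenters estimator on a two-component, one-dimensional Gaussian
// mixture with made-up illustrative parameters.
func exampleComponentCenters() float64 {
	normA, okA := distmv.NewNormal([]float64{0}, mat.NewSymDense(1, []float64{1}), nil)
	normB, okB := distmv.NewNormal([]float64{2}, mat.NewSymDense(1, []float64{1}), nil)
	if !okA || !okB {
		panic("bad covariance")
	}
	components := []Component{normA, normB}
	weights := []float64{0.5, 0.5}
	return ComponentCenters{}.MixtureEntropy(components, weights)
}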