import numpy as np
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms
from scipy.stats import multivariate_normal


class GMM():
    def __init__(self, k, dim, init_mu=None, init_sigma=None, init_pi=None, colors=None):
        '''
        Define a model with a known number of clusters and dimensions.
        input:
            - k: Number of Gaussian clusters
            - dim: Dimension of the data
            - init_mu: initial value of the cluster means (k, dim)
                (default) random from uniform[-10, 10]
            - init_sigma: initial value of the cluster covariance matrices (k, dim, dim)
                (default) identity matrix for each cluster
            - init_pi: initial value of the cluster weights (k,)
                (default) equal weight for every cluster, i.e. 1/k
            - colors: color value for plotting each cluster (k, 3)
                (default) random from uniform[0, 1]
        '''
        self.k = k
        self.dim = dim
        if init_mu is None:
            init_mu = np.random.rand(k, dim) * 20 - 10
        self.mu = init_mu
        if init_sigma is None:
            init_sigma = np.zeros((k, dim, dim))
            for i in range(k):
                init_sigma[i] = np.eye(dim)
        self.sigma = init_sigma
        if init_pi is None:
            init_pi = np.ones(self.k) / self.k
        self.pi = init_pi
        if colors is None:
            colors = np.random.rand(k, 3)
        self.colors = colors

    def init_em(self, X):
        '''
        Initialization for the EM algorithm.
        input:
            - X: data (batch_size, dim)
        '''
        self.data = X
        self.num_points = X.shape[0]
        self.z = np.zeros((self.num_points, self.k))

    def e_step(self):
        '''
        E-step of the EM algorithm: compute the responsibility of each
        cluster for each data point under the current parameters.
        '''
        for i in range(self.k):
            self.z[:, i] = self.pi[i] * multivariate_normal.pdf(self.data, mean=self.mu[i], cov=self.sigma[i])
        self.z /= self.z.sum(axis=1, keepdims=True)

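    # Reference math for e_step (explanatory comment, not executed): the
    # responsibility of cluster k for point x_n is
    #     gamma(z_nk) = pi_k * N(x_n | mu_k, Sigma_k)
    #                   / sum_j pi_j * N(x_n | mu_j, Sigma_j)
    # which is exactly the per-column product above followed by the
    # row-wise normalization.
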
    def m_step(self):
        '''
        M-step of the EM algorithm: re-estimate the weights, means, and
        covariances from the current responsibilities.
        '''
        sum_z = self.z.sum(axis=0)
        self.pi = sum_z / self.num_points
        self.mu = np.matmul(self.z.T, self.data)
        self.mu /= sum_z[:, None]
        for i in range(self.k):
            j = np.expand_dims(self.data, axis=1) - self.mu[i]
            s = np.matmul(j.transpose([0, 2, 1]), j)
            self.sigma[i] = np.matmul(s.transpose(1, 2, 0), self.z[:, i])
            self.sigma[i] /= sum_z[i]

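    # Reference math for m_step (explanatory comment, not executed): with
    # N_k = sum_n gamma(z_nk), the updates above implement
    #     pi_k    = N_k / N
    #     mu_k    = (1 / N_k) * sum_n gamma(z_nk) * x_n
    #     Sigma_k = (1 / N_k) * sum_n gamma(z_nk) * (x_n - mu_k)(x_n - mu_k)^T
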
    def log_likelihood(self, X):
        '''
        Compute the log-likelihood of X under the current parameters.
        input:
            - X: data (batch_size, dim)
        output:
            - log-likelihood of X: Sum_n log( Sum_k pi_k * N( X_n | mu_k, sigma_k ) )
        '''
        ll = []
        for d in X:
            tot = 0
            for i in range(self.k):
                tot += self.pi[i] * multivariate_normal.pdf(d, mean=self.mu[i], cov=self.sigma[i])
            ll.append(np.log(tot))
        return np.sum(ll)

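    # Note (suggestion, not part of the original file): for widely separated
    # clusters or higher dimensions, the inner sum can underflow to 0 and
    # np.log(tot) becomes -inf; if that matters for your data, a log-sum-exp
    # formulation (e.g. scipy.special.logsumexp over per-cluster
    # multivariate_normal.logpdf values) is a standard, more stable alternative.
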
    def plot_gaussian(self, mean, cov, ax, n_std=3.0, facecolor='none', **kwargs):
        '''
        Utility function to plot one Gaussian from its mean and covariance.
        '''
        pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
        ell_radius_x = np.sqrt(1 + pearson)
        ell_radius_y = np.sqrt(1 - pearson)
        ellipse = Ellipse((0, 0),
                          width=ell_radius_x * 2,
                          height=ell_radius_y * 2,
                          facecolor=facecolor,
                          **kwargs)
        scale_x = np.sqrt(cov[0, 0]) * n_std
        mean_x = mean[0]
        scale_y = np.sqrt(cov[1, 1]) * n_std
        mean_y = mean[1]
        transf = transforms.Affine2D() \
            .rotate_deg(45) \
            .scale(scale_x, scale_y) \
            .translate(mean_x, mean_y)
        ellipse.set_transform(transf + ax.transData)
        return ax.add_patch(ellipse)

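    # How the ellipse above is built (explanatory comment): a unit circle is
    # stretched by sqrt(1 +/- pearson) along its axes, rotated by 45 degrees
    # (the principal axes of a 2x2 correlation matrix with unit variances),
    # scaled by n_std standard deviations per dimension, and translated to the
    # mean; this is the standard matplotlib confidence-ellipse recipe.
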
    def draw(self, ax, n_std=2.0, facecolor='none', **kwargs):
        '''
        Function to draw the Gaussians.
        Note: only for two-dimensional datasets.
        '''
        if self.dim != 2:
            print("Drawing available only for 2D case.")
            return
        for i in range(self.k):
            self.plot_gaussian(self.mu[i], self.sigma[i], ax, n_std=n_std, facecolor=facecolor, edgecolor=self.colors[i], **kwargs)
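

# Minimal usage sketch (not part of the original class): fits the GMM to
# synthetic 2D data with a plain EM loop and draws the result. The toy data,
# cluster count, and iteration/tolerance settings below are illustrative
# assumptions, not values prescribed by this file.
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    np.random.seed(0)
    # Two well-separated Gaussian blobs as toy data.
    X = np.vstack([
        np.random.randn(200, 2) + np.array([-4.0, 0.0]),
        np.random.randn(200, 2) + np.array([4.0, 2.0]),
    ])

    gmm = GMM(k=2, dim=2)
    gmm.init_em(X)

    prev_ll = -np.inf
    for it in range(100):
        gmm.e_step()
        gmm.m_step()
        ll = gmm.log_likelihood(X)
        print(f"iteration {it}: log-likelihood = {ll:.4f}")
        if np.abs(ll - prev_ll) < 1e-6:  # simple convergence check
            break
        prev_ll = ll

    fig, ax = plt.subplots()
    ax.scatter(X[:, 0], X[:, 1], s=5, alpha=0.5)
    gmm.draw(ax, n_std=2.0)
    plt.show()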