from __future__ import print_function
from __future__ import absolute_import

from copy import copy
from builtins import range

import autograd.numpy as np
from autograd.core import primitive


def build_gru(input_count, state_count, output_count):
"""Constructor for gated-recurrent unit.
@param input_count: integer
number of input dimensions
@param state_count: integer
number of hidden dimensions
@param output_count: integer
number of binary outputs
(no continuous support at the moment)
@return predict: function
used to predict y_hat
@return log_likelihood: function
used to compute log likelihood
@return parser: WeightsParser object
object to organize weights
"""
    parser = WeightsParser()
    # input weight matrices have input_count + 1 rows because a constant
    # 1 is appended to every input vector to serve as a bias term
    parser.add_shape('init_hiddens', (1, state_count))
    parser.add_shape('update_x_weights', (input_count + 1, state_count))
    parser.add_shape('update_h_weights', (state_count, state_count))
    parser.add_shape('reset_x_weights', (input_count + 1, state_count))
    parser.add_shape('reset_h_weights', (state_count, state_count))
    parser.add_shape('thidden_x_weights', (input_count + 1, state_count))
    parser.add_shape('thidden_h_weights', (state_count, state_count))
    parser.add_shape('output_h_weights', (state_count, output_count))
    def update(curr_input, prev_hiddens, update_x_weights,
               update_h_weights, reset_x_weights, reset_h_weights,
               thidden_x_weights, thidden_h_weights):
        """Single GRU time step: map the current input and the previous
        hidden state to the next hidden state."""
        # update gate: z_t = sigmoid(W_z x_t + U_z h_{t-1})
        update = sigmoid(np.dot(curr_input, update_x_weights) +
                         np.dot(prev_hiddens, update_h_weights))
        # reset gate: r_t = sigmoid(W_r x_t + U_r h_{t-1})
        reset = sigmoid(np.dot(curr_input, reset_x_weights) +
                        np.dot(prev_hiddens, reset_h_weights))
        # candidate state: h'_t = tanh(W_h x_t + U_h (r_t * h_{t-1}))
        thiddens = np.tanh(np.dot(curr_input, thidden_x_weights) +
                           np.dot(reset * prev_hiddens, thidden_h_weights))
        # new state: convex combination of old and candidate, gated by z_t
        hiddens = (1 - update) * prev_hiddens + update * thiddens
        return hiddens
    def outputs(weights, input_set, fence_set, output_set=None,
                return_pred_set=False):
        """Run the GRU over a batch of concatenated sequences.

        fence_set holds the boundary indices of the sequences packed
        column-wise into input_set: sequence i occupies columns
        fence_set[i]:fence_set[i + 1].
        """
        update_x_weights = parser.get(weights, 'update_x_weights')
        update_h_weights = parser.get(weights, 'update_h_weights')
        reset_x_weights = parser.get(weights, 'reset_x_weights')
        reset_h_weights = parser.get(weights, 'reset_h_weights')
        thidden_x_weights = parser.get(weights, 'thidden_x_weights')
        thidden_h_weights = parser.get(weights, 'thidden_h_weights')
        output_h_weights = parser.get(weights, 'output_h_weights')
        data_count = len(fence_set) - 1
        ll = 0.0
        n_i_track = 0
        fence_base = fence_set[0]
        pred_set = None
        if return_pred_set:
            pred_set = np.zeros((output_count, input_set.shape[1]))
        # loop through sequences and time steps
        for data_iter in range(data_count):
            # every sequence starts from the same learned initial state
            hiddens = copy(parser.get(weights, 'init_hiddens'))
            fence_post_1 = fence_set[data_iter] - fence_base
            fence_post_2 = fence_set[data_iter + 1] - fence_base
            time_count = fence_post_2 - fence_post_1
            curr_input = input_set[:, fence_post_1:fence_post_2]
            for time_iter in range(time_count):
                # append a constant 1 to the input to act as a bias term
                hiddens = update(np.expand_dims(np.hstack((curr_input[:, time_iter], 1)), axis=0),
                                 hiddens, update_x_weights, update_h_weights, reset_x_weights,
                                 reset_h_weights, thidden_x_weights, thidden_h_weights)
                if output_set is not None:
                    # subtract a small constant so np.sign maps labels
                    # {0, 1} to {-1, +1}; sigmoid(sign * logit) is then
                    # the Bernoulli likelihood of the observed label
                    out_proba = sigmoid(np.sign(output_set[:, n_i_track] - 1e-3) *
                                        np.dot(hiddens, output_h_weights))
                    out_lproba = safe_log(out_proba)
                    ll += np.sum(out_lproba)
                else:
                    out_proba = sigmoid(np.dot(hiddens, output_h_weights))
                    out_lproba = safe_log(out_proba)
                if return_pred_set:
                    pred_set[:, n_i_track] = out_lproba[0]
                n_i_track += 1
        return ll, pred_set
    def predict(weights, input_set, fence_set):
        """Return per-timestep output probabilities (exp of log-probs)."""
        _, output_set = outputs(weights, input_set, fence_set,
                                return_pred_set=True)
        return np.exp(output_set)

    def log_likelihood(weights, input_set, fence_set, output_set):
        """Bernoulli log-likelihood of output_set given input_set."""
        ll, _ = outputs(weights, input_set, fence_set, output_set=output_set)
        return ll
return predict, log_likelihood, parser
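

# A minimal usage sketch for build_gru. Illustrative only: the dimensions,
# sequence lengths, and initialization scale below are assumptions, not
# part of the original API.
def _demo_build_gru():
    predict, log_likelihood, parser = build_gru(input_count=3,
                                                state_count=8,
                                                output_count=2)
    weights = 0.1 * np.random.randn(parser.num_weights)
    # two sequences of lengths 4 and 6, packed column-wise side by side
    input_set = np.random.randn(3, 10)
    fence_set = [0, 4, 10]
    output_set = np.random.randint(0, 2, size=(2, 10))
    probs = predict(weights, input_set, fence_set)  # (2, 10) probabilities
    ll = log_likelihood(weights, input_set, fence_set, output_set)
    return probs, ll
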
def build_mlp(layer_sizes, activation=np.tanh, output_activation=lambda x: x):
"""Constructor for multilayer perceptron.
@param layer_sizes: list of integers
list of layer sizes in the perceptron.
@param activation: function (default: np.tanh)
what activation to use after first N - 1 layers.
@param output_activation: function (default: linear)
what activation to use after last layer.
@return predict: function
used to predict y_hat
@return log_likelihood: function
used to compute log likelihood
@return parser: WeightsParser object
object to organize weights
"""
parser = WeightsParser()
for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
parser.add_shape(('weights', i), shape)
parser.add_shape(('biases', i), (1, shape[1]))
    def predict(weights, X):
        # X is (features, examples); work with its transpose internally
        cur_X = copy(X.T)
        for layer in range(len(layer_sizes) - 1):
            cur_W = parser.get(weights, ('weights', layer))
            cur_B = parser.get(weights, ('biases', layer))
            cur_Z = np.dot(cur_X, cur_W) + cur_B
            if layer < len(layer_sizes) - 2:
                # hidden layers only; the final pre-activation cur_Z
                # goes through output_activation below
                cur_X = activation(cur_Z)
        return output_activation(cur_Z.T)
    def log_likelihood(weights, X, y):
        # despite the name, this returns the mean squared error, which is
        # (up to scale and an additive constant) the negative Gaussian
        # log-likelihood
        y_hat = predict(weights, X)
        return mse(y.T, y_hat.T)
return predict, log_likelihood, parser
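

# A minimal usage sketch for build_mlp. Illustrative only: the layer sizes
# and initialization scale below are assumptions, not part of the original
# API. Inputs follow the same (features, examples) convention as the GRU.
def _demo_build_mlp():
    predict, log_likelihood, parser = build_mlp([3, 16, 1])
    weights = 0.1 * np.random.randn(parser.num_weights)
    X = np.random.randn(3, 20)  # 3 features, 20 examples
    y = np.random.randn(1, 20)
    y_hat = predict(weights, X)  # shape (1, 20)
    loss = log_likelihood(weights, X, y)
    return y_hat, loss
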
def mse(y_true, y_pred):
    """Mean squared error between flattened arrays."""
    return np.mean(np.power(y_true.ravel() - y_pred.ravel(), 2))


def sigmoid(x):
    # the tanh form is numerically stable for large |x|
    return 0.5 * (np.tanh(x) + 1)

@primitive
def softplus(x):
    """Numerically stable transform from the real line to the positive reals.
    Returns np.log(1.0 + np.exp(x)) without overflowing for large x.
    Autograd-friendly (registered as a primitive) and fully vectorized.
    @param x: array of values in (-inf, +inf)
    @return ans: array of values in (0, +inf), same shape as x
    """
    if not isinstance(x, float):
        # vectorized case: use log1p(exp(x)) where x <= 0 and the
        # equivalent x + log1p(exp(-x)) where x > 0 to avoid overflow
        mask1 = x > 0
        mask0 = np.logical_not(mask1)
        out = np.zeros_like(x)
        out[mask0] = np.log1p(np.exp(x[mask0]))
        out[mask1] = x[mask1] + np.log1p(np.exp(-x[mask1]))
        return out
    if x > 0:
        return x + np.log1p(np.exp(-x))
    else:
        return np.log1p(np.exp(x))
def make_grad_softplus(ans, x):
    """Gradient of softplus: d/dx log(1 + exp(x)) = sigmoid(x),
    computed stably as exp(x - ans) since ans = softplus(x)."""
    x = np.asarray(x)
    def gradient_product(g):
        return np.full(x.shape, g) * np.exp(x - ans)
    return gradient_product


softplus.defgrad(make_grad_softplus)
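

# A quick numerical sanity check for softplus. Illustrative sketch only:
# the test values are arbitrary choices, not part of the original code.
def _check_softplus():
    x = np.array([-5.0, 0.0, 5.0])
    # agrees with the naive formula where the naive formula is safe
    assert np.allclose(softplus(x), np.log1p(np.exp(x)))
    # for large inputs the naive exp would overflow; softplus stays exact
    assert np.isclose(softplus(1000.0), 1000.0)
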
def safe_log(x, minval=1e-100):
    """Logarithm clipped away from zero to avoid returning -inf."""
    return np.log(np.maximum(x, minval))
class WeightsParser(object):
    """A helper class to index named blocks in a flat parameter vector."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.num_weights = 0

    def add_shape(self, name, shape):
        """Reserve a named block of the given shape in the flat vector."""
        start = self.num_weights
        self.num_weights += np.prod(shape)
        self.idxs_and_shapes[name] = (slice(start, self.num_weights), shape)

    def get(self, vect, name):
        """Slice the named block out of vect and reshape it."""
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)
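

# A minimal usage sketch for WeightsParser. Illustrative only: the block
# names and shapes below are arbitrary examples.
def _demo_weights_parser():
    parser = WeightsParser()
    parser.add_shape('W', (2, 3))
    parser.add_shape('b', (1, 3))
    vect = np.arange(parser.num_weights, dtype=float)  # 6 + 3 = 9 entries
    W = parser.get(vect, 'W')  # first 6 entries, reshaped to (2, 3)
    b = parser.get(vect, 'b')  # last 3 entries, reshaped to (1, 3)
    return W, b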