"""A multi-layer perceptron for classification of MNIST handwritten digits."""
from data import load_mnist
import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad
from autograd.misc.flatten import flatten
from autograd.misc.optimizers import adam
from autograd.scipy.special import logsumexp
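
# autograd.numpy mirrors the numpy API but traces the operations performed on
# its arrays, which is what lets autograd.grad differentiate functions written
# in terms of it.
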

def init_random_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a list of (weights, biases) tuples, one for each layer in the net."""
    return [
        (
            scale * rs.randn(m, n),  # weight matrix
            scale * rs.randn(n),  # bias vector
        )
        for m, n in zip(layer_sizes[:-1], layer_sizes[1:])
    ]
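
# Illustrative example (sizes assumed for illustration, not part of the
# original script): init_random_params(0.1, [784, 200, 10]) would return
# [(W0 with shape (784, 200), b0 with shape (200,)),
#  (W1 with shape (200, 10), b1 with shape (10,))].
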
def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
    params is a list of (weights, bias) tuples.
    inputs is an (N x D) matrix.
    Returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b  # affine transformation
        inputs = np.tanh(outputs)  # tanh nonlinearity feeds the next layer
    # Normalize the final layer's outputs into log-probabilities (log-softmax).
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
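
# Hedged usage sketch (dummy inputs, not executed here):
#   params = init_random_params(0.1, [784, 200, 100, 10])
#   logps = neural_net_predict(params, np.zeros((5, 784)))
#   # logps has shape (5, 10) and np.exp(logps) sums to 1 along each row.
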
def l2_norm(params):
    """Computes the squared l2 norm of params by flattening them into a vector."""
    flattened, _ = flatten(params)
    return np.dot(flattened, flattened)

def log_posterior(params, inputs, targets, L2_reg):
    """Unnormalized log-posterior: an L2 (Gaussian) prior on the weights plus
    the log-likelihood of one-hot targets under the predicted log-probabilities."""
    log_prior = -L2_reg * l2_norm(params)
    log_lik = np.sum(neural_net_predict(params, inputs) * targets)
    return log_prior + log_lik
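
# With one-hot targets, log_lik sums the log-probability assigned to each
# example's true class (the negative cross-entropy), while the prior term acts
# as weight decay scaled by L2_reg.
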
def accuracy(params, inputs, targets):
target_class = np.argmax(targets, axis=1)
predicted_class = np.argmax(neural_net_predict(params, inputs), axis=1)
return np.mean(predicted_class == target_class)


if __name__ == "__main__":
    # Model parameters
    layer_sizes = [784, 200, 100, 10]
    L2_reg = 1.0

    # Training parameters
    param_scale = 0.1
    batch_size = 256
    num_epochs = 5
    step_size = 0.001

    print("Loading training data...")
    N, train_images, train_labels, test_images, test_labels = load_mnist()

    init_params = init_random_params(param_scale, layer_sizes)
    num_batches = int(np.ceil(len(train_images) / batch_size))

    def batch_indices(iter):
        """Map the optimizer's iteration counter to a minibatch slice."""
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)
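
    # For example: with batch_size = 256, iteration 0 yields slice(0, 256),
    # iteration 1 yields slice(256, 512), and the indices wrap back to the
    # start of the training set every num_batches iterations.
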
# Define training objective
def objective(params, iter):
idx = batch_indices(iter)
return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)
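
    # Minimizing the negative log-posterior over minibatches is stochastic MAP
    # estimation: improve the data fit while penalizing large weights.
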
# Get gradient of objective using autograd.
objective_grad = grad(objective)
    print("     Epoch     |   Train accuracy   |    Test accuracy   ")
def print_perf(params, iter, gradient):
if iter % num_batches == 0:
train_acc = accuracy(params, train_images, train_labels)
test_acc = accuracy(params, test_images, test_labels)
print(f"{iter // num_batches:15}|{train_acc:20}|{test_acc:20}")
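
    # adam invokes this callback every iteration; checking iter % num_batches
    # reports train/test accuracy once per epoch.
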
# The optimizers provided can optimize lists, tuples, or dicts of parameters.
optimized_params = adam(
objective_grad,
init_params,
step_size=step_size,
num_iters=num_epochs * num_batches,
callback=print_perf,
)
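
    # The optimized parameters keep the same nested structure as init_params
    # (a list of (weights, biases) tuples), so they can be reused directly,
    # e.g. accuracy(optimized_params, test_images, test_labels).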