#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 23 13:31:43 2018
@author: Michael
"""
import pickle  #saving and loading our serialized model
import numpy as np  #matrix math
import tensorflow as tf  #TF 1.x input-pipeline API, used by read_file below
from app.model.preprocessor import Preprocessor as img_prep  #image preprocessing

#fixed image size used when reshaping decoded TFRecord images
#(assumed placeholder values; read_file references these module-level constants)
HEIGHT = 100
WIDTH = 100
def read_file(filename):
    #queue up the TFRecord file and read one serialized example from it
    file_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(file_queue)
    #parse the example into its raw image bytes, label, and dimensions
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64)
        }
    )
    #decode the JPEG bytes into an RGB image tensor
    image = tf.image.decode_jpeg(features['image_raw'], channels=3)
    height = tf.cast(features['height'], tf.int32)
    width = tf.cast(features['width'], tf.int32)
    #reshape to the fixed size the rest of the pipeline expects
    image = tf.reshape(image, [HEIGHT, WIDTH, 3])
    label = tf.cast(features['label'], tf.int32)
    return image, label
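
# Hedged usage sketch (not part of the original file): read_file builds graph
# nodes in the TF 1.x queue-based pipeline, so its outputs must be evaluated
# inside a session with queue runners started. The filename below is a
# hypothetical placeholder.
def _read_file_demo(filename="fruit_train.tfrecords"):
    image, label = read_file(filename)
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        image_val, label_val = sess.run([image, label])
        coord.request_stop()
        coord.join(threads)
    return image_val, label_val
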
#class for loading our saved model and classifying new images
class LiteOCR:
    def __init__(self, fn="alpha_weights.pkl", pool_size=2):
        #load the weights and the metadata from the pickle file
        #(currently, this class MUST be initialized from a pickle file)
        [weights, meta] = pickle.load(open(fn, 'rb'), encoding='latin1')
        #list of labels
        self.vocab = meta["vocab"]
        #how many rows and columns in an image (assumed square)
        self.img_rows = meta["img_side"]
        self.img_cols = meta["img_side"]
        #load our CNN
        self.CNN = LiteCNN()
        #with our saved weights
        self.CNN.load_weights(weights)
        #define the pooling layer's size
        self.CNN.pool_size = int(pool_size)
    #classify a new image
    def predict(self, image):
        print(image.shape)
        #reshape the image into the 4D tensor our network expects:
        #(batch, channels, rows, cols)
        X = np.reshape(image, (1, 1, self.img_rows, self.img_cols))
        X = X.astype("float32")
        #make the prediction
        predicted_i = self.CNN.predict(X)
        #return the predicted label
        return self.vocab[predicted_i]
class LiteCNN:
    def __init__(self):
        #a place to store the layers
        self.layers = []
        #size of the pooling area for max pooling
        self.pool_size = None

    def load_weights(self, weights):
        assert not self.layers, "Weights can only be loaded once!"
        #add the saved matrix values for each layer, in order
        for k in range(len(weights)):
            self.layers.append(weights['layer_{}'.format(k)])
    def predict(self, X):
        #the forward pass, at a high level: conv -> relu -> conv -> relu ->
        #pool -> dropout -> flatten -> dense -> relu -> dropout -> dense ->
        #softmax (layer indices refer to entries in the saved weights)
        h = self.cnn_layer(X, layer_i=0, border_mode="full"); X = h
        h = self.relu_layer(X); X = h
        h = self.cnn_layer(X, layer_i=2, border_mode="valid"); X = h
        h = self.relu_layer(X); X = h
        h = self.maxpooling_layer(X); X = h
        h = self.dropout_layer(X, .25); X = h
        h = self.flatten_layer(X); X = h
        h = self.dense_layer(X, layer_i=7); X = h
        h = self.relu_layer(X); X = h
        h = self.dropout_layer(X, .5); X = h
        h = self.dense_layer(X, layer_i=10); X = h
        h = self.softmax_layer2D(X); X = h
        max_i = self.classify(X)
        return max_i[0]
    #given the feature maps we've learned by convolving over the image,
    #downsample them with max pooling: slide a window over each map and
    #keep only the max value of each patch as the new feature value
    def maxpooling_layer(self, convolved_features):
        #convolved_features has shape (images, features, dim, dim);
        #pooling operates on each 2D feature map independently
        nb_images = convolved_features.shape[0]
        nb_features = convolved_features.shape[1]
        conv_dim = convolved_features.shape[2]
        res_dim = int(conv_dim / self.pool_size)  #assumed square shape
        #initialize our downsampled feature maps as empty
        pooled_features = np.zeros((nb_images, nb_features, res_dim, res_dim))
        #for each image
        for image_i in range(nb_images):
            #and each feature map
            for feature_i in range(nb_features):
                #iterate over the pooled rows
                for pool_row in range(res_dim):
                    #define start and end points of the patch rows
                    row_start = pool_row * self.pool_size
                    row_end = row_start + self.pool_size
                    #and over the pooled columns (so it's a 2D iteration)
                    for pool_col in range(res_dim):
                        #define start and end points of the patch columns
                        col_start = pool_col * self.pool_size
                        col_end = col_start + self.pool_size
                        #take the patch between our start and end points
                        patch = convolved_features[image_i, feature_i, row_start:row_end, col_start:col_end]
                        #then take the max value from that patch and store it:
                        #this is our pooled feature
                        pooled_features[image_i, feature_i, pool_row, pool_col] = np.max(patch)
        return pooled_features
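
    # Worked example (illustrative, not in the original): with pool_size = 2,
    # a single 4x4 feature map reduces to 2x2, keeping the max of each
    # non-overlapping 2x2 patch:
    #   >>> cnn = LiteCNN(); cnn.pool_size = 2
    #   >>> cnn.maxpooling_layer(np.arange(16.).reshape(1, 1, 4, 4))
    #   array([[[[ 5.,  7.],
    #            [13., 15.]]]])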
    #convolution is the most important of the matrix operations here;
    #we define our input, layer number, and a border mode (explained below)
    def cnn_layer(self, X, layer_i=0, border_mode="full"):
        #we'll store our feature maps and bias values in these two vars
        features = self.layers[layer_i]["param_0"]
        bias = self.layers[layer_i]["param_1"]
        #how big is our filter/patch?
        patch_dim = features[0].shape[-1]
        #how many features do we have?
        nb_features = features.shape[0]
        #how big is our image? (assume square)
        image_dim = X.shape[2]
        #how many channels? (R G B values)
        image_channels = X.shape[1]
        #how many images do we have?
        nb_images = X.shape[0]
        #with border mode "full" the filter is allowed to slide outside the
        #bounds of the input by "filter size - 1"; the area outside the input
        #is padded with zeros, so the output is larger than the input
        if border_mode == "full":
            conv_dim = image_dim + patch_dim - 1
        #with border mode "valid" the output is smaller than the input because
        #the convolution is only computed where input and filter fully overlap
        elif border_mode == "valid":
            conv_dim = image_dim - patch_dim + 1
        #we'll initialize our output feature matrix
        convolved_features = np.zeros((nb_images, nb_features, conv_dim, conv_dim))
        #then we'll iterate through each image that we have
        for image_i in range(nb_images):
            #for each feature
            for feature_i in range(nb_features):
                #initialize a convolved image as empty
                convolved_image = np.zeros((conv_dim, conv_dim))
                #then for each channel (r, g, b)
                for channel in range(image_channels):
                    #extract a single filter from our feature map
                    feature = features[feature_i, channel, :, :]
                    #and the channel-specific part of our image
                    image = X[image_i, channel, :, :]
                    #convolve the image with the filter, summing over channels
                    convolved_image += self.convolve2d(image, feature, border_mode)
                #add a bias to our convolved image
                convolved_image = convolved_image + bias[feature_i]
                #add it to our list of convolved features (learnings)
                convolved_features[image_i, feature_i, :, :] = convolved_image
        return convolved_features
    #in a dense layer, every node is connected to every node in the preceding layer
    def dense_layer(self, X, layer_i=0):
        #initialize our weights and bias for this layer
        W = self.layers[layer_i]["param_0"]
        b = self.layers[layer_i]["param_1"]
        #and multiply by our input (dot product), then shift by the bias
        output = np.dot(X, W) + b
        return output
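
    # Shape sketch (illustrative; sizes are hypothetical, not from the file):
    # after flatten_layer, X has shape (batch, n_in); with W of shape
    # (n_in, n_out) and b of shape (n_out,), the output is (batch, n_out),
    # e.g. X: (1, 5408) and W: (5408, 128) -> output: (1, 128).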
    #so what does the convolution operation look like, given an image and a filter?
    @staticmethod
    def convolve2d(image, feature, border_mode="full"):
        #we'll define the dimensions of the image and the feature
        image_dim = np.array(image.shape)
        feature_dim = np.array(feature.shape)
        #as well as a target dimension for the full convolution
        target_dim = image_dim + feature_dim - 1
        #convolution could be written as a nested for loop, but for many
        #convolutions that approach is too computationally expensive; it can
        #be performed orders of magnitude faster with a fast Fourier
        #transform: multiply the FFTs of the input and the filter, then
        #invert the result
        fft_result = np.fft.fft2(image, target_dim) * np.fft.fft2(feature, target_dim)
        target = np.fft.ifft2(fft_result).real
        if border_mode == "valid":
            #for a valid shape, either np.all(image_dim >= feature_dim) or
            #np.all(feature_dim >= image_dim) must hold
            valid_dim = image_dim - feature_dim + 1
            if np.any(valid_dim < 1):
                valid_dim = feature_dim - image_dim + 1
            #crop the central valid region out of the full convolution
            start_i = (target_dim - valid_dim) // 2
            end_i = start_i + valid_dim
            target = target[start_i[0]:end_i[0], start_i[1]:end_i[1]]
        return target
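
    # Sanity-check sketch (assumes scipy is installed; not part of the file):
    # FFT-based convolution should match scipy's direct implementation up to
    # floating-point error, for both border modes.
    #   >>> from scipy import signal
    #   >>> img, filt = np.random.rand(8, 8), np.random.rand(3, 3)
    #   >>> np.allclose(LiteCNN.convolve2d(img, filt, "full"),
    #   ...             signal.convolve2d(img, filt, mode="full"))
    #   True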
    def relu_layer(self, x):
        #turn all negative values in the matrix into zeros
        z = np.zeros_like(x)
        return np.where(x > z, x, z)
    def softmax_layer2D(self, w):
        #calculate the probability of each target class over all possible
        #target classes, one row per sample
        #(subtract the row max first for numerical stability)
        maxes = np.amax(w, axis=1)
        maxes = maxes.reshape(maxes.shape[0], 1)
        e = np.exp(w - maxes)
        dist = e / np.sum(e, axis=1, keepdims=True)
        return dist
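
    # Worked example (illustrative): softmax of a single row [1, 2, 3]
    #   >>> cnn = LiteCNN()
    #   >>> cnn.softmax_layer2D(np.array([[1., 2., 3.]])).round(3)
    #   array([[0.09 , 0.245, 0.665]])
    # each row of the output sums to 1, so it can be read as probabilities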
    #at inference time we don't actually turn nodes off; instead we scale
    #every activation by the keep probability (1 - p)
    def dropout_layer(self, X, p):
        retain_prob = 1. - p
        X *= retain_prob
        return X
    #get the index of the largest probability in each row
    def classify(self, X):
        return X.argmax(axis=-1)
    #tensor transformation: collapse each sample's feature maps into one vector
    def flatten_layer(self, X):
        flatX = np.zeros((X.shape[0], np.prod(X.shape[1:])))
        for i in range(X.shape[0]):
            flatX[i, :] = X[i].flatten(order='C')
        return flatX
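
# End-to-end usage sketch (assumptions: "alpha_weights.pkl" exists next to
# this script and stores weights/meta in the format load_weights expects; the
# blank input image here is a placeholder).
if __name__ == "__main__":
    ocr = LiteOCR(fn="alpha_weights.pkl", pool_size=2)
    blank = np.zeros((ocr.img_rows, ocr.img_cols), dtype="float32")
    print(ocr.predict(blank))  # prints some label from ocr.vocab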