-
Notifications
You must be signed in to change notification settings - Fork 151
/
Copy pathlabel_image.py
130 lines (114 loc) · 5.35 KB
/
label_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
## Credit: Some parts of this program have been taken from the OpenCV documentation
#importing required libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
import time
import numpy as np
import tensorflow as tf
#function to load TensorFlow graph from a model file
def load_graph(model_file):
    """Build a new TensorFlow graph from a serialized GraphDef file.

    Args:
        model_file: Path to a binary file containing a serialized GraphDef.

    Returns:
        A new tf.Graph into which the parsed definition has been imported.
        main() in this file looks the imported operations up under the
        "import/" name prefix.
    """
    # Read the raw protobuf bytes first, then parse them into a GraphDef.
    with open(model_file, "rb") as model_handle:
        serialized = model_handle.read()
    definition = tf.GraphDef()
    definition.ParseFromString(serialized)

    # Import into a dedicated graph rather than the process-wide default one.
    loaded = tf.Graph()
    with loaded.as_default():
        tf.import_graph_def(definition)
    return loaded
#function to read and pre-process the image
def read_tensor_from_image_file(file_name, input_height=299, input_width=299,
                                input_mean=0, input_std=255):
    """Read an image file and return it as a normalized float32 batch.

    The decoder is picked from the file extension, compared without regard
    to case: ".png", ".gif" and ".bmp" get their dedicated decoders and
    anything else is decoded as JPEG. The decoded image is cast to float32,
    given a leading batch dimension, resized bilinearly to
    (input_height, input_width) and normalized as
    (pixel - input_mean) / input_std.

    Args:
        file_name: Path of the image file, as a string.
        input_height: Height the image is resized to.
        input_width: Width the image is resized to.
        input_mean: Value subtracted from every pixel before scaling.
        input_std: Value every pixel is divided by after mean subtraction.

    Returns:
        The evaluated value of the normalization op, as returned by
        Session.run.
    """
    input_name = "file_reader"
    # Lower-case once so that e.g. "photo.PNG" is not mistaken for a JPEG.
    lowered_name = file_name.lower()

    # Build the preprocessing ops in a throwaway graph so that repeated calls
    # do not keep adding nodes to the process-wide default graph.
    with tf.Graph().as_default():
        file_reader = tf.read_file(file_name, input_name)
        if lowered_name.endswith(".png"):
            # PNG: force 3 color channels.
            image_reader = tf.image.decode_png(file_reader, channels=3,
                                               name='png_reader')
        elif lowered_name.endswith(".gif"):
            # GIF: drop the singleton dimension(s) of the decoded tensor.
            image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                          name='gif_reader'))
        elif lowered_name.endswith(".bmp"):
            image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
        else:
            # Default: decode as JPEG with 3 color channels.
            image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                                name='jpeg_reader')

        float_caster = tf.cast(image_reader, tf.float32)
        dims_expander = tf.expand_dims(float_caster, 0)  # add batch dimension
        resized = tf.image.resize_bilinear(dims_expander,
                                           [input_height, input_width])
        normalized = tf.divide(tf.subtract(resized, [input_mean]),
                               [input_std])

        # The context manager closes the session and releases its resources
        # even if evaluation raises.
        with tf.Session() as sess:
            return sess.run(normalized)
#function for loading labels from a file
def load_labels(label_file):
    """Load class labels from a text file, one label per line.

    Args:
        label_file: Path of the labels file; opened through tf.gfile.GFile.

    Returns:
        A list of strings, one per line of the file, with trailing
        whitespace (including the newline) stripped.
    """
    # Use a context manager so the file handle is closed once the lines have
    # been read instead of being left open until garbage collection.
    with tf.gfile.GFile(label_file) as label_handle:
        return [line.rstrip() for line in label_handle.readlines()]
#main function for image classification
def main(img):
    """Classify an image and return the label with the highest score.

    Defaults (model "retrained_graph.pb", labels "retrained_labels.txt",
    224x224 input, mean/std of 128, layers "input"/"final_result") can each
    be overridden from the command line; "--image" overrides ``img``.

    NOTE: the command line is parsed with argparse on every call, so the
    arguments of the running process are consulted even when this function
    is called from other code.

    Args:
        img: Path of the image to classify, used unless "--image" is given.

    Returns:
        The entry of the labels file at the index of the highest-scoring
        model output, or None if the model produced no scores.
    """
    file_name = img
    model_file = "retrained_graph.pb"
    label_file = "retrained_labels.txt"
    input_height = 224
    input_width = 224
    input_mean = 128
    input_std = 128
    input_layer = "input"
    output_layer = "final_result"

    # Command-line arguments; all are optional.
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", help="image to be processed")
    parser.add_argument("--graph", help="graph/model to be executed")
    parser.add_argument("--labels", help="name of file containing labels")
    parser.add_argument("--input_height", type=int, help="input height")
    parser.add_argument("--input_width", type=int, help="input width")
    parser.add_argument("--input_mean", type=int, help="input mean")
    parser.add_argument("--input_std", type=int, help="input std")
    parser.add_argument("--input_layer", help="name of input layer")
    parser.add_argument("--output_layer", help="name of output layer")
    args = parser.parse_args()

    # Override the defaults with whatever was supplied on the command line.
    if args.graph:
        model_file = args.graph
    if args.image:
        file_name = args.image
    if args.labels:
        label_file = args.labels
    if args.input_height:
        input_height = args.input_height
    if args.input_width:
        input_width = args.input_width
    # 0 is a legitimate mean, so compare against None: a plain truthiness
    # test would silently discard an explicit "--input_mean 0".
    if args.input_mean is not None:
        input_mean = args.input_mean
    if args.input_std:
        input_std = args.input_std
    if args.input_layer:
        input_layer = args.input_layer
    if args.output_layer:
        output_layer = args.output_layer

    graph = load_graph(model_file)
    # Read and pre-process the input image.
    t = read_tensor_from_image_file(file_name,
                                    input_height=input_height,
                                    input_width=input_width,
                                    input_mean=input_mean,
                                    input_std=input_std)

    # Operations of the loaded model are looked up under the "import/" prefix.
    input_operation = graph.get_operation_by_name("import/" + input_layer)
    output_operation = graph.get_operation_by_name("import/" + output_layer)

    # Run the image through the model.
    with tf.Session(graph=graph) as sess:
        results = sess.run(output_operation.outputs[0],
                           {input_operation.outputs[0]: t})
    results = np.squeeze(results)  # drop dimensions of size 1

    labels = load_labels(label_file)

    # Indices ordered from highest to lowest score; only the best is returned.
    ranked = results.argsort()[::-1]
    if ranked.size == 0:
        return None
    return labels[ranked[0]]