forked from rykov8/ssd_keras
-
Notifications
You must be signed in to change notification settings - Fork 86
/
tbpp_utils.py
214 lines (174 loc) · 9.33 KB
/
tbpp_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
"""Some utils for TextBoxes++."""
import numpy as np
import matplotlib.pyplot as plt
from ssd_utils import PriorUtil as SSDPriorUtil
from ssd_utils import iou, non_maximum_suppression, non_maximum_suppression_slow
from utils.bboxes import polygon_to_rbox3, rbox3_to_polygon
from utils.vis import plot_box
class PriorUtil(SSDPriorUtil):
    """Prior box utility for TextBoxes++.

    Extends the SSD prior utility so that, in addition to axis-aligned
    boxes, quadrilaterals and rotated boxes are encoded and decoded.

    Per-prior column layout shared by ``encode`` and ``decode``:
        0:4    axis-aligned box offsets
        4:12   quadrilateral offsets (four corner points)
        12:17  rotated box offsets
        17:    class confidences (column 17 is background)
    """

    def encode(self, gt_data, overlap_threshold=0.5, debug=False):
        """Encode ground truth into per-prior training targets.

        Calculation is done with normalized sizes.

        # Arguments
            gt_data: Array with one row per ground truth instance. Columns
                0:8 are the normalized quadrilateral coordinates
                (x1, y1, ..., x4, y4) and the last column is the class index.
            overlap_threshold: A prior is assigned to its best matching
                ground truth box only if their IoU exceeds this value.
            debug: Unused; kept for interface compatibility.

        # Return
            Array of shape (num_priors, 4 + 8 + 5 + 2) holding box offsets,
            quadrilateral offsets, rotated box offsets and one-hot class
            confidences. Unmatched priors have zero offsets and are
            labeled as background.

        # Side effects
            Sets ``self.gt_boxes`` (horizontal bounding rectangles of the
            ground truth) and ``self.match_indices`` (prior index ->
            ground truth index for all matched priors).
        """
        num_classes = 2
        num_priors = self.priors.shape[0]

        if gt_data.shape[0] == 0:
            # Empty ground truth: nothing can be matched, so every prior is
            # background with zero offsets. Without this guard the argmax
            # over the (empty) IoU matrix below would fail.
            self.gt_boxes = np.empty((0, 4))
            self.match_indices = {}
            target = np.zeros((num_priors, 4 + 8 + 5 + num_classes))
            target[:, 17] = 1
            return target

        gt_polygons = np.copy(gt_data[:, :8])  # normalized quadrilaterals
        gt_rboxes = np.array([polygon_to_rbox3(np.reshape(p, (-1, 2))) for p in gt_data[:, :8]])

        # minimum horizontal bounding rectangles
        gt_xmin = np.min(gt_data[:, 0:8:2], axis=1)
        gt_ymin = np.min(gt_data[:, 1:8:2], axis=1)
        gt_xmax = np.max(gt_data[:, 0:8:2], axis=1)
        gt_ymax = np.max(gt_data[:, 1:8:2], axis=1)
        # normalized xmin, ymin, xmax, ymax
        gt_boxes = self.gt_boxes = np.array([gt_xmin, gt_ymin, gt_xmax, gt_ymax]).T

        # +0.5 rounds float-valued labels to the nearest class index;
        # the builtin int replaces the np.int alias removed from NumPy
        gt_class_idx = np.asarray(gt_data[:, -1] + 0.5, dtype=int)
        gt_one_hot = np.zeros([len(gt_class_idx), num_classes])
        gt_one_hot[range(len(gt_one_hot)), gt_class_idx] = 1  # one_hot classes including background

        # IoU of every prior (rows) with every ground truth box (columns)
        gt_iou = np.array([iou(b, self.priors_norm) for b in gt_boxes]).T

        # assign gt to priors: each prior takes its best overlapping gt box
        max_idxs = np.argmax(gt_iou, axis=1)
        max_val = gt_iou[np.arange(num_priors), max_idxs]
        prior_mask = max_val > overlap_threshold
        match_indices = max_idxs[prior_mask]

        self.match_indices = dict(zip(list(np.ix_(prior_mask)[0]), list(match_indices)))

        # prior labels, background unless matched
        confidence = np.zeros((num_priors, num_classes))
        confidence[:, 0] = 1
        confidence[prior_mask] = gt_one_hot[match_indices]

        gt_xy = (gt_boxes[:, 2:4] + gt_boxes[:, 0:2]) / 2.
        gt_wh = gt_boxes[:, 2:4] - gt_boxes[:, 0:2]
        gt_xy = gt_xy[match_indices]
        gt_wh = gt_wh[match_indices]
        gt_polygons = gt_polygons[match_indices]
        gt_rboxes = gt_rboxes[match_indices]

        priors_xy = self.priors_xy[prior_mask] / self.image_size
        priors_wh = self.priors_wh[prior_mask] / self.image_size
        variances_xy = self.priors_variances[prior_mask, 0:2]
        variances_wh = self.priors_variances[prior_mask, 2:4]

        # compute local offsets for axis-aligned bounding boxes
        offsets = np.zeros((num_priors, 4))
        offsets[prior_mask, 0:2] = (gt_xy - priors_xy) / priors_wh
        offsets[prior_mask, 2:4] = np.log(gt_wh / priors_wh)
        offsets[prior_mask, 0:2] /= variances_xy
        offsets[prior_mask, 2:4] /= variances_wh

        # compute local offsets for quadrilaterals, each polygon point is
        # expressed relative to one corner of the prior box
        offsets_quads = np.zeros((num_priors, 8))
        priors_xy_minmax = np.hstack([priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
        ref = priors_xy_minmax[:, (0, 1, 2, 1, 2, 3, 0, 3)]  # corner points
        offsets_quads[prior_mask, :] = (gt_polygons - ref) / np.tile(priors_wh, (1, 4)) / np.tile(variances_xy, (1, 4))

        # compute local offsets for rotated bounding boxes: columns 0:2 and
        # 2:4 are encoded like points relative to the prior center, column 4
        # like a length relative to the prior height
        offsets_rboxs = np.zeros((num_priors, 5))
        offsets_rboxs[prior_mask, 0:2] = (gt_rboxes[:, 0:2] - priors_xy) / priors_wh / variances_xy
        offsets_rboxs[prior_mask, 2:4] = (gt_rboxes[:, 2:4] - priors_xy) / priors_wh / variances_xy
        offsets_rboxs[prior_mask, 4] = np.log(gt_rboxes[:, 4] / priors_wh[:, 1]) / variances_wh[:, 1]

        return np.concatenate([offsets, offsets_quads, offsets_rboxs, confidence], axis=1)

    def decode(self, model_output, confidence_threshold=0.01, keep_top_k=200, fast_nms=True, sparse=True):
        """Decode raw model output into detections.

        Calculation is done with normalized sizes.

        # Arguments
            model_output: Array with one row per prior and the columns
                mbox_loc (4), mbox_quad (8), mbox_rbox (5), mbox_conf (2).
            confidence_threshold: Minimum class confidence for a prior to
                be considered a detection candidate.
            keep_top_k: Maximum number of detections returned.
            fast_nms: Use non_maximum_suppression if true, otherwise
                non_maximum_suppression_slow.
            sparse: If true, only priors with at least one non-background
                confidence above the threshold are decoded.

        # Return
            Array of shape (num_detections, 19) with the columns
            boxes (4, xmin ymin xmax ymax clipped to [0, 1]), quad (8),
            rbox (5), confidence (1), label (1), sorted by descending
            confidence. The array has shape (0, 19) if nothing was
            detected. The result is also stored in ``self.results``.
        """
        prior_mask = model_output[:, 17:] > confidence_threshold

        if sparse:
            # compute boxes only if the confidence is high enough and the class is not background
            mask = np.any(prior_mask[:, 1:], axis=1)
            prior_mask = prior_mask[mask]
            mask = np.ix_(mask)[0]
            model_output = model_output[mask]
            priors_xy = self.priors_xy[mask] / self.image_size
            priors_wh = self.priors_wh[mask] / self.image_size
            priors_variances = self.priors_variances[mask, :]
        else:
            priors_xy = self.priors_xy / self.image_size
            priors_wh = self.priors_wh / self.image_size
            priors_variances = self.priors_variances

        offsets = model_output[:, :4]
        offsets_quads = model_output[:, 4:12]
        offsets_rboxs = model_output[:, 12:17]
        confidence = model_output[:, 17:]

        priors_xy_minmax = np.hstack([priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
        ref = priors_xy_minmax[:, (0, 1, 2, 1, 2, 3, 0, 3)]  # corner points
        variances_xy = priors_variances[:, 0:2]
        variances_wh = priors_variances[:, 2:4]

        num_priors = offsets.shape[0]
        num_classes = confidence.shape[1]

        # compute bounding boxes from local offsets
        boxes = np.empty((num_priors, 4))
        offsets = offsets * priors_variances
        boxes_xy = priors_xy + offsets[:, 0:2] * priors_wh
        boxes_wh = priors_wh * np.exp(offsets[:, 2:4])
        boxes[:, 0:2] = boxes_xy - boxes_wh / 2.  # xmin, ymin
        boxes[:, 2:4] = boxes_xy + boxes_wh / 2.  # xmax, ymax
        boxes = np.clip(boxes, 0.0, 1.0)

        # do non maximum suppression per class, skipping background (class 0)
        nms = non_maximum_suppression if fast_nms else non_maximum_suppression_slow
        per_class = []
        for c in range(1, num_classes):
            class_mask = prior_mask[:, c]
            boxes_to_process = boxes[class_mask]
            if len(boxes_to_process) == 0:
                continue
            confs_to_process = confidence[class_mask, c]
            idx = nms(boxes_to_process, confs_to_process, self.nms_thresh, self.nms_top_k)

            good_boxes = boxes_to_process[idx]
            good_confs = confs_to_process[idx][:, None]
            labels = np.ones((len(idx), 1)) * c

            # gather the surviving priors once instead of re-indexing per term
            sel_xy = priors_xy[class_mask][idx]
            sel_wh = priors_wh[class_mask][idx]
            sel_var_xy = variances_xy[class_mask][idx]
            sel_var_wh = variances_wh[class_mask][idx]
            sel_rboxs = offsets_rboxs[class_mask][idx]

            good_quads = ref[class_mask][idx] + offsets_quads[class_mask][idx] * np.tile(sel_wh * sel_var_xy, (1, 4))

            good_rboxs = np.empty((len(idx), 5))
            good_rboxs[:, 0:2] = sel_xy + sel_rboxs[:, 0:2] * sel_wh * sel_var_xy
            good_rboxs[:, 2:4] = sel_xy + sel_rboxs[:, 2:4] * sel_wh * sel_var_xy
            good_rboxs[:, 4] = np.exp(sel_rboxs[:, 4] * sel_var_wh[:, 1]) * sel_wh[:, 1]

            per_class.append(np.concatenate((good_boxes, good_quads, good_rboxs, good_confs, labels), axis=1))

        if per_class:
            results = np.concatenate(per_class, axis=0)
            order = np.argsort(-results[:, 17])
            results = results[order]
            results = results[:keep_top_k]
        else:
            # same column count as a non-empty result, so that consumers
            # indexing the confidence column (17) also work without detections
            results = np.empty((0, 19))
        self.results = results
        return results

    def plot_results(self, results=None, classes=None, show_labels=False, gt_data=None, confidence_threshold=None):
        """Draw detections and optionally ground truth on the current image.

        The current matplotlib image (``plt.gci()``) provides the pixel
        size used to scale the normalized coordinates, so an image has to
        be shown before calling this method.

        # Arguments
            results: Detections as returned by ``decode``; defaults to
                ``self.results``.
            classes: Optional sequence of class names, indexed by label.
                If given, it is used for label text and colors.
            show_labels: If true, confidence and label are drawn as text
                at the top left corner of each box.
            gt_data: Optional ground truth in the format expected by
                ``encode``, drawn as filled polygons.
            confidence_threshold: If given, only detections with a higher
                confidence are drawn.
        """
        if results is None:
            results = self.results
        if confidence_threshold is not None:
            mask = results[:, 17] > confidence_threshold
            results = results[mask]
        if classes is not None:
            colors = plt.cm.hsv(np.linspace(0, 1, len(classes) + 1)).tolist()
        ax = plt.gca()
        im = plt.gci()
        h, w = im.get_size()

        # draw ground truth
        if gt_data is not None:
            for box in gt_data:
                if classes is None:
                    color = 'g'
                else:
                    # class index is read from the last column, as in encode
                    # NOTE(review): assumes gt_data has the encode layout - confirm
                    label = int(box[-1] + 0.5)
                    color = colors[label]
                xy = np.reshape(box[:8], (-1, 2)) * (w, h)
                ax.add_patch(plt.Polygon(xy, fill=True, color=color, linewidth=1, alpha=0.3))

        # draw prediction
        for r in results:
            bbox = r[0:4]
            quad = r[4:12]
            rbox = r[12:17]
            confidence = r[17]
            label = int(r[18])

            plot_box(bbox * (w, h, w, h), box_format='xyxy', color='b')
            plot_box(np.reshape(quad, (-1, 2)) * (w, h), box_format='polygon', color='r')
            plot_box(rbox3_to_polygon(rbox) * (w, h), box_format='polygon', color='g')
            # mark the two points stored in the rotated box representation
            plt.plot(rbox[[0, 2]] * (w, w), rbox[[1, 3]] * (h, h), 'oc', markersize=4)

            if show_labels:
                label_name = label if classes is None else classes[label]
                color = 'r' if classes is None else colors[label]
                xmin, ymin = bbox[:2] * (w, h)
                display_txt = '%0.2f, %s' % (confidence, label_name)
                ax.text(xmin, ymin, display_txt, bbox={'facecolor': color, 'alpha': 0.5})