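"""Track a hand in webcam video and label its gesture.

Background subtraction isolates the hand from a captured background frame, a
KCF tracker follows it, and a Keras CNN (model/hand_model_gray.hdf5)
classifies the hand crop as one of: fist, five, point, swing.

Runtime controls: 'r' (or 'l') captures the background, 't' starts tracking
the current bounding box, 'p' returns to the paused state, ESC quits.
"""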
import cv2
import numpy as np
from keras.models import load_model
MODEL_PATH = "model/hand_model_gray.hdf5"
class GestureRecognizer:
"""
Class for tracking the hand and labeling the gesture.
"""
# Mapping between the output of our model and a textual representation
CLASSES = {
0: 'fist',
1: 'five',
2: 'point',
3: 'swing'
}
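    # NOTE: assumed to match the order of the model's output units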
def __init__(self, capture=0, model_path=MODEL_PATH):
self.bg = None
self.frame = None # current frame
self.tracker = cv2.TrackerKCF_create()
        self.kernel = np.ones((3, 3), np.uint8)  # structuring element for the morphological ops
self.recognizer = load_model(model_path)
self.is_tracking = False
        self.hand_bbox = (116, 116, 170, 170)  # default box as (x, y, width, height)
# Display positions (pixel coordinates)
self.positions = {
'hand_pose': (15, 40), # hand pose text
'fps': (15, 20), # fps counter
'null_pos': (200, 200) # used as null point for mouse control
}
        # Begin capturing video from the given device index or stream
        self.video = cv2.VideoCapture(capture)
        if not self.video.isOpened():
            print("Could not open video source:", capture)
def __del__(self):
cv2.destroyAllWindows()
self.video.release()
    # Helper function for applying a boolean mask to an array
    def mask_array(self, array, imask):
        if array.shape[:2] != imask.shape:
            raise ValueError("Shapes of array and imask are incompatible")
        output = np.zeros_like(array, dtype=np.uint8)
        output[imask] = array[imask]  # copy only the pixels where the mask is True
        return output
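    # Usage sketch (hypothetical shapes): given a (480, 640, 3) BGR frame and
    # a (480, 640) boolean mask, mask_array returns the frame with every pixel
    # outside the mask zeroed.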
def extract_foreground(self):
# Find the absolute difference between the background frame and current frame
diff = cv2.absdiff(self.bg, self.frame)
mask = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
        # Threshold the mask: pixels differing from the background by more than
        # 10 intensity levels count as foreground
        th, thresh = cv2.threshold(mask, 10, 255, cv2.THRESH_BINARY)
# Opening, closing and dilation
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, self.kernel)
closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, self.kernel)
img_dilation = cv2.dilate(closing, self.kernel, iterations=2)
# Get mask indexes
imask = img_dilation > 0
# Get foreground from mask
foreground = self.mask_array(self.frame, imask)
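        # Note that `mask` returned below is the raw grayscale difference
        # (pre-threshold); run() crops it, not the cleaned binary mask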
return foreground, mask
def run(self):
# Capture, process, display loop
while True:
# Read a new frame
            ok, self.frame = self.video.read()
            if not ok:
                break
            display = self.frame.copy()  # Frame we'll do all the graphical drawing to
            data_display = np.zeros_like(display, dtype=np.uint8)  # Black screen to display data
# Start FPS timer
timer = cv2.getTickCount()
if self.bg is None:
cv2.putText(display,
"Press 'r' to reset the foreground extraction.",
self.positions['hand_pose'],
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (0, 127, 64), 2)
cv2.imshow("display", display)
                k = cv2.waitKey(1) & 0xff
                if k == 27:  # ESC pressed
                    break
                elif k in (ord('r'), ord('l')):
                    # 'r' (or 'l') pressed: capture the background frame
self.bg = self.frame.copy()
self.hand_bbox = (116, 116, 170, 170)
self.is_tracking = False
else:
# Extract the foreground
foreground, mask = self.extract_foreground()
foreground_display = foreground.copy()
# Get hand from mask using the bounding box
hand_crop = mask[int(self.hand_bbox[1]):int(self.hand_bbox[1]+self.hand_bbox[3]),
int(self.hand_bbox[0]):int(self.hand_bbox[0]+self.hand_bbox[2])]
# Update tracker
if self.is_tracking:
tracking, self.hand_bbox = self.tracker.update(foreground)
try:
# Resize cropped hand and make prediction on gesture
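                    # The model expects a 54x54 single-channel input with a
                    # leading batch dimension, i.e. shape (1, 54, 54, 1)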
hand_crop_resized = np.expand_dims(cv2.resize(hand_crop, (54, 54)), axis=0).reshape((1, 54, 54, 1))
prediction = self.recognizer.predict(hand_crop_resized)
predi = prediction[0].argmax() # Get the index of the greatest confidence
gesture = self.CLASSES[predi]
for i, pred in enumerate(prediction[0]):
# Draw confidence bar for each gesture
barx = self.positions['hand_pose'][0]
bary = 60 + i*60
bar_height = 20
bar_length = int(400 * pred) + barx # calculate length of confidence bar
                        # Make the most confident prediction green
if i == predi:
colour = (0, 255, 0)
else:
colour = (0, 0, 255)
cv2.putText(data_display,
"{}: {}".format(self.CLASSES[i], pred),
(self.positions['hand_pose'][0], 30 + i*60),
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (255, 255, 255), 2)
                        cv2.rectangle(data_display,
                                      (barx, bary),
                                      (bar_length, bary - bar_height),
                                      colour,
                                      -1, 1)
cv2.putText(display,
"hand pose: {}".format(gesture),
self.positions['hand_pose'],
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (0, 0, 255), 2)
cv2.putText(foreground_display,
"hand pose: {}".format(gesture),
self.positions['hand_pose'],
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (0, 0, 255), 2)
                except Exception:
                    # Prediction fails when the crop is empty (e.g. the bbox
                    # has left the frame)
cv2.putText(display,
"hand pose: error",
self.positions['hand_pose'],
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (0, 0, 255), 2)
cv2.putText(foreground_display,
"hand pose: error",
self.positions['hand_pose'],
cv2.FONT_HERSHEY_SIMPLEX,
0.75, (0, 0, 255), 2)
# Draw bounding box
p1 = (int(self.hand_bbox[0]), int(self.hand_bbox[1]))
p2 = (int(self.hand_bbox[0] + self.hand_bbox[2]), int(self.hand_bbox[1] + self.hand_bbox[3]))
cv2.rectangle(foreground_display, p1, p2, (255, 0, 0), 2, 1)
cv2.rectangle(display, p1, p2, (255, 0, 0), 2, 1)
                # Calculate the hand's offset from the null point (x right, y up)
                hand_pos = ((p1[0] + p2[0]) // 2, (p1[1] + p2[1]) // 2)
                mouse_change = (hand_pos[0] - self.positions['null_pos'][0],
                                self.positions['null_pos'][1] - hand_pos[1])
# Draw hand moved difference
cv2.circle(display, self.positions['null_pos'], 5, (0,0,255), -1)
cv2.circle(display, hand_pos, 5, (0,255,0), -1)
cv2.line(display, self.positions['null_pos'], hand_pos, (255,0,0),5)
# Calculate Frames per second (FPS)
fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
# Display FPS on frame
cv2.putText(foreground_display, "FPS : " + str(int(fps)), self.positions['fps'], cv2.FONT_HERSHEY_SIMPLEX, 0.65, (50, 170, 50), 2)
cv2.putText(display, "FPS : " + str(int(fps)), self.positions['fps'], cv2.FONT_HERSHEY_SIMPLEX, 0.65, (50, 170, 50), 2)
                # Display help text
                cv2.putText(foreground_display,
                            "hold 'r' until the screen is black",
                            (15, 400),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.65, (0, 0, 255), 2)
                cv2.putText(foreground_display,
                            "to recalibrate",
                            (15, 420),
                            cv2.FONT_HERSHEY_SIMPLEX,
                            0.65, (0, 0, 255), 2)
cv2.putText(foreground_display,
"press 'p' to return to paused state",
(15, 450),
cv2.FONT_HERSHEY_SIMPLEX,
0.65, (0, 0, 255), 2)
# Display foreground_display
cv2.imshow("foreground_display", foreground_display)
cv2.imshow("display", display)
# Display result
cv2.imshow("data", data_display)
                k = cv2.waitKey(1) & 0xff
                if k == 27:  # ESC pressed
                    break
                elif k in (ord('r'), ord('l')):
                    # 'r' (or 'l') pressed: recapture the background
                    self.bg = self.frame.copy()
                    self.hand_bbox = (116, 116, 170, 170)
                    self.is_tracking = False
                elif k == ord('t'):
                    # 't' pressed: initialize the tracker on the current frame
                    # and bounding box, and start tracking
                    self.tracker = cv2.TrackerKCF_create()
                    self.tracker.init(self.frame, self.hand_bbox)
                    self.is_tracking = True
                elif k == ord('p'):
                    # 'p' pressed: reset to the paused state
                    self.is_tracking = False
                    self.bg = None
                    cv2.destroyAllWindows()
                elif k != 255:
                    print(k)  # log any other pressed key code
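
# A minimal sketch of how the same model could classify a single pre-cropped
# grayscale hand image outside the capture loop. `image_path` is a
# hypothetical argument; the 54x54 size and lack of extra normalization
# mirror the preprocessing in GestureRecognizer.run().
def classify_image(image_path, model_path=MODEL_PATH):
    model = load_model(model_path)
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise IOError("Could not read image: {}".format(image_path))
    batch = cv2.resize(img, (54, 54)).reshape((1, 54, 54, 1))
    prediction = model.predict(batch)[0]
    return GestureRecognizer.CLASSES[prediction.argmax()]
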
if __name__ == "__main__":
    recognizer = GestureRecognizer(1)  # capture device index; use 0 for the default webcam
recognizer.run()