main_python.py
import argparse

import cv2
import mediapipe as mp
import numpy as np
from ultralytics import YOLO


def jitter_block(input_image, block_size, randomness):
"""
Applies block-based shuffling to an image.
Parameters:
input_image (ndarray): The input image represented as a NumPy array.
block_size (int): The size of the square block in pixels.
    randomness (int): Maximum absolute offset, in pixels, applied to each block's position.
Returns:
ndarray: The shuffled image as a NumPy array.
"""
height, width, _ = input_image.shape
# Calculate the number of blocks in each dimension
num_blocks_y = height // block_size
num_blocks_x = width // block_size
# Create a copy of the image to avoid modifying the original
jittered_image = np.copy(input_image)
for block_y in range(num_blocks_y):
for block_x in range(num_blocks_x):
# Calculate the coordinates of the block's top-left corner
start_x = block_x * block_size
start_y = block_y * block_size
# Calculate random offsets for the block's position
offset_x = np.random.randint(-randomness, randomness + 1)
offset_y = np.random.randint(-randomness, randomness + 1)
# Calculate the new position of the block
new_x = max(0, min(width - block_size, start_x + offset_x))
new_y = max(0, min(height - block_size, start_y + offset_y))
# Extract the block from the original image
block = input_image[start_y:start_y + block_size, start_x:start_x + block_size, :]
# Place the block at the new position in the jittered image
jittered_image[new_y:new_y + block_size, new_x:new_x + block_size, :] = block
return jittered_image
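

# Illustrative sketch of how jitter_block might be exercised on a synthetic
# frame; the shape and parameter values here are arbitrary assumptions, not
# values taken from the script.
def _demo_jitter_block():
    demo = np.random.randint(0, 256, (64, 96, 3), dtype=np.uint8)
    shuffled = jitter_block(demo, block_size=8, randomness=4)
    # The effect rearranges blocks in place, so the shape is preserved.
    assert shuffled.shape == demo.shape

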
def pixelate_image(input_image, division_block_size=2, dark_stretch_h=0, dark_stretch_v=0):
"""
Applies Pixelation on input image.
Parameters:
input_image (ndarray): The input image represented as a NumPy array.
division_block_size (int): The size of the square block in pixels.
dark_stretch_h (int): The size of dark line in dimension 0 (horizontal).
dark_stretch_v (int): The size of dark line in dimension 1 (vertical).
Returns:
ndarray: The Pixelated image as a NumPy array.
"""
    # Convert to grayscale, then place the intensity in the green channel only.
    input_image = cv2.cvtColor(input_image, cv2.COLOR_BGR2GRAY)
    input_image = np.stack([np.zeros_like(input_image), input_image, np.zeros_like(input_image)], axis=-1)
    height, width, _ = input_image.shape
    # Darken periodic rows and columns to draw the grid; filter the indices so
    # a partial block at the edge cannot index past the image bounds.
    dark_rows = [i + j for i in range(0, height, division_block_size)
                 for j in range(dark_stretch_h) if i + j < height]
    dark_cols = [i + j for i in range(0, width, division_block_size)
                 for j in range(dark_stretch_v) if i + j < width]
    input_image[dark_rows, :, :] = 0
    input_image[:, dark_cols, :] = 0
    # Soften the grid edges.
    input_image = cv2.GaussianBlur(input_image, (5, 5), 0)
    return input_image
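

# Illustrative sketch of pixelate_image on an arbitrary BGR frame; the file
# paths are hypothetical, and any uint8 BGR image works as input.
def _demo_pixelate_image():
    frame = cv2.imread('example.jpg')  # hypothetical path
    if frame is not None:
        styled = pixelate_image(frame, division_block_size=8, dark_stretch_h=2, dark_stretch_v=4)
        cv2.imwrite('example_pixelated.jpg', styled)

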
def bg_removal(input_image):
"""
Performs background removal on the input image.
Parameters:
input_image (ndarray): The input image represented as a NumPy array.
Returns:
tuple: A tuple containing the modified input image with background removed and the output mask.
"""
    results = bg_model.predict(input_image.copy())[0]
    output_mask = np.zeros_like(input_image, dtype='float32')
    if results.masks is not None:
        # Class index 0 is 'person' in the COCO-trained YOLOv8 models.
        # `.boxes.data` / `.masks.data` replace the `.boxes.boxes` /
        # `.masks.masks` accessors removed from recent ultralytics releases.
        boxes = results.boxes.data
        for i in range(len(boxes)):
            if int(boxes[i, -1].item()) == 0:
                # Keeps only the last detected person's mask (assumes the
                # returned mask resolution matches the input frame).
                output_mask = results.masks.data[i].detach().cpu().numpy()[..., None]
    input_image = np.where(output_mask, input_image, 0)
    return input_image, output_mask
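

# Illustrative sketch: bg_removal depends on the module-level `bg_model`
# loaded in the __main__ block below, so it can only run once that global
# exists. `frame` stands for any BGR frame (assumption).
def _demo_bg_removal(frame):
    person_only, mask = bg_removal(frame)
    # Report how much of the frame the person mask covers.
    coverage = float(np.count_nonzero(mask)) / mask.size
    print(f'person mask covers {coverage:.1%} of the frame')
    return person_only

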
def hand_tracking(input_image):
"""
Performs hand tracking on the input image.
Parameters:
input_image (ndarray): The input image represented as a NumPy array.
Returns:
tuple: A tuple containing the modified input image with landmarks drawn and the calculated distance.
"""
    # Detect hands in the image. MediaPipe expects RGB input, and cv2.cvtColor
    # also guarantees a contiguous array, which a reversed [:, :, ::-1] view
    # does not.
    results = hands.process(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
# Check if hands were detected
distance = None
if results.multi_hand_landmarks:
# Loop through each detected hand
for hand_landmarks in results.multi_hand_landmarks:
# Draw landmarks on the image
mp_drawing.draw_landmarks(
input_image, hand_landmarks, mp_hands.HAND_CONNECTIONS)
# Get the landmarks of the thumb tip and index fingertip
thumb_tip = hand_landmarks.landmark[mp_hands.HandLandmark.THUMB_TIP]
index_finger_tip = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP]
# Calculate the distance between the thumb tip and index fingertip
distance = np.sqrt((thumb_tip.x - index_finger_tip.x) ** 2 + (thumb_tip.y - index_finger_tip.y) ** 2)
# Convert the coordinates to image pixel values
height, width, _ = input_image.shape
thumb_x, thumb_y = int(thumb_tip.x * width), int(thumb_tip.y * height)
index_x, index_y = int(index_finger_tip.x * width), int(index_finger_tip.y * height)
# Draw a line from thumb_tip to index_finger_tip
cv2.line(input_image, (thumb_x, thumb_y), (index_x, index_y), (0, 255, 0), 3)
# Return the modified image and distance
return input_image, distance
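

# Illustrative sketch: hand_tracking depends on the module-level `hands`,
# `mp_hands`, and `mp_drawing` globals set up in the __main__ block. The
# returned distance is in normalized image coordinates, not pixels.
def _demo_hand_tracking(frame):
    annotated, pinch = hand_tracking(frame)
    if pinch is not None:
        print(f'thumb-to-index distance: {pinch:.3f} (normalized)')
    return annotated

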
def run_webcam(input_webcam=0) -> None:
"""
Runs the webcam application for image processing and visualization.
Press 'q' to quit
"""
cap = cv2.VideoCapture(input_webcam)
font = cv2.FONT_HERSHEY_COMPLEX
dis = 0
while True:
        ret, raw_frame = cap.read()
        if not ret:
            raise RuntimeError('webcam did not return a frame')
        frame, _ = bg_removal(raw_frame)
        _, raw_dis = hand_tracking(raw_frame)
        # Map the normalized pinch distance to an entropy level in 0..5;
        # keep the previous level when no hand is detected.
        dis = int(raw_dis * 20) if raw_dis is not None else dis
        dis = min(dis, 5)
frame = pixelate_image(frame, division_block_size=8, dark_stretch_h=2, dark_stretch_v=4)
frame = jitter_block(frame, 4, [0, 1, 2, 6, 12, 28][dis])
cv2.putText(frame, f"Entropy: {'|' * (dis + 1)}", (10, 30), font, 0.7, (255, 255, 255), 2)
cv2.imshow('WebCam', frame)
if cv2.waitKey(1) == ord('q'):
break
cap.release()
cv2.destroyAllWindows()
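

# Sketch of the pinch-to-jitter mapping used in run_webcam above: the
# normalized thumb-to-index distance is scaled by 20 and clamped to 0..5,
# which indexes the randomness table [0, 1, 2, 6, 12, 28]. The default
# argument here is just an example value.
def _demo_entropy_mapping(raw_dis=0.26):
    level = min(int(raw_dis * 20), 5)      # 0.26 -> 5
    return [0, 1, 2, 6, 12, 28][level]     # -> 28

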
if __name__ == "__main__":
parser = argparse.ArgumentParser()
    parser.add_argument('--model-type', type=str, default='m',
                        help='YOLOv8 model size: "n" (nano), "s" (small), "m" (medium), "l" (large), or "x" (x-large)')
    parser.add_argument('--webcam-number', type=int, default=0,
                        help='Index of the webcam to use (0 is usually the default camera)')
opt = parser.parse_args()
yolov8_model_type = opt.model_type
print(f"model type: {yolov8_model_type}")
webcam_number = opt.webcam_number
# Load YOLOv8 model for segmentation and background removal
bg_model = YOLO(f"bg_models/yolov8{yolov8_model_type}-seg.pt")
# Load the MediaPipe Hands model
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands()
run_webcam(webcam_number)
cv2.destroyAllWindows()