-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathhand_detector_V2.py
170 lines (127 loc) · 6.16 KB
/
hand_detector_V2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import numpy as np
import cv2
import math
import time
from hand_detector_utils import *
# from hand_classification_net import *
import torch
#%% Settings
# Camera index handed to cv2.VideoCapture below.
# Use 0 for the built-in laptop camera.
# If you use software that splits the video source (or similar), try 1, 2, etc. (start with 1).
default_device = 1
# Whether to mirror the captured frame horizontally.
# Set to True to get mirrored feedback when testing this program without the Unity application.
# If a third-party application already splits the video source, let that application produce
# the mirror effect and set this variable to False.
flip_frame = True
# Device 1 is assumed to be a split (already mirrored) source, so skip the extra flip.
if(default_device == 1): flip_frame = False
#%% Other variables (don't modify)
# Frame counter used to run the networks only every few frames (FPS optimization).
counter = 0
# Placeholders for the cropped hand images; replaced with real crops once a hand is detected.
hand1 = np.zeros((10, 10, 3))
hand2 = np.zeros((10, 10, 3))
# Finger-count predictions for the current frame, one per hand; -1 means "no prediction".
finger_predicts_1 = -1
finger_predicts_2 = -1
# Rolling histories of the finger counts of the two hands, used to decide which command to send.
finger_list_1 = []
finger_list_2 = []
# Mirror the left-hand crop before classification to improve classifier performance
# (presumably the classifier was trained on one orientation — TODO confirm).
flip_hand_1 = True
#%%
# Load the two networks: one for hand localization, one for finger counting.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Hand-localization network, consumed by trackingHandWithRCNN (hand_detector_utils).
hand_tracker = torch.load("model/model_all_40.pth")
hand_tracker.to(device)
hand_tracker.eval()
# Finger-counting classifier, applied to each cropped hand image via predictFingers.
finger_counter = torch.load("model/CNN_250_140.pth")
# finger_counter = CNNModel()
# finger_counter.load_state_dict(torch.load("model/model_chk.pt"))
finger_counter.to(device)
finger_counter.eval()
#%%
# Open the camera.
# FIX: cv2.VideoCapture does not raise when the device is missing — it returns
# an object whose isOpened() is False — so the original try/bare-except could
# never detect the failure (and, had the constructor raised, `capture` would
# have been undefined for the warm-up loop below). Check isOpened() instead.
capture = cv2.VideoCapture(default_device)
# capture = cv2.VideoCapture(cv2.CAP_DSHOW)
if not capture.isOpened():
    print("No Camera Source Found!")
# Discard the first few frames so the sensor can settle (auto-exposure, white
# balance). If the camera failed to open, read() harmlessly returns (False, None).
for i in range(15): ret, frame = capture.read()
# Main loop: grab frames, periodically run the detection/classification
# networks, draw the predictions onto the frame and display it until the
# user presses 'q'.
while capture.isOpened():
    # Capture frames from the camera
    ret, frame = capture.read()
    # FIX: read() returns (False, None) when no frame is available (camera
    # unplugged, stream ended); without this guard frame.copy() below raises
    # AttributeError. Exit the loop cleanly instead.
    if not ret or frame is None:
        break
    # frame = cv2.resize(frame, (320, 160))
    if(flip_frame): frame = cv2.flip(frame, 1)
    # Keep an unannotated copy for cropping the hand regions.
    frame_copy = frame.copy()
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Neural network section (hand tracking and finger counter).
    # To improve performance the two networks are activated only every 3rd frame.
    if(counter % 3 == 0):
        # Predict hand bounding box(es); boxes are [x1, y1, x2, y2] rows.
        boxes_predict = trackingHandWithRCNN(hand_tracker, frame, device)
        # Reset counter
        counter = 1
        # Perform the other actions only if at least 1 hand is detected
        if(len(boxes_predict) > 0):
            if(len(boxes_predict) == 2):
                # If the net detects 2 hands, hand1 becomes the left one and
                # hand2 the right one (compare the x1 coordinates).
                if(boxes_predict[0,0] < boxes_predict[1,0]):
                    hand1 = frame_copy[boxes_predict[0,1]:boxes_predict[0,3], boxes_predict[0,0]:boxes_predict[0,2]]
                    hand2 = frame_copy[boxes_predict[1,1]:boxes_predict[1,3], boxes_predict[1,0]:boxes_predict[1,2]]
                else:
                    hand1 = frame_copy[boxes_predict[1,1]:boxes_predict[1,3], boxes_predict[1,0]:boxes_predict[1,2]]
                    hand2 = frame_copy[boxes_predict[0,1]:boxes_predict[0,3], boxes_predict[0,0]:boxes_predict[0,2]]
                # Mirror the left-hand crop (see flip_hand_1) and denoise both crops
                if(flip_hand_1): hand1 = cv2.flip(hand1, 1)
                hand1 = cv2.GaussianBlur(hand1, (3,3), 0)
                hand2 = cv2.GaussianBlur(hand2, (3,3), 0)
                # Predict the finger count for both hands
                finger_predicts_1 = predictFingers(finger_counter, hand1, device)
                finger_predicts_2 = predictFingers(finger_counter, hand2, device)
            else:
                # If only 1 (or more than 2) hands are detected, take only the first
                hand1 = frame_copy[boxes_predict[0,1]:boxes_predict[0,3], boxes_predict[0,0]:boxes_predict[0,2]]
                hand2 = np.zeros((10, 10, 3))
                # Mirror only when the hand sits in the left half of the frame
                if(flip_hand_1 and boxes_predict[0, 2] < frame.shape[1] / 2): hand1 = cv2.flip(hand1, 1)
                hand1 = cv2.GaussianBlur(hand1, (3,3), 0)
                # Predict fingers for the hand; -1 marks the missing second hand
                finger_predicts_1 = predictFingers(finger_counter, hand1, device)
                finger_predicts_2 = -1
            # Append the new counts to the rolling histories
            finger_list_1.append(finger_predicts_1)
            finger_list_2.append(finger_predicts_2)
            # Maintain only the last five elements
            if(len(finger_list_1) > 5):
                finger_list_1 = finger_list_1[-5:]
            if(len(finger_list_2) > 5):
                finger_list_2 = finger_list_2[-5:]
    # Overlay the latest predictions every frame (they persist across the
    # frames where the networks are skipped).
    # print("finger 1: ", finger_predicts_1)
    cv2.putText(frame, "finger 1: " + str(finger_predicts_1), (50,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
    # if(finger_predicts_2 != -1):
    #     print(" finger 2: ", finger_predicts_2)
    cv2.putText(frame, "finger 2: " + str(finger_predicts_2), (50,100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Draw a box around each detected hand. The -1 test skips drawing when the
    # tracker flags an invalid detection — presumably it fills boxes with -1
    # in that case; verify against hand_detector_utils.
    if(-1 not in boxes_predict):
        for line in boxes_predict:
            # Extract the two corner points of the bounding box
            pt1 = (int(line[0]), int(line[1]))
            pt2 = (int(line[2]), int(line[3]))
            # Draw rectangle
            cv2.rectangle(frame, pt1, pt2, (0, 0, 255), thickness = 4)
            # Draw central point of the rectangle
            cv2.circle(frame, centralPointInBox(line) , 10, [255,0,255], -1)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Show the frame (and optionally the hand images)
    cv2.imshow("Full Frame", frame)
    # cv2.imshow("Frame copy", frame_copy)
    # cv2.imshow("Hand 1 (SX)", cv2.resize(hand1, (140, 140)))
    # cv2.imshow("Hand 2 (DX)", hand2)
    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Close the camera if 'q' is pressed
    if cv2.waitKey(1) == ord('q'):
        break
    # Advance counter
    counter += 1
capture.release()
cv2.destroyAllWindows()