# eye_track.py
# Webcam eye tracking: locates a face with dlib, thresholds the eye region and
# shows the estimated gaze direction (Left / Right / Center) after a short delay.
import numpy as np
import cv2
import dlib
import collections
# Constants
DELAY_SECONDS = 0.2  # Lag between capture and display; converted to a frame count below
ZOOM_FACTOR = 2  # Scale factor applied to the eye crop before it is displayed
CENTER_THRESHOLD = 1.0  # Largest |iris offset| (pixels of the unzoomed crop) reported as "Center"
TEXT_FONT = cv2.FONT_HERSHEY_SIMPLEX
TEXT_SCALE = 1
TEXT_COLOR = (255, 255, 255)
TEXT_THICKNESS = 2
TEXT_POSITION = (10, 50)  # Text origin inside the 100-pixel-high 'Direction' window
SCREEN_WIDTH = 1500  # Used only to centre the output windows
SCREEN_HEIGHT = 500
DEFAULT_FPS = 30  # Frame rate assumed when the capture backend does not report one

# Face detector and shape predictor
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

# Capture video from the default webcam (device index 0)
cap = cv2.VideoCapture(0)
# Backends that cannot report a frame rate return 0 for CAP_PROP_FPS; fall
# back to DEFAULT_FPS so the delay buffer does not collapse to zero length.
_reported_fps = cap.get(cv2.CAP_PROP_FPS)
FPS = int(_reported_fps) if _reported_fps and _reported_fps > 0 else DEFAULT_FPS
# Keep at least one slot: with maxlen=0 the deques discard every item and
# display_delayed_output() would call popleft() on an empty deque.
BUFFER_SIZE = max(1, int(FPS * DELAY_SECONDS))

# Create deques for frames and text for delay
frame_buffer = collections.deque(maxlen=BUFFER_SIZE)
text_buffer = collections.deque(maxlen=BUFFER_SIZE)
# Functions
def detect_faces_and_landmarks(gray_frame):
    """
    Runs the face detector on a grayscale frame and predicts the landmarks of
    the first detected face. Returns None when no face is found.
    """
    detections = detector(gray_frame)
    if len(detections) == 0:
        return None
    # Only the first detection is used, even when several faces are found.
    return predictor(gray_frame, detections[0])
def process_eye_region(landmarks, gray_frame=None):
    """
    Processes the eye region to return the thresholded (black & white) eye image,
    and relative x-position of the iris.

    Args:
        landmarks: Landmark result exposing ``part(i).x`` / ``part(i).y``;
            points 36-41 are used as the outline of the eye.
        gray_frame: Grayscale frame the landmarks were computed on. When
            omitted, the module-level ``gray`` is used, so existing
            ``process_eye_region(landmarks)`` calls keep working.

    Returns:
        A ``(eye_image, relative_x_position)`` tuple. ``relative_x_position``
        is the mean column of the dark pixels minus half the crop width, in
        pixels of the cropped eye image. It is None when the crop has no
        non-zero pixels (in which case the unthresholded, possibly empty, crop
        is returned) or when thresholding leaves no dark pixels.
    """
    if gray_frame is None:
        gray_frame = gray

    # Collect the points (landmarks) that outline the right eye (36-41).
    # The dtype is explicit because OpenCV's polygon routines work on 32-bit
    # integer points.
    right_eye_points = np.array(
        [(landmarks.part(i).x, landmarks.part(i).y) for i in range(36, 42)],
        dtype=np.int32,
    )

    # Create a mask with the same dimensions as the frame
    mask = np.zeros_like(gray_frame)
    cv2.fillPoly(mask, [right_eye_points], 255)

    # Extract and crop the eye region
    eye_region = cv2.bitwise_and(gray_frame, gray_frame, mask=mask)
    x, y, w, h = cv2.boundingRect(right_eye_points)
    eye_region_cropped = eye_region[y:y + h, x:x + w]

    # Threshold based on the median intensity of the non-zero pixels.
    visible_pixels = eye_region_cropped[eye_region_cropped > 0]
    if visible_pixels.size == 0:
        # The median of an empty selection is NaN and int(NaN) raises
        # ValueError, so report "no estimate" instead of crashing.
        return eye_region_cropped, None
    threshold_value = max(0, int(np.median(visible_pixels)))
    _, thresholded_eye = cv2.threshold(eye_region_cropped, threshold_value, 255, cv2.THRESH_BINARY)

    # Collect the column indices of dark pixels in the thresholded eye region.
    # NOTE(review): pixels of the bounding box that lie outside the eye polygon
    # were zeroed by the mask, so they are counted as dark here as well.
    dark_columns = np.where(thresholded_eye == 0)[1]
    if dark_columns.size == 0:
        return thresholded_eye, None

    # Find the "center" of the dark pixels relative to the middle of the crop
    relative_x_position = np.mean(dark_columns) - w / 2
    return thresholded_eye, relative_x_position
def estimate_gaze(relative_x_position, center_threshold=None):
    """
    Estimates the location of gaze based on the relative x-position of the iris.

    Args:
        relative_x_position: Signed horizontal offset of the iris from the
            centre of the eye crop, or None when no estimate is available.
        center_threshold: Largest absolute offset still reported as "Center".
            Defaults to the module-level CENTER_THRESHOLD.

    Returns:
        "Unknown" for None or NaN input, "Center" when the absolute offset is
        within the threshold, "Left" for offsets above it, "Right" otherwise.
    """
    # NaN compares False against everything and would otherwise fall through
    # to "Right", so treat it like a missing measurement.
    if relative_x_position is None or np.isnan(relative_x_position):
        return "Unknown"
    if center_threshold is None:
        center_threshold = CENTER_THRESHOLD
    if abs(relative_x_position) <= center_threshold:
        return "Center"
    return "Left" if relative_x_position > center_threshold else "Right"
def draw_lines_on_eye(eye_region, mean_dark_x, center_x, h):
    """
    Marks the estimated iris location on the eye image (modified in place).

    One vertical line is drawn at center_x and, when mean_dark_x is not None,
    a second one at mean_dark_x. Both run from row 0 to row h.
    """
    def vertical_line(x_value, color):
        column = int(x_value)
        cv2.line(eye_region, (column, 0), (column, h), color, 1)

    # Centre of the eye region
    vertical_line(center_x, (0, 255, 0))
    # Mean x-position of the dark pixels, if one was found
    if mean_dark_x is not None:
        vertical_line(mean_dark_x, (255, 0, 0))
def update_buffers(zoomed_eye, location_text):
    """
    Appends the newest eye image and its gaze label to the delay buffers.
    """
    # Both buffers are appended to together so that entry i of one matches
    # entry i of the other.
    for buffer, item in ((frame_buffer, zoomed_eye), (text_buffer, location_text)):
        buffer.append(item)
def display_delayed_output():
    """
    Shows the oldest buffered eye image and gaze label once the buffer is full.

    Does nothing until frame_buffer holds BUFFER_SIZE entries.
    """
    if len(frame_buffer) != BUFFER_SIZE:
        return

    eye_image = frame_buffer.popleft()
    label = text_buffer.popleft()

    # Delayed video
    cv2.imshow('Delayed Zoomed Eye', eye_image)

    # Delayed text, drawn on a black canvas sized to the label width
    (label_width, _), _ = cv2.getTextSize(label, TEXT_FONT, TEXT_SCALE, TEXT_THICKNESS)
    label_canvas = np.zeros((100, label_width + 20, 3), dtype=np.uint8)
    cv2.putText(label_canvas, label, TEXT_POSITION, TEXT_FONT, TEXT_SCALE, TEXT_COLOR, TEXT_THICKNESS)
    cv2.imshow('Direction', label_canvas)

    # Centre the eye window on the screen and place the text window 100 px above it
    image_height, image_width = eye_image.shape[:2]
    left = (SCREEN_WIDTH - image_width) // 2
    top = (SCREEN_HEIGHT - image_height) // 2
    cv2.moveWindow('Delayed Zoomed Eye', left, top)
    cv2.moveWindow('Direction', left, top - 100)
# Main loop
# Runs until the capture stops delivering frames or ESC is pressed. The
# try/finally releases the camera and closes the windows even if a frame
# raises an exception.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # `gray` stays a module-level name because process_eye_region() reads it.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces and landmarks
        landmarks = detect_faces_and_landmarks(gray)
        if landmarks is not None:
            # Process the eye region and calculate relative x-position
            thresholded_eye, relative_x_position = process_eye_region(landmarks)

            # An empty crop cannot be resized; skip the display for this frame.
            if thresholded_eye.size > 0:
                # Estimate gaze direction (Left, Right, Center)
                gaze_text = estimate_gaze(relative_x_position)

                # Zoom in on the thresholded eye region
                zoomed_thresholded_eye = cv2.resize(
                    thresholded_eye, (0, 0), fx=ZOOM_FACTOR, fy=ZOOM_FACTOR
                )

                # Update buffers with the zoomed frame and gaze text
                update_buffers(zoomed_thresholded_eye, gaze_text)

                # Display delayed output
                display_delayed_output()

        # Poll the keyboard on every iteration, including frames without a
        # face, so the windows keep being serviced and ESC still exits.
        # The mask keeps only the low byte of the returned key code.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    # When finished
    cap.release()
    cv2.destroyAllWindows()