In this tutorial, you will get the full source code of a webcam Eye Blink Counter. We will use Python 3, OpenCV, and MediaPipe to build this computer vision script. The code detects eye blinks in the webcam feed and keeps track of how many times the user has blinked. The running count is drawn on the video window as “Total Blinks”.
pip install opencv-python --user
pip install mediapipe --user
code.py
import cv2 as cv
import mediapipe as mp
import time
import math
import numpy as np
# Mutable state updated by the main video loop:
#   frame_counter - loop iterations so far; divided by elapsed time for the FPS readout
#   CEF_COUNTER   - frames counted while the blink ratio was above the threshold
#   TOTAL_BLINKS  - number of blinks counted so far
frame_counter =0
CEF_COUNTER =0
TOTAL_BLINKS =0
# Constants
# CEF_COUNTER must exceed this value before a closed-eye run is counted as a blink.
CLOSED_EYES_FRAME =3
# Font face passed to the text-drawing helpers.
FONTS =cv.FONT_HERSHEY_COMPLEX
# The lists below hold indices into the per-frame landmark coordinate list
# (the list returned by landmarksDetection). Only LEFT_EYE and RIGHT_EYE are
# referenced in the visible part of this script.
# Face boundary (oval) indices
FACE_OVAL=[ 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103,67, 109]
# Lips indices; LIPS is LOWER_LIPS followed by UPPER_LIPS.
LIPS=[ 61, 146, 91, 181, 84, 17, 314, 405, 321, 375,291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95,185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78 ]
LOWER_LIPS =[61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95]
UPPER_LIPS=[ 185, 40, 39, 37,0 ,267 ,269 ,270 ,409, 415, 310, 311, 312, 13, 82, 81, 42, 183, 78]
# Left eye indices.
# NOTE: element order matters - blinkRatio reads positions 0 and 8 as the
# horizontal end points and positions 12 and 4 as the vertical end points.
LEFT_EYE =[ 362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385,384, 398 ]
LEFT_EYEBROW =[ 336, 296, 334, 293, 300, 276, 283, 282, 295, 285 ]
# Right eye indices (same positional convention as LEFT_EYE).
RIGHT_EYE=[ 33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161 , 246 ]
RIGHT_EYEBROW=[ 70, 63, 105, 66, 107, 55, 65, 52, 53, 46 ]
# Alias for the MediaPipe face-mesh solution module; FaceMesh is created from it below.
map_face_mesh = mp.solutions.face_mesh
# Camera object: capture device index 0, opened as soon as this line runs
# (presumably the default webcam - confirm on multi-camera machines).
camera = cv.VideoCapture(0)
# Colors
# Tuples are (blue, green, red): OpenCV expects BGR order, not RGB.
YELLOW =(0,255,255)
GREEN = (0,255,0)
PINK = (147,20,255)
def colorBackgroundText(img, text, font, fontScale, textPos, textThickness=1,
                        textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3):
    """
    Draws text on top of a solid (opaque) filled rectangle, directly on img.
    @param img: (mat) image to draw on; it is modified in place
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position handed to cv.putText as the text origin
    @param textThickness: (int) stroke thickness of the text
    @param textColor: tuple(BGR), values 0 to 255 each
    @param bgColor: tuple(BGR), values 0 to 255 each
    @param pad_x: int(pixels) extra background added left and right of the text
    @param pad_y: int(pixels) extra background added above and below the text
    @return: img(mat), the same image object that was passed in
    """
    # Measure the rendered text so the background can be sized around it.
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    origin_x, origin_y = textPos

    # Two opposite corners of the background box, padded on every side.
    lower_left = (origin_x - pad_x, origin_y + pad_y)
    upper_right = (origin_x + text_w + pad_x, origin_y - text_h - pad_y)

    # Thickness -1 asks OpenCV for a filled rectangle.
    cv.rectangle(img, lower_left, upper_right, bgColor, -1)
    cv.putText(img, text, textPos, font, fontScale, textColor, textThickness)
    return img
def textWithBackground(img, text, font, fontScale, textPos, textThickness=1,
                       textColor=(0,255,0), bgColor=(0,0,0), pad_x=3, pad_y=3,
                       bgOpacity=0.5):
    """
    Draws text over a semi-transparent background rectangle.
    The input image is left untouched; a new blended image is returned.
    @param img: (mat) image to draw on
    @param text: (string) text to draw
    @param font: font face, like FONT_HERSHEY_COMPLEX, FONT_HERSHEY_PLAIN etc.
    @param fontScale: (double) size of the text
    @param textPos: tuple(x,y) position handed to cv.putText as the text origin
    @param textThickness: (int) stroke thickness of the text
    @param textColor: tuple(BGR), values 0 to 255 each
    @param bgColor: tuple(BGR), values 0 to 255 each
    @param pad_x: int(pixels) extra background added left and right of the text
    @param pad_y: int(pixels) extra background added above and below the text
    @param bgOpacity: (float) weight given to the layer holding the rectangle
                      when it is blended with the original (1 - bgOpacity)
    @return: new img(mat) with the background and the text drawn
    """
    # Measure the rendered text so the background can be sized around it.
    text_size, _baseline = cv.getTextSize(text, font, fontScale, textThickness)
    text_w, text_h = text_size
    origin_x, origin_y = textPos

    # Paint the filled box on a copy so it can be blended with the original.
    layer = img.copy()
    lower_left = (origin_x - pad_x, origin_y + pad_y)
    upper_right = (origin_x + text_w + pad_x, origin_y - text_h - pad_y)
    cv.rectangle(layer, lower_left, upper_right, bgColor, -1)

    # Weighted sum of the painted copy and the original image.
    blended = cv.addWeighted(layer, bgOpacity, img, 1 - bgOpacity, 0)

    # The text goes on after blending, so it is drawn at full strength.
    cv.putText(blended, text, textPos, font, fontScale, textColor, textThickness)
    return blended
# landmark detection function
def landmarksDetection(img, results, draw=False):
    """
    Converts the landmarks of the first detected face to pixel coordinates.
    @param img: (mat) frame the landmarks belong to; only its height and width
                are read, unless draw is True
    @param results: object exposing multi_face_landmarks, as returned by
                    FaceMesh.process
    @param draw: (bool) when True, a small filled green dot is drawn on img
                 at every landmark
    @return: list[(x, y), ...] of int pixel coordinates, one per landmark of
             the first face; an empty list when no face was detected
    """
    faces = results.multi_face_landmarks
    if not faces:
        # multi_face_landmarks is None/empty when no face is found;
        # report "no landmarks" instead of failing on the subscript below.
        return []

    img_height, img_width = img.shape[:2]
    # Landmark x/y are scaled by the frame size and truncated to whole pixels.
    mesh_coord = [
        (int(point.x * img_width), int(point.y * img_height))
        for point in faces[0].landmark
    ]
    if draw:
        # Plain loop: drawing is a side effect, not a value worth collecting.
        for p in mesh_coord:
            cv.circle(img, p, 2, (0, 255, 0), -1)
    # returning the list of tuples for each landmark
    return mesh_coord
# Euclidean distance
def euclaideanDistance(point, point1):
    """
    Returns the straight-line distance between two 2-D points.
    @param point: tuple(x, y) first point
    @param point1: tuple(x, y) second point
    @return: (float) Euclidean distance between point and point1
    """
    x, y = point
    x1, y1 = point1
    # math.hypot avoids squaring the differences by hand, which raises
    # OverflowError for very large float coordinates.
    return math.hypot(x1 - x, y1 - y)
# Blinking Ratio
def _eyeRatio(landmarks, indices):
    """Returns one eye's width/height ratio, or math.inf when its height is zero."""
    # Positions 0 and 8 of the index list are the horizontal end points,
    # positions 12 and 4 the top and bottom of the vertical line.
    horizontal = euclaideanDistance(landmarks[indices[0]], landmarks[indices[8]])
    vertical = euclaideanDistance(landmarks[indices[12]], landmarks[indices[4]])
    if vertical == 0:
        # Top and bottom landmarks coincide (possible once coordinates are
        # truncated to whole pixels). Dividing would raise ZeroDivisionError;
        # infinity keeps the result above any "eyes closed" threshold.
        return math.inf
    return horizontal / vertical


def blinkRatio(img, landmarks, right_indices, left_indices):
    """
    Returns the mean width/height ratio of both eyes; larger means more closed.
    @param img: (mat) current frame; accepted for interface compatibility,
                not used by the computation
    @param landmarks: list[(x, y), ...] of landmark coordinates
    @param right_indices: landmark indices of the right eye (needs at least
                          13 entries; positions 0, 4, 8 and 12 are read)
    @param left_indices: landmark indices of the left eye, same convention
    @return: (float) average of the two per-eye ratios; math.inf when either
             eye has zero vertical extent
    """
    reRatio = _eyeRatio(landmarks, right_indices)
    leRatio = _eyeRatio(landmarks, left_indices)
    return (reRatio + leRatio) / 2
# Eye width/height ratio above which the eyes are treated as closed.
BLINK_RATIO_THRESHOLD = 3.6

# Main loop: read frames from the camera, run the face mesh on each one,
# count blinks from the eye ratio and show the annotated frame until the
# stream ends or the user presses q/Q.
# try/finally guarantees the camera and the windows are released even when
# something inside the loop raises.
try:
    with map_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
        # starting time here
        start_time = time.time()
        # starting Video loop here.
        while True:
            frame_counter += 1  # frame counter
            ret, frame = camera.read()  # getting frame from camera
            if not ret:
                break  # no more frames break
            # resizing frame (1.5x in both directions)
            frame = cv.resize(frame, None, fx=1.5, fy=1.5, interpolation=cv.INTER_CUBIC)
            frame_width = frame.shape[1]
            # OpenCV frames are BGR; the face mesh gets the channel-swapped copy.
            # BGR2RGB performs the same swap as the previously used RGB2BGR but
            # states the actual direction of the conversion.
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = face_mesh.process(rgb_frame)
            if results.multi_face_landmarks:
                mesh_coords = landmarksDetection(frame, results, False)
                ratio = blinkRatio(frame, mesh_coords, RIGHT_EYE, LEFT_EYE)
                colorBackgroundText(frame, f'Ratio : {round(ratio,2)}', FONTS, 0.7, (30,100),2, PINK, YELLOW)
                if ratio > BLINK_RATIO_THRESHOLD:
                    # Eyes closed on this frame.
                    CEF_COUNTER += 1
                    # x position is derived from the frame WIDTH (it used the
                    # height before) so the label sits at the horizontal middle.
                    colorBackgroundText(frame, 'Blink', FONTS, 1.7, (frame_width // 2, 100), 2, YELLOW, pad_x=6, pad_y=6)
                else:
                    # Eyes open again: count a blink only if they stayed closed
                    # for more than CLOSED_EYES_FRAME frames.
                    if CEF_COUNTER > CLOSED_EYES_FRAME:
                        TOTAL_BLINKS += 1
                    # Always restart the run, so short flickers cannot pile up
                    # across separate events and be counted as a blink later.
                    CEF_COUNTER = 0
                colorBackgroundText(frame, f'Total Blinks: {TOTAL_BLINKS}', FONTS, 0.7, (30,150),2)
                # outline both eyes
                cv.polylines(frame, [np.array([mesh_coords[p] for p in LEFT_EYE], dtype=np.int32)], True, GREEN, 1, cv.LINE_AA)
                cv.polylines(frame, [np.array([mesh_coords[p] for p in RIGHT_EYE], dtype=np.int32)], True, GREEN, 1, cv.LINE_AA)
            # calculating frames per second (FPS)
            elapsed = time.time() - start_time
            # A coarse clock can report zero elapsed time on the first frame.
            fps = frame_counter / elapsed if elapsed > 0 else 0.0
            frame = textWithBackground(frame, f'FPS: {round(fps,1)}', FONTS, 1.0, (30, 50), bgOpacity=0.9, textThickness=2)
            cv.imshow('Eye Blink Counter by www.edopedia.com', frame)
            key = cv.waitKey(2)
            if key == ord('q') or key == ord('Q'):
                break
finally:
    cv.destroyAllWindows()
    camera.release()