2 Copyright (c) 2020 Adrian I. Lam
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include <opencv2/opencv.hpp>
31 #include <dlib/opencv.h>
32 #include <dlib/image_processing/frontal_face_detector.h>
33 #include <dlib/image_processing.h>
34 #include <dlib/image_processing/render_face_detections.h>
36 #include "facial_landmark_detector.h"
37 #include "math_utils.h"
/*
 * Append a sample to a moving-average window and trim the window.
 *
 * buf     - sample window; the newest value is stored at the back.
 * newval  - sample to append.
 * numTaps - maximum number of samples kept in the window.
 */
static void filterPush(std::deque<double>& buf, double newval,
                       std::size_t numTaps)
{
    buf.push_back(newval);
    // Discard the oldest samples until the window fits numTaps again.
    while (buf.size() > numTaps)
    {
        buf.pop_front();
    }
}
50 FacialLandmarkDetector
::FacialLandmarkDetector(std
::string cfgPath
)
55 if (!webcam
.open(m_cfg
.cvVideoCaptureId
))
57 throw std
::runtime_error("Unable to open webcam");
60 detector
= dlib
::get_frontal_face_detector();
61 dlib
::deserialize(m_cfg
.predictorPath
) >> predictor
;
64 FacialLandmarkDetector
::Params FacialLandmarkDetector
::getParams(void) const
68 params
.faceXAngle
= avg(m_faceXAngle
);
69 params
.faceYAngle
= avg(m_faceYAngle
) + m_cfg
.faceYAngleCorrection
;
70 // + 10 correct for angle between computer monitor and webcam
71 params
.faceZAngle
= avg(m_faceZAngle
);
72 params
.mouthOpenness
= avg(m_mouthOpenness
);
73 params
.mouthForm
= avg(m_mouthForm
);
75 double leftEye
= avg(m_leftEyeOpenness
, 1);
76 double rightEye
= avg(m_rightEyeOpenness
, 1);
77 // Just combine the two to get better synchronized blinks
78 // This effectively disables winks, so if we want to
79 // support winks in the future (see below) we will need
80 // a better way to handle this out-of-sync blinks.
81 double bothEyes
= (leftEye
+ rightEye
) / 2;
84 // Detect winks and make them look better
85 // Commenting out - winks are difficult to be detected by the
86 // dlib data set anyway... maybe in the future we can
87 // add a runtime option to enable/disable...
88 /*if (right == 0 && left > 0.2)
92 else if (left == 0 && right > 0.2)
97 params
.leftEyeOpenness
= leftEye
;
98 params
.rightEyeOpenness
= rightEye
;
100 if (leftEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
101 rightEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
102 params
.mouthForm
> m_cfg
.eyeSmileMouthFormThreshold
&&
103 params
.mouthOpenness
> m_cfg
.eyeSmileMouthOpenThreshold
)
105 params
.leftEyeSmile
= 1;
106 params
.rightEyeSmile
= 1;
110 params
.leftEyeSmile
= 0;
111 params
.rightEyeSmile
= 0;
117 void FacialLandmarkDetector
::stop(void)
122 void FacialLandmarkDetector
::mainLoop(void)
127 if (!webcam
.read(frame
))
129 throw std
::runtime_error("Unable to read from webcam");
132 if (m_cfg
.lateralInversion
)
134 cv
::flip(frame
, flipped
, 1);
140 dlib
::cv_image
<dlib
::bgr_pixel
> cimg(flipped
);
142 if (m_cfg
.showWebcamVideo
)
147 std
::vector
<dlib
::rectangle
> faces
= detector(cimg
);
149 if (faces
.size() > 0)
151 dlib
::rectangle face
= faces
[0];
152 dlib
::full_object_detection shape
= predictor(cimg
, face
);
154 /* The coordinates seem to be rather noisy in general.
155 * We will push everything through some moving average filters
156 * to reduce noise. The number of taps is determined empirically
157 * until we get something good.
158 * An alternative method would be to get some better dataset
159 * for dlib - perhaps even to train on a custom data set just for the user.
162 // Face rotation: X direction (left-right)
163 double faceXRot
= calcFaceXAngle(shape
);
164 filterPush(m_faceXAngle
, faceXRot
, m_cfg
.faceXAngleNumTaps
);
166 // Mouth form (smile / laugh) detection
167 double mouthForm
= calcMouthForm(shape
);
168 filterPush(m_mouthForm
, mouthForm
, m_cfg
.mouthFormNumTaps
);
170 // Face rotation: Y direction (up-down)
171 double faceYRot
= calcFaceYAngle(shape
, faceXRot
, mouthForm
);
172 filterPush(m_faceYAngle
, faceYRot
, m_cfg
.faceYAngleNumTaps
);
174 // Face rotation: Z direction (head tilt)
175 double faceZRot
= calcFaceZAngle(shape
);
176 filterPush(m_faceZAngle
, faceZRot
, m_cfg
.faceZAngleNumTaps
);
179 double mouthOpen
= calcMouthOpenness(shape
, mouthForm
);
180 filterPush(m_mouthOpenness
, mouthOpen
, m_cfg
.mouthOpenNumTaps
);
183 double eyeLeftOpen
= calcEyeOpenness(LEFT
, shape
, faceYRot
);
184 filterPush(m_leftEyeOpenness
, eyeLeftOpen
, m_cfg
.leftEyeOpenNumTaps
);
185 double eyeRightOpen
= calcEyeOpenness(RIGHT
, shape
, faceYRot
);
186 filterPush(m_rightEyeOpenness
, eyeRightOpen
, m_cfg
.rightEyeOpenNumTaps
);
190 if (m_cfg
.showWebcamVideo
&& m_cfg
.renderLandmarksOnVideo
)
193 win
.add_overlay(dlib
::render_face_detections(shape
));
198 if (m_cfg
.showWebcamVideo
&& m_cfg
.renderLandmarksOnVideo
)
204 cv
::waitKey(m_cfg
.cvWaitKeyMs
);
208 double FacialLandmarkDetector
::calcEyeAspectRatio(
209 dlib
::point
& p1
, dlib
::point
& p2
,
210 dlib
::point
& p3
, dlib
::point
& p4
,
211 dlib
::point
& p5
, dlib
::point
& p6
) const
213 double eyeWidth
= dist(p1
, p4
);
214 double eyeHeight1
= dist(p2
, p6
);
215 double eyeHeight2
= dist(p3
, p5
);
217 return (eyeHeight1
+ eyeHeight2
) / (2 * eyeWidth
);
220 double FacialLandmarkDetector
::calcEyeOpenness(
222 dlib
::full_object_detection
& shape
,
223 double faceYAngle
) const
225 double eyeAspectRatio
;
228 eyeAspectRatio
= calcEyeAspectRatio(shape
.part(42), shape
.part(43), shape
.part(44),
229 shape
.part(45), shape
.part(46), shape
.part(47));
233 eyeAspectRatio
= calcEyeAspectRatio(shape
.part(36), shape
.part(37), shape
.part(38),
234 shape
.part(39), shape
.part(40), shape
.part(41));
237 // Apply correction due to faceYAngle
238 double corrEyeAspRat
= eyeAspectRatio
/ std
::cos(degToRad(faceYAngle
));
240 return linearScale01(corrEyeAspRat
, m_cfg
.eyeClosedThreshold
, m_cfg
.eyeOpenThreshold
);
245 double FacialLandmarkDetector
::calcMouthForm(dlib
::full_object_detection
& shape
) const
247 /* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
248 * Compare distance between the two corners of the mouth
249 * to the distance between the two eyes.
252 /* An alternative (my initial attempt) was to compare the corners of
253 * the mouth to the top of the upper lip - they almost lie on a
254 * straight line when smiling / laughing. But that is only true
255 * when facing straight at the camera. When looking up / down,
256 * the angle changes. So here we'll use the distance approach instead.
259 auto eye1
= centroid(shape
.part(36), shape
.part(37), shape
.part(38),
260 shape
.part(39), shape
.part(40), shape
.part(41));
261 auto eye2
= centroid(shape
.part(42), shape
.part(43), shape
.part(44),
262 shape
.part(45), shape
.part(46), shape
.part(47));
263 double distEyes
= dist(eye1
, eye2
);
264 double distMouth
= dist(shape
.part(48), shape
.part(54));
266 double form
= linearScale01(distMouth
/ distEyes
,
267 m_cfg
.mouthNormalThreshold
,
268 m_cfg
.mouthSmileThreshold
);
273 double FacialLandmarkDetector
::calcMouthOpenness(
274 dlib
::full_object_detection
& shape
,
275 double mouthForm
) const
277 // Use points for the bottom of the upper lip, and top of the lower lip
278 // We have 3 pairs of points available, which give the mouth height
279 // on the left, in the middle, and on the right, resp.
280 // First let's try to use an average of all three.
281 double heightLeft
= dist(shape
.part(63), shape
.part(65));
282 double heightMiddle
= dist(shape
.part(62), shape
.part(66));
283 double heightRight
= dist(shape
.part(61), shape
.part(67));
285 double avgHeight
= (heightLeft
+ heightMiddle
+ heightRight
) / 3;
287 // Now, normalize it with the width of the mouth.
288 double width
= dist(shape
.part(60), shape
.part(64));
290 double normalized
= avgHeight
/ width
;
292 double scaled
= linearScale01(normalized
,
293 m_cfg
.mouthClosedThreshold
,
294 m_cfg
.mouthOpenThreshold
,
297 // Apply correction according to mouthForm
298 // Notice that when you smile / laugh, width is increased
299 scaled
*= (1 + m_cfg
.mouthOpenLaughCorrection
* mouthForm
);
304 double FacialLandmarkDetector
::calcFaceXAngle(dlib
::full_object_detection
& shape
) const
306 // This function will be easier to understand if you refer to the
307 // diagram in faceXAngle.png
309 // Construct the y-axis using (1) average of four points on the nose and
310 // (2) average of four points on the upper lip.
312 auto y0
= centroid(shape
.part(27), shape
.part(28), shape
.part(29),
314 auto y1
= centroid(shape
.part(50), shape
.part(51), shape
.part(52),
317 // Now drop a perpendicular from the left and right edges of the face,
318 // and calculate the ratio between the lengths of these perpendiculars
320 auto left
= centroid(shape
.part(14), shape
.part(15), shape
.part(16));
321 auto right
= centroid(shape
.part(0), shape
.part(1), shape
.part(2));
323 // Constructing a perpendicular:
324 // Join the left/right point and the upper lip. The included angle
325 // can now be determined using cosine rule.
326 // Then sine of this angle is the perpendicular divided by the newly
328 double opp
= dist(right
, y0
);
329 double adj1
= dist(y0
, y1
);
330 double adj2
= dist(y1
, right
);
331 double angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
332 double perpRight
= adj2
* std
::sin(angle
);
334 opp
= dist(left
, y0
);
335 adj2
= dist(y1
, left
);
336 angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
337 double perpLeft
= adj2
* std
::sin(angle
);
339 // Model the head as a sphere and look from above.
340 double theta
= std
::asin((perpRight
- perpLeft
) / (perpRight
+ perpLeft
));
342 theta
= radToDeg(theta
);
343 if (theta
< -30) theta
= -30;
344 if (theta
> 30) theta
= 30;
348 double FacialLandmarkDetector
::calcFaceYAngle(dlib
::full_object_detection
& shape
, double faceXAngle
, double mouthForm
) const
351 // angle between the two left/right points and the tip
352 double c
= dist(shape
.part(31), shape
.part(35));
353 double a
= dist(shape
.part(30), shape
.part(31));
354 double b
= dist(shape
.part(30), shape
.part(35));
356 double angle
= solveCosineRuleAngle(c
, a
, b
);
358 // This probably varies a lot from person to person...
360 // Best is probably to work out some trigonometry again,
361 // but just linear interpolation seems to work ok...
363 // Correct for X rotation
364 double corrAngle
= angle
* (1 + (std
::abs(faceXAngle
) / 30
365 * m_cfg
.faceYAngleXRotCorrection
));
367 // Correct for smiles / laughs - this increases the angle
368 corrAngle
*= (1 - mouthForm
* m_cfg
.faceYAngleSmileCorrection
);
370 if (corrAngle
>= m_cfg
.faceYAngleZeroValue
)
372 return -30 * linearScale01(corrAngle
,
373 m_cfg
.faceYAngleZeroValue
,
374 m_cfg
.faceYAngleDownThreshold
,
379 return 30 * (1 - linearScale01(corrAngle
,
380 m_cfg
.faceYAngleUpThreshold
,
381 m_cfg
.faceYAngleZeroValue
,
386 double FacialLandmarkDetector
::calcFaceZAngle(dlib
::full_object_detection
& shape
) const
388 // Use average of eyes and nose
390 auto eyeRight
= centroid(shape
.part(36), shape
.part(37), shape
.part(38),
391 shape
.part(39), shape
.part(40), shape
.part(41));
392 auto eyeLeft
= centroid(shape
.part(42), shape
.part(43), shape
.part(44),
393 shape
.part(45), shape
.part(46), shape
.part(47));
395 auto noseLeft
= shape
.part(35);
396 auto noseRight
= shape
.part(31);
398 double eyeYDiff
= eyeRight
.y() - eyeLeft
.y();
399 double eyeXDiff
= eyeRight
.x() - eyeLeft
.x();
401 double angle1
= std
::atan(eyeYDiff
/ eyeXDiff
);
403 double noseYDiff
= noseRight
.y() - noseLeft
.y();
404 double noseXDiff
= noseRight
.x() - noseLeft
.x();
406 double angle2
= std
::atan(noseYDiff
/ noseXDiff
);
408 return radToDeg((angle1
+ angle2
) / 2);
411 void FacialLandmarkDetector
::parseConfig(std
::string cfgPath
)
413 populateDefaultConfig();
416 std
::ifstream
file(cfgPath
);
420 throw std
::runtime_error("Failed to open config file");
424 unsigned int lineNum
= 0;
426 while (std
::getline(file
, line
))
435 std
::istringstream
ss(line
);
436 std
::string paramName
;
439 if (paramName
== "cvVideoCaptureId")
441 if (!(ss
>> m_cfg
.cvVideoCaptureId
))
443 throwConfigError(paramName
, "int",
447 else if (paramName
== "predictorPath")
449 if (!(ss
>> m_cfg
.predictorPath
))
451 throwConfigError(paramName
, "std::string",
455 else if (paramName
== "faceYAngleCorrection")
457 if (!(ss
>> m_cfg
.faceYAngleCorrection
))
459 throwConfigError(paramName
, "double",
463 else if (paramName
== "eyeSmileEyeOpenThreshold")
465 if (!(ss
>> m_cfg
.eyeSmileEyeOpenThreshold
))
467 throwConfigError(paramName
, "double",
471 else if (paramName
== "eyeSmileMouthFormThreshold")
473 if (!(ss
>> m_cfg
.eyeSmileMouthFormThreshold
))
475 throwConfigError(paramName
, "double",
479 else if (paramName
== "eyeSmileMouthOpenThreshold")
481 if (!(ss
>> m_cfg
.eyeSmileMouthOpenThreshold
))
483 throwConfigError(paramName
, "double",
487 else if (paramName
== "showWebcamVideo")
489 if (!(ss
>> m_cfg
.showWebcamVideo
))
491 throwConfigError(paramName
, "bool",
495 else if (paramName
== "renderLandmarksOnVideo")
497 if (!(ss
>> m_cfg
.renderLandmarksOnVideo
))
499 throwConfigError(paramName
, "bool",
503 else if (paramName
== "lateralInversion")
505 if (!(ss
>> m_cfg
.lateralInversion
))
507 throwConfigError(paramName
, "bool",
511 else if (paramName
== "faceXAngleNumTaps")
513 if (!(ss
>> m_cfg
.faceXAngleNumTaps
))
515 throwConfigError(paramName
, "std::size_t",
519 else if (paramName
== "faceYAngleNumTaps")
521 if (!(ss
>> m_cfg
.faceYAngleNumTaps
))
523 throwConfigError(paramName
, "std::size_t",
527 else if (paramName
== "faceZAngleNumTaps")
529 if (!(ss
>> m_cfg
.faceZAngleNumTaps
))
531 throwConfigError(paramName
, "std::size_t",
535 else if (paramName
== "mouthFormNumTaps")
537 if (!(ss
>> m_cfg
.mouthFormNumTaps
))
539 throwConfigError(paramName
, "std::size_t",
543 else if (paramName
== "mouthOpenNumTaps")
545 if (!(ss
>> m_cfg
.mouthOpenNumTaps
))
547 throwConfigError(paramName
, "std::size_t",
551 else if (paramName
== "leftEyeOpenNumTaps")
553 if (!(ss
>> m_cfg
.leftEyeOpenNumTaps
))
555 throwConfigError(paramName
, "std::size_t",
559 else if (paramName
== "rightEyeOpenNumTaps")
561 if (!(ss
>> m_cfg
.rightEyeOpenNumTaps
))
563 throwConfigError(paramName
, "std::size_t",
567 else if (paramName
== "cvWaitKeyMs")
569 if (!(ss
>> m_cfg
.cvWaitKeyMs
))
571 throwConfigError(paramName
, "int",
575 else if (paramName
== "eyeClosedThreshold")
577 if (!(ss
>> m_cfg
.eyeClosedThreshold
))
579 throwConfigError(paramName
, "double",
583 else if (paramName
== "eyeOpenThreshold")
585 if (!(ss
>> m_cfg
.eyeOpenThreshold
))
587 throwConfigError(paramName
, "double",
591 else if (paramName
== "mouthNormalThreshold")
593 if (!(ss
>> m_cfg
.mouthNormalThreshold
))
595 throwConfigError(paramName
, "double",
599 else if (paramName
== "mouthSmileThreshold")
601 if (!(ss
>> m_cfg
.mouthSmileThreshold
))
603 throwConfigError(paramName
, "double",
607 else if (paramName
== "mouthClosedThreshold")
609 if (!(ss
>> m_cfg
.mouthClosedThreshold
))
611 throwConfigError(paramName
, "double",
615 else if (paramName
== "mouthOpenThreshold")
617 if (!(ss
>> m_cfg
.mouthOpenThreshold
))
619 throwConfigError(paramName
, "double",
623 else if (paramName
== "mouthOpenLaughCorrection")
625 if (!(ss
>> m_cfg
.mouthOpenLaughCorrection
))
627 throwConfigError(paramName
, "double",
631 else if (paramName
== "faceYAngleXRotCorrection")
633 if (!(ss
>> m_cfg
.faceYAngleXRotCorrection
))
635 throwConfigError(paramName
, "double",
639 else if (paramName
== "faceYAngleSmileCorrection")
641 if (!(ss
>> m_cfg
.faceYAngleSmileCorrection
))
643 throwConfigError(paramName
, "double",
647 else if (paramName
== "faceYAngleZeroValue")
649 if (!(ss
>> m_cfg
.faceYAngleZeroValue
))
651 throwConfigError(paramName
, "double",
655 else if (paramName
== "faceYAngleUpThreshold")
657 if (!(ss
>> m_cfg
.faceYAngleUpThreshold
))
659 throwConfigError(paramName
, "double",
663 else if (paramName
== "faceYAngleDownThreshold")
665 if (!(ss
>> m_cfg
.faceYAngleDownThreshold
))
667 throwConfigError(paramName
, "double",
673 std
::ostringstream oss
;
674 oss
<< "Unrecognized parameter name at line " << lineNum
675 << ": " << paramName
;
676 throw std
::runtime_error(oss
.str());
683 void FacialLandmarkDetector
::populateDefaultConfig(void)
685 // These are values that I've personally tested to work OK for my face.
686 // Your milage may vary - hence the config file.
688 m_cfg
.cvVideoCaptureId
= 0;
689 m_cfg
.predictorPath
= "shape_predictor_68_face_landmarks.dat";
690 m_cfg
.faceYAngleCorrection
= 10;
691 m_cfg
.eyeSmileEyeOpenThreshold
= 0.6;
692 m_cfg
.eyeSmileMouthFormThreshold
= 0.75;
693 m_cfg
.eyeSmileMouthOpenThreshold
= 0.5;
694 m_cfg
.showWebcamVideo
= true;
695 m_cfg
.renderLandmarksOnVideo
= true;
696 m_cfg
.lateralInversion
= true;
697 m_cfg
.cvWaitKeyMs
= 5;
698 m_cfg
.faceXAngleNumTaps
= 11;
699 m_cfg
.faceYAngleNumTaps
= 11;
700 m_cfg
.faceZAngleNumTaps
= 11;
701 m_cfg
.mouthFormNumTaps
= 3;
702 m_cfg
.mouthOpenNumTaps
= 3;
703 m_cfg
.leftEyeOpenNumTaps
= 3;
704 m_cfg
.rightEyeOpenNumTaps
= 3;
705 m_cfg
.eyeClosedThreshold
= 0.2;
706 m_cfg
.eyeOpenThreshold
= 0.25;
707 m_cfg
.mouthNormalThreshold
= 0.75;
708 m_cfg
.mouthSmileThreshold
= 1.0;
709 m_cfg
.mouthClosedThreshold
= 0.1;
710 m_cfg
.mouthOpenThreshold
= 0.4;
711 m_cfg
.mouthOpenLaughCorrection
= 0.2;
712 m_cfg
.faceYAngleXRotCorrection
= 0.15;
713 m_cfg
.faceYAngleSmileCorrection
= 0.075;
714 m_cfg
.faceYAngleZeroValue
= 1.8;
715 m_cfg
.faceYAngleDownThreshold
= 2.3;
716 m_cfg
.faceYAngleUpThreshold
= 1.3;
719 void FacialLandmarkDetector
::throwConfigError(std
::string paramName
,
720 std
::string expectedType
,
722 unsigned int lineNum
)
724 std
::ostringstream ss
;
725 ss
<< "Error parsing config file for parameter " << paramName
726 << "\nAt line " << lineNum
<< ": " << line
727 << "\nExpecting value of type " << expectedType
;
729 throw std
::runtime_error(ss
.str());