2 Copyright (c) 2020 Adrian I. Lam
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include <opencv2/opencv.hpp>
31 #include <dlib/opencv.h>
32 #include <dlib/image_processing/frontal_face_detector.h>
33 #include <dlib/image_processing.h>
34 #include <dlib/image_processing/render_face_detections.h>
36 #include "facial_landmark_detector.h"
37 #include "math_utils.h"
/**
 * Push a new sample into a moving-average window and keep the window
 * bounded.
 *
 * @param buf      Sample window; the newest sample is stored at the back.
 * @param newval   Sample to append.
 * @param numTaps  Maximum number of samples the window may hold. The
 *                 oldest samples (at the front) are discarded until the
 *                 window fits.
 */
static void filterPush(std::deque<double>& buf, double newval,
                       std::size_t numTaps)
{
    buf.push_back(newval);

    // Discard all surplus samples from the front in a single step.
    if (buf.size() > numTaps)
    {
        const auto surplus =
            static_cast<std::deque<double>::difference_type>(
                buf.size() - numTaps);
        buf.erase(buf.begin(), buf.begin() + surplus);
    }
}
50 FacialLandmarkDetector
::FacialLandmarkDetector(std
::string cfgPath
)
55 if (!webcam
.open(m_cfg
.cvVideoCaptureId
))
57 throw std
::runtime_error("Unable to open webcam");
60 detector
= dlib
::get_frontal_face_detector();
61 dlib
::deserialize(m_cfg
.predictorPath
) >> predictor
;
64 FacialLandmarkDetector
::Params FacialLandmarkDetector
::getParams(void) const
68 params
.faceXAngle
= avg(m_faceXAngle
);
69 params
.faceYAngle
= avg(m_faceYAngle
) + m_cfg
.faceYAngleCorrection
;
// faceYAngleCorrection (default 10) corrects for the angle between the computer monitor and the webcam
71 params
.faceZAngle
= avg(m_faceZAngle
);
72 params
.mouthOpenness
= avg(m_mouthOpenness
);
73 params
.mouthForm
= avg(m_mouthForm
);
75 double leftEye
= avg(m_leftEyeOpenness
, 1);
76 double rightEye
= avg(m_rightEyeOpenness
, 1);
// Just combine the two to get better synchronized blinks.
// This effectively disables winks, so if we want to
// support winks in the future (see below) we will need
// a better way to handle these out-of-sync blinks.
81 double bothEyes
= (leftEye
+ rightEye
) / 2;
// Detect winks and make them look better.
// Commented out - winks are difficult to detect with the
// dlib data set anyway... maybe in the future we can
// add a runtime option to enable/disable this...
88 /*if (right == 0 && left > 0.2)
92 else if (left == 0 && right > 0.2)
97 params
.leftEyeOpenness
= leftEye
;
98 params
.rightEyeOpenness
= rightEye
;
100 if (leftEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
101 rightEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
102 params
.mouthForm
> m_cfg
.eyeSmileMouthFormThreshold
&&
103 params
.mouthOpenness
> m_cfg
.eyeSmileMouthOpenThreshold
)
105 params
.leftEyeSmile
= 1;
106 params
.rightEyeSmile
= 1;
110 params
.leftEyeSmile
= 0;
111 params
.rightEyeSmile
= 0;
114 params
.autoBlink
= m_cfg
.autoBlink
;
115 params
.autoBreath
= m_cfg
.autoBreath
;
116 params
.randomMotion
= m_cfg
.randomMotion
;
121 void FacialLandmarkDetector
::stop(void)
126 void FacialLandmarkDetector
::mainLoop(void)
131 if (!webcam
.read(frame
))
133 throw std
::runtime_error("Unable to read from webcam");
136 if (m_cfg
.lateralInversion
)
138 cv
::flip(frame
, flipped
, 1);
144 dlib
::cv_image
<dlib
::bgr_pixel
> cimg(flipped
);
146 if (m_cfg
.showWebcamVideo
)
151 std
::vector
<dlib
::rectangle
> faces
= detector(cimg
);
153 if (faces
.size() > 0)
155 dlib
::rectangle face
= faces
[0];
156 dlib
::full_object_detection shape
= predictor(cimg
, face
);
158 /* The coordinates seem to be rather noisy in general.
159 * We will push everything through some moving average filters
160 * to reduce noise. The number of taps is determined empirically
161 * until we get something good.
162 * An alternative method would be to get some better dataset
163 * for dlib - perhaps even to train on a custom data set just for the user.
166 // Face rotation: X direction (left-right)
167 double faceXRot
= calcFaceXAngle(shape
);
168 filterPush(m_faceXAngle
, faceXRot
, m_cfg
.faceXAngleNumTaps
);
170 // Mouth form (smile / laugh) detection
171 double mouthForm
= calcMouthForm(shape
);
172 filterPush(m_mouthForm
, mouthForm
, m_cfg
.mouthFormNumTaps
);
174 // Face rotation: Y direction (up-down)
175 double faceYRot
= calcFaceYAngle(shape
, faceXRot
, mouthForm
);
176 filterPush(m_faceYAngle
, faceYRot
, m_cfg
.faceYAngleNumTaps
);
178 // Face rotation: Z direction (head tilt)
179 double faceZRot
= calcFaceZAngle(shape
);
180 filterPush(m_faceZAngle
, faceZRot
, m_cfg
.faceZAngleNumTaps
);
183 double mouthOpen
= calcMouthOpenness(shape
, mouthForm
);
184 filterPush(m_mouthOpenness
, mouthOpen
, m_cfg
.mouthOpenNumTaps
);
187 double eyeLeftOpen
= calcEyeOpenness(LEFT
, shape
, faceYRot
);
188 filterPush(m_leftEyeOpenness
, eyeLeftOpen
, m_cfg
.leftEyeOpenNumTaps
);
189 double eyeRightOpen
= calcEyeOpenness(RIGHT
, shape
, faceYRot
);
190 filterPush(m_rightEyeOpenness
, eyeRightOpen
, m_cfg
.rightEyeOpenNumTaps
);
194 if (m_cfg
.showWebcamVideo
&& m_cfg
.renderLandmarksOnVideo
)
197 win
.add_overlay(dlib
::render_face_detections(shape
));
202 if (m_cfg
.showWebcamVideo
&& m_cfg
.renderLandmarksOnVideo
)
208 cv
::waitKey(m_cfg
.cvWaitKeyMs
);
212 double FacialLandmarkDetector
::calcEyeAspectRatio(
213 dlib
::point
& p1
, dlib
::point
& p2
,
214 dlib
::point
& p3
, dlib
::point
& p4
,
215 dlib
::point
& p5
, dlib
::point
& p6
) const
217 double eyeWidth
= dist(p1
, p4
);
218 double eyeHeight1
= dist(p2
, p6
);
219 double eyeHeight2
= dist(p3
, p5
);
221 return (eyeHeight1
+ eyeHeight2
) / (2 * eyeWidth
);
224 double FacialLandmarkDetector
::calcEyeOpenness(
226 dlib
::full_object_detection
& shape
,
227 double faceYAngle
) const
229 double eyeAspectRatio
;
232 eyeAspectRatio
= calcEyeAspectRatio(shape
.part(42), shape
.part(43), shape
.part(44),
233 shape
.part(45), shape
.part(46), shape
.part(47));
237 eyeAspectRatio
= calcEyeAspectRatio(shape
.part(36), shape
.part(37), shape
.part(38),
238 shape
.part(39), shape
.part(40), shape
.part(41));
241 // Apply correction due to faceYAngle
242 double corrEyeAspRat
= eyeAspectRatio
/ std
::cos(degToRad(faceYAngle
));
244 return linearScale01(corrEyeAspRat
, m_cfg
.eyeClosedThreshold
, m_cfg
.eyeOpenThreshold
);
249 double FacialLandmarkDetector
::calcMouthForm(dlib
::full_object_detection
& shape
) const
251 /* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
252 * Compare distance between the two corners of the mouth
253 * to the distance between the two eyes.
256 /* An alternative (my initial attempt) was to compare the corners of
257 * the mouth to the top of the upper lip - they almost lie on a
258 * straight line when smiling / laughing. But that is only true
259 * when facing straight at the camera. When looking up / down,
260 * the angle changes. So here we'll use the distance approach instead.
263 auto eye1
= centroid(shape
.part(36), shape
.part(37), shape
.part(38),
264 shape
.part(39), shape
.part(40), shape
.part(41));
265 auto eye2
= centroid(shape
.part(42), shape
.part(43), shape
.part(44),
266 shape
.part(45), shape
.part(46), shape
.part(47));
267 double distEyes
= dist(eye1
, eye2
);
268 double distMouth
= dist(shape
.part(48), shape
.part(54));
270 double form
= linearScale01(distMouth
/ distEyes
,
271 m_cfg
.mouthNormalThreshold
,
272 m_cfg
.mouthSmileThreshold
);
277 double FacialLandmarkDetector
::calcMouthOpenness(
278 dlib
::full_object_detection
& shape
,
279 double mouthForm
) const
281 // Use points for the bottom of the upper lip, and top of the lower lip
282 // We have 3 pairs of points available, which give the mouth height
283 // on the left, in the middle, and on the right, resp.
284 // First let's try to use an average of all three.
285 double heightLeft
= dist(shape
.part(63), shape
.part(65));
286 double heightMiddle
= dist(shape
.part(62), shape
.part(66));
287 double heightRight
= dist(shape
.part(61), shape
.part(67));
289 double avgHeight
= (heightLeft
+ heightMiddle
+ heightRight
) / 3;
291 // Now, normalize it with the width of the mouth.
292 double width
= dist(shape
.part(60), shape
.part(64));
294 double normalized
= avgHeight
/ width
;
296 double scaled
= linearScale01(normalized
,
297 m_cfg
.mouthClosedThreshold
,
298 m_cfg
.mouthOpenThreshold
,
301 // Apply correction according to mouthForm
302 // Notice that when you smile / laugh, width is increased
303 scaled
*= (1 + m_cfg
.mouthOpenLaughCorrection
* mouthForm
);
308 double FacialLandmarkDetector
::calcFaceXAngle(dlib
::full_object_detection
& shape
) const
310 // This function will be easier to understand if you refer to the
311 // diagram in faceXAngle.png
313 // Construct the y-axis using (1) average of four points on the nose and
314 // (2) average of four points on the upper lip.
316 auto y0
= centroid(shape
.part(27), shape
.part(28), shape
.part(29),
318 auto y1
= centroid(shape
.part(50), shape
.part(51), shape
.part(52),
// Now drop a perpendicular from the left and right edges of the face,
// and calculate the ratio between the lengths of these perpendiculars
324 auto left
= centroid(shape
.part(14), shape
.part(15), shape
.part(16));
325 auto right
= centroid(shape
.part(0), shape
.part(1), shape
.part(2));
327 // Constructing a perpendicular:
328 // Join the left/right point and the upper lip. The included angle
329 // can now be determined using cosine rule.
330 // Then sine of this angle is the perpendicular divided by the newly
332 double opp
= dist(right
, y0
);
333 double adj1
= dist(y0
, y1
);
334 double adj2
= dist(y1
, right
);
335 double angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
336 double perpRight
= adj2
* std
::sin(angle
);
338 opp
= dist(left
, y0
);
339 adj2
= dist(y1
, left
);
340 angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
341 double perpLeft
= adj2
* std
::sin(angle
);
343 // Model the head as a sphere and look from above.
344 double theta
= std
::asin((perpRight
- perpLeft
) / (perpRight
+ perpLeft
));
346 theta
= radToDeg(theta
);
347 if (theta
< -30) theta
= -30;
348 if (theta
> 30) theta
= 30;
352 double FacialLandmarkDetector
::calcFaceYAngle(dlib
::full_object_detection
& shape
, double faceXAngle
, double mouthForm
) const
355 // angle between the two left/right points and the tip
356 double c
= dist(shape
.part(31), shape
.part(35));
357 double a
= dist(shape
.part(30), shape
.part(31));
358 double b
= dist(shape
.part(30), shape
.part(35));
360 double angle
= solveCosineRuleAngle(c
, a
, b
);
362 // This probably varies a lot from person to person...
364 // Best is probably to work out some trigonometry again,
365 // but just linear interpolation seems to work ok...
367 // Correct for X rotation
368 double corrAngle
= angle
* (1 + (std
::abs(faceXAngle
) / 30
369 * m_cfg
.faceYAngleXRotCorrection
));
371 // Correct for smiles / laughs - this increases the angle
372 corrAngle
*= (1 - mouthForm
* m_cfg
.faceYAngleSmileCorrection
);
374 if (corrAngle
>= m_cfg
.faceYAngleZeroValue
)
376 return -30 * linearScale01(corrAngle
,
377 m_cfg
.faceYAngleZeroValue
,
378 m_cfg
.faceYAngleDownThreshold
,
383 return 30 * (1 - linearScale01(corrAngle
,
384 m_cfg
.faceYAngleUpThreshold
,
385 m_cfg
.faceYAngleZeroValue
,
390 double FacialLandmarkDetector
::calcFaceZAngle(dlib
::full_object_detection
& shape
) const
392 // Use average of eyes and nose
394 auto eyeRight
= centroid(shape
.part(36), shape
.part(37), shape
.part(38),
395 shape
.part(39), shape
.part(40), shape
.part(41));
396 auto eyeLeft
= centroid(shape
.part(42), shape
.part(43), shape
.part(44),
397 shape
.part(45), shape
.part(46), shape
.part(47));
399 auto noseLeft
= shape
.part(35);
400 auto noseRight
= shape
.part(31);
402 double eyeYDiff
= eyeRight
.y() - eyeLeft
.y();
403 double eyeXDiff
= eyeRight
.x() - eyeLeft
.x();
405 double angle1
= std
::atan(eyeYDiff
/ eyeXDiff
);
407 double noseYDiff
= noseRight
.y() - noseLeft
.y();
408 double noseXDiff
= noseRight
.x() - noseLeft
.x();
410 double angle2
= std
::atan(noseYDiff
/ noseXDiff
);
412 return radToDeg((angle1
+ angle2
) / 2);
415 void FacialLandmarkDetector
::parseConfig(std
::string cfgPath
)
417 populateDefaultConfig();
420 std
::ifstream
file(cfgPath
);
424 throw std
::runtime_error("Failed to open config file");
428 unsigned int lineNum
= 0;
430 while (std
::getline(file
, line
))
439 std
::istringstream
ss(line
);
440 std
::string paramName
;
443 if (paramName
== "cvVideoCaptureId")
445 if (!(ss
>> m_cfg
.cvVideoCaptureId
))
447 throwConfigError(paramName
, "int",
451 else if (paramName
== "predictorPath")
453 if (!(ss
>> m_cfg
.predictorPath
))
455 throwConfigError(paramName
, "std::string",
459 else if (paramName
== "faceYAngleCorrection")
461 if (!(ss
>> m_cfg
.faceYAngleCorrection
))
463 throwConfigError(paramName
, "double",
467 else if (paramName
== "eyeSmileEyeOpenThreshold")
469 if (!(ss
>> m_cfg
.eyeSmileEyeOpenThreshold
))
471 throwConfigError(paramName
, "double",
475 else if (paramName
== "eyeSmileMouthFormThreshold")
477 if (!(ss
>> m_cfg
.eyeSmileMouthFormThreshold
))
479 throwConfigError(paramName
, "double",
483 else if (paramName
== "eyeSmileMouthOpenThreshold")
485 if (!(ss
>> m_cfg
.eyeSmileMouthOpenThreshold
))
487 throwConfigError(paramName
, "double",
491 else if (paramName
== "showWebcamVideo")
493 if (!(ss
>> m_cfg
.showWebcamVideo
))
495 throwConfigError(paramName
, "bool",
499 else if (paramName
== "renderLandmarksOnVideo")
501 if (!(ss
>> m_cfg
.renderLandmarksOnVideo
))
503 throwConfigError(paramName
, "bool",
507 else if (paramName
== "lateralInversion")
509 if (!(ss
>> m_cfg
.lateralInversion
))
511 throwConfigError(paramName
, "bool",
515 else if (paramName
== "faceXAngleNumTaps")
517 if (!(ss
>> m_cfg
.faceXAngleNumTaps
))
519 throwConfigError(paramName
, "std::size_t",
523 else if (paramName
== "faceYAngleNumTaps")
525 if (!(ss
>> m_cfg
.faceYAngleNumTaps
))
527 throwConfigError(paramName
, "std::size_t",
531 else if (paramName
== "faceZAngleNumTaps")
533 if (!(ss
>> m_cfg
.faceZAngleNumTaps
))
535 throwConfigError(paramName
, "std::size_t",
539 else if (paramName
== "mouthFormNumTaps")
541 if (!(ss
>> m_cfg
.mouthFormNumTaps
))
543 throwConfigError(paramName
, "std::size_t",
547 else if (paramName
== "mouthOpenNumTaps")
549 if (!(ss
>> m_cfg
.mouthOpenNumTaps
))
551 throwConfigError(paramName
, "std::size_t",
555 else if (paramName
== "leftEyeOpenNumTaps")
557 if (!(ss
>> m_cfg
.leftEyeOpenNumTaps
))
559 throwConfigError(paramName
, "std::size_t",
563 else if (paramName
== "rightEyeOpenNumTaps")
565 if (!(ss
>> m_cfg
.rightEyeOpenNumTaps
))
567 throwConfigError(paramName
, "std::size_t",
571 else if (paramName
== "cvWaitKeyMs")
573 if (!(ss
>> m_cfg
.cvWaitKeyMs
))
575 throwConfigError(paramName
, "int",
579 else if (paramName
== "eyeClosedThreshold")
581 if (!(ss
>> m_cfg
.eyeClosedThreshold
))
583 throwConfigError(paramName
, "double",
587 else if (paramName
== "eyeOpenThreshold")
589 if (!(ss
>> m_cfg
.eyeOpenThreshold
))
591 throwConfigError(paramName
, "double",
595 else if (paramName
== "mouthNormalThreshold")
597 if (!(ss
>> m_cfg
.mouthNormalThreshold
))
599 throwConfigError(paramName
, "double",
603 else if (paramName
== "mouthSmileThreshold")
605 if (!(ss
>> m_cfg
.mouthSmileThreshold
))
607 throwConfigError(paramName
, "double",
611 else if (paramName
== "mouthClosedThreshold")
613 if (!(ss
>> m_cfg
.mouthClosedThreshold
))
615 throwConfigError(paramName
, "double",
619 else if (paramName
== "mouthOpenThreshold")
621 if (!(ss
>> m_cfg
.mouthOpenThreshold
))
623 throwConfigError(paramName
, "double",
627 else if (paramName
== "mouthOpenLaughCorrection")
629 if (!(ss
>> m_cfg
.mouthOpenLaughCorrection
))
631 throwConfigError(paramName
, "double",
635 else if (paramName
== "faceYAngleXRotCorrection")
637 if (!(ss
>> m_cfg
.faceYAngleXRotCorrection
))
639 throwConfigError(paramName
, "double",
643 else if (paramName
== "faceYAngleSmileCorrection")
645 if (!(ss
>> m_cfg
.faceYAngleSmileCorrection
))
647 throwConfigError(paramName
, "double",
651 else if (paramName
== "faceYAngleZeroValue")
653 if (!(ss
>> m_cfg
.faceYAngleZeroValue
))
655 throwConfigError(paramName
, "double",
659 else if (paramName
== "faceYAngleUpThreshold")
661 if (!(ss
>> m_cfg
.faceYAngleUpThreshold
))
663 throwConfigError(paramName
, "double",
667 else if (paramName
== "faceYAngleDownThreshold")
669 if (!(ss
>> m_cfg
.faceYAngleDownThreshold
))
671 throwConfigError(paramName
, "double",
675 else if (paramName
== "autoBlink")
677 if (!(ss
>> m_cfg
.autoBlink
))
679 throwConfigError(paramName
, "bool",
683 else if (paramName
== "autoBreath")
685 if (!(ss
>> m_cfg
.autoBreath
))
687 throwConfigError(paramName
, "bool",
691 else if (paramName
== "randomMotion")
693 if (!(ss
>> m_cfg
.randomMotion
))
695 throwConfigError(paramName
, "bool",
701 std
::ostringstream oss
;
702 oss
<< "Unrecognized parameter name at line " << lineNum
703 << ": " << paramName
;
704 throw std
::runtime_error(oss
.str());
711 void FacialLandmarkDetector
::populateDefaultConfig(void)
713 // These are values that I've personally tested to work OK for my face.
714 // Your milage may vary - hence the config file.
716 m_cfg
.cvVideoCaptureId
= 0;
717 m_cfg
.predictorPath
= "shape_predictor_68_face_landmarks.dat";
718 m_cfg
.faceYAngleCorrection
= 10;
719 m_cfg
.eyeSmileEyeOpenThreshold
= 0.6;
720 m_cfg
.eyeSmileMouthFormThreshold
= 0.75;
721 m_cfg
.eyeSmileMouthOpenThreshold
= 0.5;
722 m_cfg
.showWebcamVideo
= true;
723 m_cfg
.renderLandmarksOnVideo
= true;
724 m_cfg
.lateralInversion
= true;
725 m_cfg
.cvWaitKeyMs
= 5;
726 m_cfg
.faceXAngleNumTaps
= 11;
727 m_cfg
.faceYAngleNumTaps
= 11;
728 m_cfg
.faceZAngleNumTaps
= 11;
729 m_cfg
.mouthFormNumTaps
= 3;
730 m_cfg
.mouthOpenNumTaps
= 3;
731 m_cfg
.leftEyeOpenNumTaps
= 3;
732 m_cfg
.rightEyeOpenNumTaps
= 3;
733 m_cfg
.eyeClosedThreshold
= 0.2;
734 m_cfg
.eyeOpenThreshold
= 0.25;
735 m_cfg
.mouthNormalThreshold
= 0.75;
736 m_cfg
.mouthSmileThreshold
= 1.0;
737 m_cfg
.mouthClosedThreshold
= 0.1;
738 m_cfg
.mouthOpenThreshold
= 0.4;
739 m_cfg
.mouthOpenLaughCorrection
= 0.2;
740 m_cfg
.faceYAngleXRotCorrection
= 0.15;
741 m_cfg
.faceYAngleSmileCorrection
= 0.075;
742 m_cfg
.faceYAngleZeroValue
= 1.8;
743 m_cfg
.faceYAngleDownThreshold
= 2.3;
744 m_cfg
.faceYAngleUpThreshold
= 1.3;
745 m_cfg
.autoBlink
= false;
746 m_cfg
.autoBreath
= false;
747 m_cfg
.randomMotion
= false;
750 void FacialLandmarkDetector
::throwConfigError(std
::string paramName
,
751 std
::string expectedType
,
753 unsigned int lineNum
)
755 std
::ostringstream ss
;
756 ss
<< "Error parsing config file for parameter " << paramName
757 << "\nAt line " << lineNum
<< ": " << line
758 << "\nExpecting value of type " << expectedType
;
760 throw std
::runtime_error(ss
.str());