2 Copyright (c) 2020-2021 Adrian I. Lam
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <arpa/inet.h>
35 #include "facial_landmark_detector.h"
36 #include "math_utils.h"
// Moving-average helper: appends newval to the back of the sample buffer buf,
// then loops while the buffer holds more than numTaps entries.
// NOTE(review): the declaration of numTaps and the body of the while loop are
// not present in this excerpt; presumably the loop discards the oldest
// samples until at most numTaps remain - confirm against the full file.
39 static void filterPush(std
::deque
<double>& buf
, double newval
,
42 buf
.push_back(newval
);
43 while (buf
.size() > numTaps
)
// Constructor: builds an IPv4 endpoint from m_cfg.osfPort and
// m_cfg.osfIpAddress, creates a UDP datagram socket in m_sock and binds it to
// that endpoint.
// Throws std::runtime_error ("Cannot create UDP socket" / "Cannot bind
// socket"); the conditions guarding the two throws are not present in this
// excerpt.
// NOTE(review): cfgPath is presumably handed to parseConfig() before m_cfg is
// read here - that call is not visible; confirm.
49 FacialLandmarkDetector
::FacialLandmarkDetector(std
::string cfgPath
)
// NOTE(review): addr is declared without an initializer and only sin_family,
// sin_port and sin_addr.s_addr are assigned below; any remaining members keep
// indeterminate values. Consider zero-initializing it.
54 struct sockaddr_in addr
;
55 addr
.sin_family
= AF_INET
;
// Port is converted to network byte order.
56 addr
.sin_port
= htons(m_cfg
.osfPort
);
// NOTE(review): the result of inet_addr() is stored without any visible
// check, so a malformed osfIpAddress is not rejected here.
57 addr
.sin_addr
.s_addr
= inet_addr(m_cfg
.osfIpAddress
.c_str());
59 m_sock
= socket(AF_INET
, SOCK_DGRAM
, IPPROTO_UDP
);
62 throw std
::runtime_error("Cannot create UDP socket");
65 int ret
= bind(m_sock
, (struct sockaddr
*)&addr
, sizeof addr
);
// NOTE(review): when this throw fires, m_sock has already been opened and
// nothing visible closes it on this path; a destructor does not run for an
// object whose constructor throws.
68 throw std
::runtime_error("Cannot bind socket");
// Destructor.
// NOTE(review): the constructor opens a socket into m_sock; presumably it is
// released here - confirm against the body.
72 FacialLandmarkDetector
::~FacialLandmarkDetector()
// Builds the current parameter set: every smoothed quantity is the avg() of
// its filter buffer, eye openness is post-processed for winks / synchronized
// blinks, an "eye smile" flag pair is derived from configured thresholds, and
// the autoBlink / autoBreath / randomMotion flags are copied from m_cfg.
// NOTE(review): the declaration of params, the bodies of the wink branches,
// the condition around the blink-synchronization step and the return
// statement are not present in this excerpt.
77 FacialLandmarkDetector
::Params FacialLandmarkDetector
::getParams(void) const
81 params
.faceXAngle
= avg(m_faceXAngle
);
82 params
.faceYAngle
= avg(m_faceYAngle
) + m_cfg
.faceYAngleCorrection
;
83 // faceYAngleCorrection (default 10) corrects for the angle between computer monitor and webcam
84 params
.faceZAngle
= avg(m_faceZAngle
);
85 params
.mouthOpenness
= avg(m_mouthOpenness
);
86 params
.mouthForm
= avg(m_mouthForm
);
// Eye openness uses the two-argument form of avg() with 1 as second argument
// (presumably the value used when the buffer is empty - confirm in
// math_utils.h).
88 double leftEye
= avg(m_leftEyeOpenness
, 1);
89 double rightEye
= avg(m_rightEyeOpenness
, 1);
// sync starts out true exactly when winking is disabled in the config.
90 bool sync
= !m_cfg
.winkEnable
;
// Wink candidates: one eye below 0.1 while the other is above 0.2.
94 if (rightEye
< 0.1 && leftEye
> 0.2)
99 else if (leftEye
< 0.1 && rightEye
> 0.2)
112 // Combine the two to get better synchronized blinks
113 double bothEyes
= (leftEye
+ rightEye
) / 2;
118 params
.leftEyeOpenness
= leftEye
;
119 params
.rightEyeOpenness
= rightEye
;
// Eye-smile test: both eyes at or below eyeSmileEyeOpenThreshold while
// mouthForm and mouthOpenness both exceed their thresholds.
121 if (leftEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
122 rightEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
123 params
.mouthForm
> m_cfg
.eyeSmileMouthFormThreshold
&&
124 params
.mouthOpenness
> m_cfg
.eyeSmileMouthOpenThreshold
)
126 params
.leftEyeSmile
= 1;
127 params
.rightEyeSmile
= 1;
// Alternative assignment (the branch keyword itself is not visible here).
131 params
.leftEyeSmile
= 0;
132 params
.rightEyeSmile
= 0;
// Pass-through configuration flags.
135 params
.autoBlink
= m_cfg
.autoBlink
;
136 params
.autoBreath
= m_cfg
.autoBreath
;
137 params
.randomMotion
= m_cfg
.randomMotion
;
// NOTE(review): body not visible in this excerpt; presumably this requests
// that mainLoop() terminate - confirm.
142 void FacialLandmarkDetector
::stop(void)
// Receives OSF tracking packets from m_sock, extracts the 2-D landmark
// coordinates for the tracked face, derives the face / mouth / eye
// measurements from them and pushes each one into its moving-average buffer.
// Packets of the wrong size or for a different face id are skipped.
// NOTE(review): the enclosing loop header, the stores into landmarks[] and
// the remainder of the landmarksOffset expression are not present in this
// excerpt.
147 void FacialLandmarkDetector
::mainLoop(void)
151 // Read UDP packet from OSF
152 static const int nPoints
= 68;
// Size in bytes of one per-face frame; the terms below sum to 1785.
153 static const int packetFrameSize
= 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4
154 + 4 * 4 + 4 * 68 + 4 * 2 * 68 + 4 * 3 * 70 + 4 * 14;
// Byte offset of the landmark coordinates inside the frame (the expression
// continues on a line that is not visible here).
156 static const int landmarksOffset
= 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4
159 uint8_t buf
[packetFrameSize
];
160 ssize_t recvSize
= recv(m_sock
, buf
, sizeof buf
, 0);
// Anything that is not exactly one full frame is ignored; this also covers a
// negative return value from recv().
162 if (recvSize
!= packetFrameSize
) continue;
163 // Note: This is dependent on endianness, and we would assume that
164 // the OSF instance is run on a machine with the same endianness
165 // as our current machine.
// NOTE(review): this and the float reads below reinterpret bytes of a uint8_t
// buffer through int / float pointers at arbitrary offsets; copying the bytes
// out with memcpy would avoid alignment and aliasing concerns.
166 int recvFaceId
= *(int *)(buf
+ 8);
167 if (recvFaceId
!= m_faceId
) continue; // We only support one face
169 Point landmarks
[nPoints
];
// Each landmark occupies two consecutive floats, read as x then y.
171 for (int i
= 0; i
< nPoints
; i
++)
173 float x
= *(float *)(buf
+ landmarksOffset
+ i
* 2 * sizeof(float));
174 float y
= *(float *)(buf
+ landmarksOffset
+ (i
* 2 + 1) * sizeof(float));
180 /* The coordinates seem to be rather noisy in general.
181 * We will push everything through some moving average filters
182 * to reduce noise. The number of taps is determined empirically
183 * until we get something good.
184 * An alternative method would be to get some better dataset -
185 * perhaps even to train on a custom data set just for the user.
188 // Face rotation: X direction (left-right)
189 double faceXRot
= calcFaceXAngle(landmarks
);
190 filterPush(m_faceXAngle
, faceXRot
, m_cfg
.faceXAngleNumTaps
);
192 // Mouth form (smile / laugh) detection
193 double mouthForm
= calcMouthForm(landmarks
);
194 filterPush(m_mouthForm
, mouthForm
, m_cfg
.mouthFormNumTaps
);
// The Y angle needs the unfiltered X angle and mouth form computed above,
// which is why those two are evaluated first.
196 // Face rotation: Y direction (up-down)
197 double faceYRot
= calcFaceYAngle(landmarks
, faceXRot
, mouthForm
);
198 filterPush(m_faceYAngle
, faceYRot
, m_cfg
.faceYAngleNumTaps
);
200 // Face rotation: Z direction (head tilt)
201 double faceZRot
= calcFaceZAngle(landmarks
);
202 filterPush(m_faceZAngle
, faceZRot
, m_cfg
.faceZAngleNumTaps
);
// Mouth openness, corrected by the current mouth form.
205 double mouthOpen
= calcMouthOpenness(landmarks
, mouthForm
);
206 filterPush(m_mouthOpenness
, mouthOpen
, m_cfg
.mouthOpenNumTaps
);
// Eye openness for each eye, corrected by the unfiltered Y angle.
209 double eyeLeftOpen
= calcEyeOpenness(LEFT
, landmarks
, faceYRot
);
210 filterPush(m_leftEyeOpenness
, eyeLeftOpen
, m_cfg
.leftEyeOpenNumTaps
);
211 double eyeRightOpen
= calcEyeOpenness(RIGHT
, landmarks
, faceYRot
);
212 filterPush(m_rightEyeOpenness
, eyeRightOpen
, m_cfg
.rightEyeOpenNumTaps
);
214 // Eyebrows: the landmark detection doesn't work very well for my face,
215 // so I've not implemented them.
219 double FacialLandmarkDetector
::calcEyeAspectRatio(
220 Point
& p1
, Point
& p2
,
221 Point
& p3
, Point
& p4
,
222 Point
& p5
, Point
& p6
) const
224 double eyeWidth
= dist(p1
, p4
);
225 double eyeHeight1
= dist(p2
, p6
);
226 double eyeHeight2
= dist(p3
, p5
);
228 return (eyeHeight1
+ eyeHeight2
) / (2 * eyeWidth
);
// Eye openness in the range produced by linearScale01(): the eye aspect ratio
// of the selected eye is divided by cos(faceYAngle) and then scaled between
// m_cfg.eyeClosedThreshold and m_cfg.eyeOpenThreshold.
// faceYAngle is given in degrees (it is passed through degToRad()).
// NOTE(review): the leading parameters and the condition that selects between
// the two landmark groups are not present in this excerpt; callers pass
// LEFT / RIGHT and the landmark array.
231 double FacialLandmarkDetector
::calcEyeOpenness(
234 double faceYAngle
) const
236 double eyeAspectRatio
;
// First alternative: landmarks 42..47.
239 eyeAspectRatio
= calcEyeAspectRatio(landmarks
[42], landmarks
[43], landmarks
[44],
240 landmarks
[45], landmarks
[46], landmarks
[47]);
// Second alternative: landmarks 36..41.
244 eyeAspectRatio
= calcEyeAspectRatio(landmarks
[36], landmarks
[37], landmarks
[38],
245 landmarks
[39], landmarks
[40], landmarks
[41]);
// NOTE(review): the divisor approaches zero as faceYAngle approaches +/-90
// degrees; presumably upstream clamping keeps the angle well inside that
// range - confirm.
248 // Apply correction due to faceYAngle
249 double corrEyeAspRat
= eyeAspectRatio
/ std
::cos(degToRad(faceYAngle
));
251 return linearScale01(corrEyeAspRat
, m_cfg
.eyeClosedThreshold
, m_cfg
.eyeOpenThreshold
);
// Mouth form measure: the distance between landmarks 58 and 62 divided by the
// distance between the two eye centroids, scaled with linearScale01() between
// m_cfg.mouthNormalThreshold and m_cfg.mouthSmileThreshold.
// NOTE(review): the return statement is not present in this excerpt;
// presumably the scaled value `form` is returned - confirm.
256 double FacialLandmarkDetector
::calcMouthForm(Point landmarks
[]) const
258 /* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
259 * Compare distance between the two corners of the mouth
260 * to the distance between the two eyes.
263 /* An alternative (my initial attempt) was to compare the corners of
264 * the mouth to the top of the upper lip - they almost lie on a
265 * straight line when smiling / laughing. But that is only true
266 * when facing straight at the camera. When looking up / down,
267 * the angle changes. So here we'll use the distance approach instead.
// Eye centres: centroids of landmarks 36..41 and 42..47.
270 auto eye1
= centroid(landmarks
[36], landmarks
[37], landmarks
[38],
271 landmarks
[39], landmarks
[40], landmarks
[41]);
272 auto eye2
= centroid(landmarks
[42], landmarks
[43], landmarks
[44],
273 landmarks
[45], landmarks
[46], landmarks
[47]);
274 double distEyes
= dist(eye1
, eye2
);
// Mouth width, between the two mouth-corner landmarks.
275 double distMouth
= dist(landmarks
[58], landmarks
[62]);
// Normalizing by the eye distance makes the ratio independent of how large
// the face appears in the frame.
277 double form
= linearScale01(distMouth
/ distEyes
,
278 m_cfg
.mouthNormalThreshold
,
279 m_cfg
.mouthSmileThreshold
);
// Mouth openness measure: the mean of three lip-gap heights is divided by the
// mouth width, scaled with linearScale01() between m_cfg.mouthClosedThreshold
// and m_cfg.mouthOpenThreshold, then multiplied by
// (1 + mouthOpenLaughCorrection * mouthForm).
// NOTE(review): the landmark-array parameter, the trailing argument(s) of the
// linearScale01() call and the return statement are not present in this
// excerpt.
284 double FacialLandmarkDetector
::calcMouthOpenness(
286 double mouthForm
) const
288 // Use points for the bottom of the upper lip, and top of the lower lip
289 // We have 3 pairs of points available, which give the mouth height
290 // on the left, in the middle, and on the right, resp.
291 // First let's try to use an average of all three.
292 double heightLeft
= dist(landmarks
[61], landmarks
[63]);
293 double heightMiddle
= dist(landmarks
[60], landmarks
[64]);
294 double heightRight
= dist(landmarks
[59], landmarks
[65]);
296 double avgHeight
= (heightLeft
+ heightMiddle
+ heightRight
) / 3;
298 // Now, normalize it with the width of the mouth.
299 double width
= dist(landmarks
[58], landmarks
[62]);
301 double normalized
= avgHeight
/ width
;
303 double scaled
= linearScale01(normalized
,
304 m_cfg
.mouthClosedThreshold
,
305 m_cfg
.mouthOpenThreshold
,
// A wider (smiling) mouth lowers the height/width ratio, so the scaled value
// is boosted in proportion to mouthForm.
308 // Apply correction according to mouthForm
309 // Notice that when you smile / laugh, width is increased
310 scaled
*= (1 + m_cfg
.mouthOpenLaughCorrection
* mouthForm
);
// Left-right head rotation estimate in degrees, clamped to [-30, 30].
// A vertical reference axis is built from a nose centroid (y0) and an
// upper-lip centroid (y1); the perpendicular distances from the right and
// left face edges to that axis are compared, and the angle is
// asin((perpRight - perpLeft) / (perpRight + perpLeft)).
// NOTE(review): one continuation line of the y0 centroid call and the return
// statement are not present in this excerpt.
315 double FacialLandmarkDetector
::calcFaceXAngle(Point landmarks
[]) const
317 // This function will be easier to understand if you refer to the
318 // diagram in faceXAngle.png
320 // Construct the y-axis using (1) average of four points on the nose and
321 // (2) average of five points on the upper lip.
323 auto y0
= centroid(landmarks
[27], landmarks
[28], landmarks
[29],
325 auto y1
= centroid(landmarks
[48], landmarks
[49], landmarks
[50],
326 landmarks
[51], landmarks
[52]);
328 // Now drop a perpendicular from the left and right edges of the face,
329 // and calculate the ratio between the lengths of these perpendiculars
331 auto left
= centroid(landmarks
[14], landmarks
[15], landmarks
[16]);
332 auto right
= centroid(landmarks
[0], landmarks
[1], landmarks
[2]);
334 // Constructing a perpendicular:
335 // Join the left/right point and the upper lip. The included angle
336 // can now be determined using cosine rule.
337 // Then sine of this angle is the perpendicular divided by the newly
// Right side: triangle (right, y0, y1); angle is the one at y1.
339 double opp
= dist(right
, y0
);
340 double adj1
= dist(y0
, y1
);
341 double adj2
= dist(y1
, right
);
342 double angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
343 double perpRight
= adj2
* std
::sin(angle
);
// Left side: same construction, reusing adj1 (the y0-y1 axis length).
345 opp
= dist(left
, y0
);
346 adj2
= dist(y1
, left
);
347 angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
348 double perpLeft
= adj2
* std
::sin(angle
);
// NOTE(review): if perpRight + perpLeft is zero the quotient is not a number;
// nothing visible guards against that degenerate input.
350 // Model the head as a sphere and look from above.
351 double theta
= std
::asin((perpRight
- perpLeft
) / (perpRight
+ perpLeft
));
// Convert to degrees and clamp to the supported range.
353 theta
= radToDeg(theta
);
354 if (theta
< -30) theta
= -30;
355 if (theta
> 30) theta
= 30;
// Up-down head rotation estimate. The angle at landmark 30 in the triangle
// formed with landmarks 31 and 35 is corrected for X rotation and for
// smiling, then mapped linearly: values at or above faceYAngleZeroValue go to
// the -30 * linearScale01(...) expression, the other visible return uses
// 30 * (1 - linearScale01(...)).
// faceXAngle is expected in degrees on the +/-30 scale used by
// calcFaceXAngle (it is divided by 30 below).
// NOTE(review): the trailing arguments of both linearScale01() calls and the
// branch structure between the two returns are not present in this excerpt.
359 double FacialLandmarkDetector
::calcFaceYAngle(Point landmarks
[], double faceXAngle
, double mouthForm
) const
362 // angle between the two left/right points and the tip
363 double c
= dist(landmarks
[31], landmarks
[35]);
364 double a
= dist(landmarks
[30], landmarks
[31]);
365 double b
= dist(landmarks
[30], landmarks
[35]);
// c is the side opposite landmark 30, so this is the angle at landmark 30.
367 double angle
= solveCosineRuleAngle(c
, a
, b
);
369 // This probably varies a lot from person to person...
371 // Best is probably to work out some trigonometry again,
372 // but just linear interpolation seems to work ok...
374 // Correct for X rotation
375 double corrAngle
= angle
* (1 + (std
::abs(faceXAngle
) / 30
376 * m_cfg
.faceYAngleXRotCorrection
));
378 // Correct for smiles / laughs - this increases the angle
379 corrAngle
*= (1 - mouthForm
* m_cfg
.faceYAngleSmileCorrection
);
381 if (corrAngle
>= m_cfg
.faceYAngleZeroValue
)
383 return -30 * linearScale01(corrAngle
,
384 m_cfg
.faceYAngleZeroValue
,
385 m_cfg
.faceYAngleDownThreshold
,
390 return 30 * (1 - linearScale01(corrAngle
,
391 m_cfg
.faceYAngleUpThreshold
,
392 m_cfg
.faceYAngleZeroValue
,
397 double FacialLandmarkDetector
::calcFaceZAngle(Point landmarks
[]) const
399 // Use average of eyes and nose
401 auto eyeRight
= centroid(landmarks
[36], landmarks
[37], landmarks
[38],
402 landmarks
[39], landmarks
[40], landmarks
[41]);
403 auto eyeLeft
= centroid(landmarks
[42], landmarks
[43], landmarks
[44],
404 landmarks
[45], landmarks
[46], landmarks
[47]);
406 auto noseLeft
= landmarks
[35];
407 auto noseRight
= landmarks
[31];
409 double eyeYDiff
= eyeRight
.y
- eyeLeft
.y
;
410 double eyeXDiff
= eyeRight
.x
- eyeLeft
.x
;
412 double angle1
= std
::atan(eyeYDiff
/ eyeXDiff
);
414 double noseYDiff
= noseRight
.y
- noseLeft
.y
;
415 double noseXDiff
= noseRight
.x
- noseLeft
.x
;
417 double angle2
= std
::atan(noseYDiff
/ noseXDiff
);
419 return radToDeg((angle1
+ angle2
) / 2);
// Loads configuration: m_cfg is first filled with the built-in defaults, then
// the file at cfgPath is read line by line. Each line names a parameter
// followed by its value; the value is extracted with operator>> into the
// matching m_cfg field.
// Throws std::runtime_error when the file cannot be opened ("Failed to open
// config file"), when a value fails to parse (via throwConfigError) and when
// a parameter name is not recognized.
// NOTE(review): the declaration of `line`, any blank-line / comment skipping,
// the lineNum increment, the extraction of paramName from ss and the trailing
// arguments of every throwConfigError() call are not present in this excerpt.
422 void FacialLandmarkDetector
::parseConfig(std
::string cfgPath
)
424 populateDefaultConfig();
427 std
::ifstream
file(cfgPath
);
431 throw std
::runtime_error("Failed to open config file");
435 unsigned int lineNum
= 0;
437 while (std
::getline(file
, line
))
446 std
::istringstream
ss(line
);
447 std
::string paramName
;
// Dispatch on the parameter name. The second argument given to
// throwConfigError is the type name reported in the error message.
450 if (paramName
== "osfIpAddress")
452 if (!(ss
>> m_cfg
.osfIpAddress
))
454 throwConfigError(paramName
, "std::string",
458 else if (paramName
== "osfPort")
460 if (!(ss
>> m_cfg
.osfPort
))
462 throwConfigError(paramName
, "int",
466 else if (paramName
== "faceYAngleCorrection")
468 if (!(ss
>> m_cfg
.faceYAngleCorrection
))
470 throwConfigError(paramName
, "double",
474 else if (paramName
== "eyeSmileEyeOpenThreshold")
476 if (!(ss
>> m_cfg
.eyeSmileEyeOpenThreshold
))
478 throwConfigError(paramName
, "double",
482 else if (paramName
== "eyeSmileMouthFormThreshold")
484 if (!(ss
>> m_cfg
.eyeSmileMouthFormThreshold
))
486 throwConfigError(paramName
, "double",
490 else if (paramName
== "eyeSmileMouthOpenThreshold")
492 if (!(ss
>> m_cfg
.eyeSmileMouthOpenThreshold
))
494 throwConfigError(paramName
, "double",
// Filter lengths (number of taps) for the moving-average buffers.
498 else if (paramName
== "faceXAngleNumTaps")
500 if (!(ss
>> m_cfg
.faceXAngleNumTaps
))
502 throwConfigError(paramName
, "std::size_t",
506 else if (paramName
== "faceYAngleNumTaps")
508 if (!(ss
>> m_cfg
.faceYAngleNumTaps
))
510 throwConfigError(paramName
, "std::size_t",
514 else if (paramName
== "faceZAngleNumTaps")
516 if (!(ss
>> m_cfg
.faceZAngleNumTaps
))
518 throwConfigError(paramName
, "std::size_t",
522 else if (paramName
== "mouthFormNumTaps")
524 if (!(ss
>> m_cfg
.mouthFormNumTaps
))
526 throwConfigError(paramName
, "std::size_t",
530 else if (paramName
== "mouthOpenNumTaps")
532 if (!(ss
>> m_cfg
.mouthOpenNumTaps
))
534 throwConfigError(paramName
, "std::size_t",
538 else if (paramName
== "leftEyeOpenNumTaps")
540 if (!(ss
>> m_cfg
.leftEyeOpenNumTaps
))
542 throwConfigError(paramName
, "std::size_t",
546 else if (paramName
== "rightEyeOpenNumTaps")
548 if (!(ss
>> m_cfg
.rightEyeOpenNumTaps
))
550 throwConfigError(paramName
, "std::size_t",
554 else if (paramName
== "eyeClosedThreshold")
556 if (!(ss
>> m_cfg
.eyeClosedThreshold
))
558 throwConfigError(paramName
, "double",
562 else if (paramName
== "eyeOpenThreshold")
564 if (!(ss
>> m_cfg
.eyeOpenThreshold
))
566 throwConfigError(paramName
, "double",
// NOTE(review): the bool fields are read with plain operator>>, which by
// default accepts only 0 / 1 (not "true" / "false") unless std::boolalpha is
// set on the stream; no such manipulator is visible here.
570 else if (paramName
== "winkEnable")
572 if (!(ss
>> m_cfg
.winkEnable
))
574 throwConfigError(paramName
, "bool",
578 else if (paramName
== "mouthNormalThreshold")
580 if (!(ss
>> m_cfg
.mouthNormalThreshold
))
582 throwConfigError(paramName
, "double",
586 else if (paramName
== "mouthSmileThreshold")
588 if (!(ss
>> m_cfg
.mouthSmileThreshold
))
590 throwConfigError(paramName
, "double",
594 else if (paramName
== "mouthClosedThreshold")
596 if (!(ss
>> m_cfg
.mouthClosedThreshold
))
598 throwConfigError(paramName
, "double",
602 else if (paramName
== "mouthOpenThreshold")
604 if (!(ss
>> m_cfg
.mouthOpenThreshold
))
606 throwConfigError(paramName
, "double",
610 else if (paramName
== "mouthOpenLaughCorrection")
612 if (!(ss
>> m_cfg
.mouthOpenLaughCorrection
))
614 throwConfigError(paramName
, "double",
618 else if (paramName
== "faceYAngleXRotCorrection")
620 if (!(ss
>> m_cfg
.faceYAngleXRotCorrection
))
622 throwConfigError(paramName
, "double",
626 else if (paramName
== "faceYAngleSmileCorrection")
628 if (!(ss
>> m_cfg
.faceYAngleSmileCorrection
))
630 throwConfigError(paramName
, "double",
634 else if (paramName
== "faceYAngleZeroValue")
636 if (!(ss
>> m_cfg
.faceYAngleZeroValue
))
638 throwConfigError(paramName
, "double",
642 else if (paramName
== "faceYAngleUpThreshold")
644 if (!(ss
>> m_cfg
.faceYAngleUpThreshold
))
646 throwConfigError(paramName
, "double",
650 else if (paramName
== "faceYAngleDownThreshold")
652 if (!(ss
>> m_cfg
.faceYAngleDownThreshold
))
654 throwConfigError(paramName
, "double",
658 else if (paramName
== "autoBlink")
660 if (!(ss
>> m_cfg
.autoBlink
))
662 throwConfigError(paramName
, "bool",
666 else if (paramName
== "autoBreath")
668 if (!(ss
>> m_cfg
.autoBreath
))
670 throwConfigError(paramName
, "bool",
674 else if (paramName
== "randomMotion")
676 if (!(ss
>> m_cfg
.randomMotion
))
678 throwConfigError(paramName
, "bool",
// Fallback for a name that matched none of the parameters above.
684 std
::ostringstream oss
;
685 oss
<< "Unrecognized parameter name at line " << lineNum
686 << ": " << paramName
;
687 throw std
::runtime_error(oss
.str());
694 void FacialLandmarkDetector
::populateDefaultConfig(void)
696 // These are values that I've personally tested to work OK for my face.
697 // Your milage may vary - hence the config file.
699 m_cfg
.osfIpAddress
= "127.0.0.1";
700 m_cfg
.osfPort
= 11573;
701 m_cfg
.faceYAngleCorrection
= 10;
702 m_cfg
.eyeSmileEyeOpenThreshold
= 0.6;
703 m_cfg
.eyeSmileMouthFormThreshold
= 0.75;
704 m_cfg
.eyeSmileMouthOpenThreshold
= 0.5;
705 m_cfg
.faceXAngleNumTaps
= 7;
706 m_cfg
.faceYAngleNumTaps
= 7;
707 m_cfg
.faceZAngleNumTaps
= 7;
708 m_cfg
.mouthFormNumTaps
= 3;
709 m_cfg
.mouthOpenNumTaps
= 3;
710 m_cfg
.leftEyeOpenNumTaps
= 3;
711 m_cfg
.rightEyeOpenNumTaps
= 3;
712 m_cfg
.eyeClosedThreshold
= 0.18;
713 m_cfg
.eyeOpenThreshold
= 0.21;
714 m_cfg
.winkEnable
= true;
715 m_cfg
.mouthNormalThreshold
= 0.75;
716 m_cfg
.mouthSmileThreshold
= 1.0;
717 m_cfg
.mouthClosedThreshold
= 0.1;
718 m_cfg
.mouthOpenThreshold
= 0.4;
719 m_cfg
.mouthOpenLaughCorrection
= 0.2;
720 m_cfg
.faceYAngleXRotCorrection
= 0.15;
721 m_cfg
.faceYAngleSmileCorrection
= 0.075;
722 m_cfg
.faceYAngleZeroValue
= 1.8;
723 m_cfg
.faceYAngleDownThreshold
= 2.3;
724 m_cfg
.faceYAngleUpThreshold
= 1.3;
725 m_cfg
.autoBlink
= false;
726 m_cfg
.autoBreath
= false;
727 m_cfg
.randomMotion
= false;
// Formats a config-parsing error and throws it as std::runtime_error.
// The message names the parameter, the line number together with the
// offending line text, and the expected value type.
// This function always throws; it never returns normally.
// NOTE(review): the declaration of the `line` parameter used below is not
// present in this excerpt.
730 void FacialLandmarkDetector
::throwConfigError(std
::string paramName
,
731 std
::string expectedType
,
733 unsigned int lineNum
)
735 std
::ostringstream ss
;
736 ss
<< "Error parsing config file for parameter " << paramName
737 << "\nAt line " << lineNum
<< ": " << line
738 << "\nExpecting value of type " << expectedType
;
740 throw std
::runtime_error(ss
.str());