2 Copyright (c) 2020-2021 Adrian I. Lam
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights
7 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 copies of the Software, and to permit persons to whom the Software is
9 furnished to do so, subject to the following conditions:
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 # include <WinSock2.h>
33 # include <ws2tcpip.h>
36 # include <sys/types.h>
37 # include <sys/socket.h>
38 # include <arpa/inet.h>
42 #include "facial_landmark_detector.h"
43 #include "math_utils.h"
/**
 * Push a new sample into a moving-average window and trim the window.
 *
 * The sample is appended at the back of `buf`; the oldest samples are then
 * removed from the front until the buffer holds at most `numTaps` values.
 *
 * @param buf     Sample window, oldest value at the front.
 * @param newval  Sample to append.
 * @param numTaps Maximum number of samples to keep in the window.
 */
static void filterPush(std::deque<double>& buf, double newval,
                       std::size_t numTaps)
{
    buf.push_back(newval);

    // A loop rather than a single pop: the window may be longer than
    // numTaps by more than one element.
    while (buf.size() > numTaps)
    {
        buf.pop_front();
    }
}
56 FacialLandmarkDetector
::FacialLandmarkDetector(std
::string cfgPath
)
61 #ifdef _WIN32 // WinSock2 should be initialized before using
63 if (WSAStartup(MAKEWORD(2, 2), &wsaData
) != 0)
69 struct sockaddr_in addr
;
70 addr
.sin_family
= AF_INET
;
71 addr
.sin_port
= htons(m_cfg
.osfPort
);
72 addr
.sin_addr
.s_addr
= inet_addr(m_cfg
.osfIpAddress
.c_str());
74 m_sock
= socket(AF_INET
, SOCK_DGRAM
, IPPROTO_UDP
);
77 throw std
::runtime_error("Cannot create UDP socket");
80 int ret
= bind(m_sock
, (struct sockaddr
*)&addr
, sizeof addr
);
83 throw std
::runtime_error("Cannot bind socket");
87 FacialLandmarkDetector
::~FacialLandmarkDetector()
96 FacialLandmarkDetector
::Params FacialLandmarkDetector
::getParams(void) const
100 params
.faceXAngle
= avg(m_faceXAngle
);
101 params
.faceYAngle
= avg(m_faceYAngle
) + m_cfg
.faceYAngleCorrection
;
102 // faceYAngleCorrection (10 by default) corrects for the angle between the computer monitor and webcam
103 params
.faceZAngle
= avg(m_faceZAngle
);
104 params
.mouthOpenness
= avg(m_mouthOpenness
);
105 params
.mouthForm
= avg(m_mouthForm
);
107 double leftEye
= avg(m_leftEyeOpenness
, 1);
108 double rightEye
= avg(m_rightEyeOpenness
, 1);
109 bool sync
= !m_cfg
.winkEnable
;
111 if (m_cfg
.winkEnable
)
113 if (rightEye
< 0.1 && leftEye
> 0.2)
118 else if (leftEye
< 0.1 && rightEye
> 0.2)
131 // Combine the two to get better synchronized blinks
132 double bothEyes
= (leftEye
+ rightEye
) / 2;
137 params
.leftEyeOpenness
= leftEye
;
138 params
.rightEyeOpenness
= rightEye
;
140 if (leftEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
141 rightEye
<= m_cfg
.eyeSmileEyeOpenThreshold
&&
142 params
.mouthForm
> m_cfg
.eyeSmileMouthFormThreshold
&&
143 params
.mouthOpenness
> m_cfg
.eyeSmileMouthOpenThreshold
)
145 params
.leftEyeSmile
= 1;
146 params
.rightEyeSmile
= 1;
150 params
.leftEyeSmile
= 0;
151 params
.rightEyeSmile
= 0;
154 params
.autoBlink
= m_cfg
.autoBlink
;
155 params
.autoBreath
= m_cfg
.autoBreath
;
156 params
.randomMotion
= m_cfg
.randomMotion
;
161 void FacialLandmarkDetector
::stop(void)
166 void FacialLandmarkDetector
::mainLoop(void)
170 // Read UDP packet from OSF
171 static const int nPoints
= 68;
172 static const int packetFrameSize
= 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4
173 + 4 * 4 + 4 * 68 + 4 * 2 * 68 + 4 * 3 * 70 + 4 * 14;
175 static const int landmarksOffset
= 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4
178 char buf
[packetFrameSize
];
179 auto recvSize
= recv(m_sock
, buf
, sizeof buf
, 0);
181 if (recvSize
!= packetFrameSize
) continue;
182 // Note: This is dependent on endianness, and we would assume that
183 // the OSF instance is run on a machine with the same endianness
184 // as our current machine.
185 int recvFaceId
= *(int *)(buf
+ 8);
186 if (recvFaceId
!= m_faceId
) continue; // We only support one face
188 Point landmarks
[nPoints
];
190 for (int i
= 0; i
< nPoints
; i
++)
192 float x
= *(float *)(buf
+ landmarksOffset
+ i
* 2 * sizeof(float));
193 float y
= *(float *)(buf
+ landmarksOffset
+ (i
* 2 + 1) * sizeof(float));
199 /* The coordinates seem to be rather noisy in general.
200 * We will push everything through some moving average filters
201 * to reduce noise. The number of taps is determined empirically
202 * until we get something good.
203 * An alternative method would be to get some better dataset -
204 * perhaps even to train on a custom data set just for the user.
207 // Face rotation: X direction (left-right)
208 double faceXRot
= calcFaceXAngle(landmarks
);
209 filterPush(m_faceXAngle
, faceXRot
, m_cfg
.faceXAngleNumTaps
);
211 // Mouth form (smile / laugh) detection
212 double mouthForm
= calcMouthForm(landmarks
);
213 filterPush(m_mouthForm
, mouthForm
, m_cfg
.mouthFormNumTaps
);
215 // Face rotation: Y direction (up-down)
216 double faceYRot
= calcFaceYAngle(landmarks
, faceXRot
, mouthForm
);
217 filterPush(m_faceYAngle
, faceYRot
, m_cfg
.faceYAngleNumTaps
);
219 // Face rotation: Z direction (head tilt)
220 double faceZRot
= calcFaceZAngle(landmarks
);
221 filterPush(m_faceZAngle
, faceZRot
, m_cfg
.faceZAngleNumTaps
);
224 double mouthOpen
= calcMouthOpenness(landmarks
, mouthForm
);
225 filterPush(m_mouthOpenness
, mouthOpen
, m_cfg
.mouthOpenNumTaps
);
228 double eyeLeftOpen
= calcEyeOpenness(LEFT
, landmarks
, faceYRot
);
229 filterPush(m_leftEyeOpenness
, eyeLeftOpen
, m_cfg
.leftEyeOpenNumTaps
);
230 double eyeRightOpen
= calcEyeOpenness(RIGHT
, landmarks
, faceYRot
);
231 filterPush(m_rightEyeOpenness
, eyeRightOpen
, m_cfg
.rightEyeOpenNumTaps
);
233 // Eyebrows: the landmark detection doesn't work very well for my face,
234 // so I've not implemented them.
238 double FacialLandmarkDetector
::calcEyeAspectRatio(
239 Point
& p1
, Point
& p2
,
240 Point
& p3
, Point
& p4
,
241 Point
& p5
, Point
& p6
) const
243 double eyeWidth
= dist(p1
, p4
);
244 double eyeHeight1
= dist(p2
, p6
);
245 double eyeHeight2
= dist(p3
, p5
);
247 return (eyeHeight1
+ eyeHeight2
) / (2 * eyeWidth
);
250 double FacialLandmarkDetector
::calcEyeOpenness(
253 double faceYAngle
) const
255 double eyeAspectRatio
;
258 eyeAspectRatio
= calcEyeAspectRatio(landmarks
[42], landmarks
[43], landmarks
[44],
259 landmarks
[45], landmarks
[46], landmarks
[47]);
263 eyeAspectRatio
= calcEyeAspectRatio(landmarks
[36], landmarks
[37], landmarks
[38],
264 landmarks
[39], landmarks
[40], landmarks
[41]);
267 // Apply correction due to faceYAngle
268 double corrEyeAspRat
= eyeAspectRatio
/ std
::cos(degToRad(faceYAngle
));
270 return linearScale01(corrEyeAspRat
, m_cfg
.eyeClosedThreshold
, m_cfg
.eyeOpenThreshold
);
275 double FacialLandmarkDetector
::calcMouthForm(Point landmarks
[]) const
277 /* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
278 * Compare distance between the two corners of the mouth
279 * to the distance between the two eyes.
282 /* An alternative (my initial attempt) was to compare the corners of
283 * the mouth to the top of the upper lip - they almost lie on a
284 * straight line when smiling / laughing. But that is only true
285 * when facing straight at the camera. When looking up / down,
286 * the angle changes. So here we'll use the distance approach instead.
289 auto eye1
= centroid(landmarks
[36], landmarks
[37], landmarks
[38],
290 landmarks
[39], landmarks
[40], landmarks
[41]);
291 auto eye2
= centroid(landmarks
[42], landmarks
[43], landmarks
[44],
292 landmarks
[45], landmarks
[46], landmarks
[47]);
293 double distEyes
= dist(eye1
, eye2
);
294 double distMouth
= dist(landmarks
[58], landmarks
[62]);
296 double form
= linearScale01(distMouth
/ distEyes
,
297 m_cfg
.mouthNormalThreshold
,
298 m_cfg
.mouthSmileThreshold
);
303 double FacialLandmarkDetector
::calcMouthOpenness(
305 double mouthForm
) const
307 // Use points for the bottom of the upper lip, and top of the lower lip
308 // We have 3 pairs of points available, which give the mouth height
309 // on the left, in the middle, and on the right, resp.
310 // First let's try to use an average of all three.
311 double heightLeft
= dist(landmarks
[61], landmarks
[63]);
312 double heightMiddle
= dist(landmarks
[60], landmarks
[64]);
313 double heightRight
= dist(landmarks
[59], landmarks
[65]);
315 double avgHeight
= (heightLeft
+ heightMiddle
+ heightRight
) / 3;
317 // Now, normalize it with the width of the mouth.
318 double width
= dist(landmarks
[58], landmarks
[62]);
320 double normalized
= avgHeight
/ width
;
322 double scaled
= linearScale01(normalized
,
323 m_cfg
.mouthClosedThreshold
,
324 m_cfg
.mouthOpenThreshold
,
327 // Apply correction according to mouthForm
328 // Notice that when you smile / laugh, width is increased
329 scaled
*= (1 + m_cfg
.mouthOpenLaughCorrection
* mouthForm
);
334 double FacialLandmarkDetector
::calcFaceXAngle(Point landmarks
[]) const
336 // This function will be easier to understand if you refer to the
337 // diagram in faceXAngle.png
339 // Construct the y-axis using (1) average of four points on the nose and
340 // (2) average of five points on the upper lip.
342 auto y0
= centroid(landmarks
[27], landmarks
[28], landmarks
[29],
344 auto y1
= centroid(landmarks
[48], landmarks
[49], landmarks
[50],
345 landmarks
[51], landmarks
[52]);
347 // Now drop a perpendicular from the left and right edges of the face,
348 // and calculate the ratio between the lengths of these perpendiculars
350 auto left
= centroid(landmarks
[14], landmarks
[15], landmarks
[16]);
351 auto right
= centroid(landmarks
[0], landmarks
[1], landmarks
[2]);
353 // Constructing a perpendicular:
354 // Join the left/right point and the upper lip. The included angle
355 // can now be determined using cosine rule.
356 // Then sine of this angle is the perpendicular divided by the newly
358 double opp
= dist(right
, y0
);
359 double adj1
= dist(y0
, y1
);
360 double adj2
= dist(y1
, right
);
361 double angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
362 double perpRight
= adj2
* std
::sin(angle
);
364 opp
= dist(left
, y0
);
365 adj2
= dist(y1
, left
);
366 angle
= solveCosineRuleAngle(opp
, adj1
, adj2
);
367 double perpLeft
= adj2
* std
::sin(angle
);
369 // Model the head as a sphere and look from above.
370 double theta
= std
::asin((perpRight
- perpLeft
) / (perpRight
+ perpLeft
));
372 theta
= radToDeg(theta
);
373 if (theta
< -30) theta
= -30;
374 if (theta
> 30) theta
= 30;
378 double FacialLandmarkDetector
::calcFaceYAngle(Point landmarks
[], double faceXAngle
, double mouthForm
) const
381 // angle between the two left/right points and the tip
382 double c
= dist(landmarks
[31], landmarks
[35]);
383 double a
= dist(landmarks
[30], landmarks
[31]);
384 double b
= dist(landmarks
[30], landmarks
[35]);
386 double angle
= solveCosineRuleAngle(c
, a
, b
);
388 // This probably varies a lot from person to person...
390 // Best is probably to work out some trigonometry again,
391 // but just linear interpolation seems to work ok...
393 // Correct for X rotation
394 double corrAngle
= angle
* (1 + (std
::abs(faceXAngle
) / 30
395 * m_cfg
.faceYAngleXRotCorrection
));
397 // Correct for smiles / laughs - this increases the angle
398 corrAngle
*= (1 - mouthForm
* m_cfg
.faceYAngleSmileCorrection
);
400 if (corrAngle
>= m_cfg
.faceYAngleZeroValue
)
402 return -30 * linearScale01(corrAngle
,
403 m_cfg
.faceYAngleZeroValue
,
404 m_cfg
.faceYAngleDownThreshold
,
409 return 30 * (1 - linearScale01(corrAngle
,
410 m_cfg
.faceYAngleUpThreshold
,
411 m_cfg
.faceYAngleZeroValue
,
416 double FacialLandmarkDetector
::calcFaceZAngle(Point landmarks
[]) const
418 // Use average of eyes and nose
420 auto eyeRight
= centroid(landmarks
[36], landmarks
[37], landmarks
[38],
421 landmarks
[39], landmarks
[40], landmarks
[41]);
422 auto eyeLeft
= centroid(landmarks
[42], landmarks
[43], landmarks
[44],
423 landmarks
[45], landmarks
[46], landmarks
[47]);
425 auto noseLeft
= landmarks
[35];
426 auto noseRight
= landmarks
[31];
428 double eyeYDiff
= eyeRight
.y
- eyeLeft
.y
;
429 double eyeXDiff
= eyeRight
.x
- eyeLeft
.x
;
431 double angle1
= std
::atan(eyeYDiff
/ eyeXDiff
);
433 double noseYDiff
= noseRight
.y
- noseLeft
.y
;
434 double noseXDiff
= noseRight
.x
- noseLeft
.x
;
436 double angle2
= std
::atan(noseYDiff
/ noseXDiff
);
438 return radToDeg((angle1
+ angle2
) / 2);
441 void FacialLandmarkDetector
::parseConfig(std
::string cfgPath
)
443 populateDefaultConfig();
446 std
::ifstream
file(cfgPath
);
450 throw std
::runtime_error("Failed to open config file");
454 unsigned int lineNum
= 0;
456 while (std
::getline(file
, line
))
465 std
::istringstream
ss(line
);
466 std
::string paramName
;
469 if (paramName
== "osfIpAddress")
471 if (!(ss
>> m_cfg
.osfIpAddress
))
473 throwConfigError(paramName
, "std::string",
477 else if (paramName
== "osfPort")
479 if (!(ss
>> m_cfg
.osfPort
))
481 throwConfigError(paramName
, "int",
485 else if (paramName
== "faceYAngleCorrection")
487 if (!(ss
>> m_cfg
.faceYAngleCorrection
))
489 throwConfigError(paramName
, "double",
493 else if (paramName
== "eyeSmileEyeOpenThreshold")
495 if (!(ss
>> m_cfg
.eyeSmileEyeOpenThreshold
))
497 throwConfigError(paramName
, "double",
501 else if (paramName
== "eyeSmileMouthFormThreshold")
503 if (!(ss
>> m_cfg
.eyeSmileMouthFormThreshold
))
505 throwConfigError(paramName
, "double",
509 else if (paramName
== "eyeSmileMouthOpenThreshold")
511 if (!(ss
>> m_cfg
.eyeSmileMouthOpenThreshold
))
513 throwConfigError(paramName
, "double",
517 else if (paramName
== "faceXAngleNumTaps")
519 if (!(ss
>> m_cfg
.faceXAngleNumTaps
))
521 throwConfigError(paramName
, "std::size_t",
525 else if (paramName
== "faceYAngleNumTaps")
527 if (!(ss
>> m_cfg
.faceYAngleNumTaps
))
529 throwConfigError(paramName
, "std::size_t",
533 else if (paramName
== "faceZAngleNumTaps")
535 if (!(ss
>> m_cfg
.faceZAngleNumTaps
))
537 throwConfigError(paramName
, "std::size_t",
541 else if (paramName
== "mouthFormNumTaps")
543 if (!(ss
>> m_cfg
.mouthFormNumTaps
))
545 throwConfigError(paramName
, "std::size_t",
549 else if (paramName
== "mouthOpenNumTaps")
551 if (!(ss
>> m_cfg
.mouthOpenNumTaps
))
553 throwConfigError(paramName
, "std::size_t",
557 else if (paramName
== "leftEyeOpenNumTaps")
559 if (!(ss
>> m_cfg
.leftEyeOpenNumTaps
))
561 throwConfigError(paramName
, "std::size_t",
565 else if (paramName
== "rightEyeOpenNumTaps")
567 if (!(ss
>> m_cfg
.rightEyeOpenNumTaps
))
569 throwConfigError(paramName
, "std::size_t",
573 else if (paramName
== "eyeClosedThreshold")
575 if (!(ss
>> m_cfg
.eyeClosedThreshold
))
577 throwConfigError(paramName
, "double",
581 else if (paramName
== "eyeOpenThreshold")
583 if (!(ss
>> m_cfg
.eyeOpenThreshold
))
585 throwConfigError(paramName
, "double",
589 else if (paramName
== "winkEnable")
591 if (!(ss
>> m_cfg
.winkEnable
))
593 throwConfigError(paramName
, "bool",
597 else if (paramName
== "mouthNormalThreshold")
599 if (!(ss
>> m_cfg
.mouthNormalThreshold
))
601 throwConfigError(paramName
, "double",
605 else if (paramName
== "mouthSmileThreshold")
607 if (!(ss
>> m_cfg
.mouthSmileThreshold
))
609 throwConfigError(paramName
, "double",
613 else if (paramName
== "mouthClosedThreshold")
615 if (!(ss
>> m_cfg
.mouthClosedThreshold
))
617 throwConfigError(paramName
, "double",
621 else if (paramName
== "mouthOpenThreshold")
623 if (!(ss
>> m_cfg
.mouthOpenThreshold
))
625 throwConfigError(paramName
, "double",
629 else if (paramName
== "mouthOpenLaughCorrection")
631 if (!(ss
>> m_cfg
.mouthOpenLaughCorrection
))
633 throwConfigError(paramName
, "double",
637 else if (paramName
== "faceYAngleXRotCorrection")
639 if (!(ss
>> m_cfg
.faceYAngleXRotCorrection
))
641 throwConfigError(paramName
, "double",
645 else if (paramName
== "faceYAngleSmileCorrection")
647 if (!(ss
>> m_cfg
.faceYAngleSmileCorrection
))
649 throwConfigError(paramName
, "double",
653 else if (paramName
== "faceYAngleZeroValue")
655 if (!(ss
>> m_cfg
.faceYAngleZeroValue
))
657 throwConfigError(paramName
, "double",
661 else if (paramName
== "faceYAngleUpThreshold")
663 if (!(ss
>> m_cfg
.faceYAngleUpThreshold
))
665 throwConfigError(paramName
, "double",
669 else if (paramName
== "faceYAngleDownThreshold")
671 if (!(ss
>> m_cfg
.faceYAngleDownThreshold
))
673 throwConfigError(paramName
, "double",
677 else if (paramName
== "autoBlink")
679 if (!(ss
>> m_cfg
.autoBlink
))
681 throwConfigError(paramName
, "bool",
685 else if (paramName
== "autoBreath")
687 if (!(ss
>> m_cfg
.autoBreath
))
689 throwConfigError(paramName
, "bool",
693 else if (paramName
== "randomMotion")
695 if (!(ss
>> m_cfg
.randomMotion
))
697 throwConfigError(paramName
, "bool",
703 std
::ostringstream oss
;
704 oss
<< "Unrecognized parameter name at line " << lineNum
705 << ": " << paramName
;
706 throw std
::runtime_error(oss
.str());
713 void FacialLandmarkDetector
::populateDefaultConfig(void)
715 // These are values that I've personally tested to work OK for my face.
716 // Your milage may vary - hence the config file.
718 m_cfg
.osfIpAddress
= "127.0.0.1";
719 m_cfg
.osfPort
= 11573;
720 m_cfg
.faceYAngleCorrection
= 10;
721 m_cfg
.eyeSmileEyeOpenThreshold
= 0.6;
722 m_cfg
.eyeSmileMouthFormThreshold
= 0.75;
723 m_cfg
.eyeSmileMouthOpenThreshold
= 0.5;
724 m_cfg
.faceXAngleNumTaps
= 7;
725 m_cfg
.faceYAngleNumTaps
= 7;
726 m_cfg
.faceZAngleNumTaps
= 7;
727 m_cfg
.mouthFormNumTaps
= 3;
728 m_cfg
.mouthOpenNumTaps
= 3;
729 m_cfg
.leftEyeOpenNumTaps
= 3;
730 m_cfg
.rightEyeOpenNumTaps
= 3;
731 m_cfg
.eyeClosedThreshold
= 0.18;
732 m_cfg
.eyeOpenThreshold
= 0.21;
733 m_cfg
.winkEnable
= true;
734 m_cfg
.mouthNormalThreshold
= 0.75;
735 m_cfg
.mouthSmileThreshold
= 1.0;
736 m_cfg
.mouthClosedThreshold
= 0.1;
737 m_cfg
.mouthOpenThreshold
= 0.4;
738 m_cfg
.mouthOpenLaughCorrection
= 0.2;
739 m_cfg
.faceYAngleXRotCorrection
= 0.15;
740 m_cfg
.faceYAngleSmileCorrection
= 0.075;
741 m_cfg
.faceYAngleZeroValue
= 1.8;
742 m_cfg
.faceYAngleDownThreshold
= 2.3;
743 m_cfg
.faceYAngleUpThreshold
= 1.3;
744 m_cfg
.autoBlink
= false;
745 m_cfg
.autoBreath
= false;
746 m_cfg
.randomMotion
= false;
749 void FacialLandmarkDetector
::throwConfigError(std
::string paramName
,
750 std
::string expectedType
,
752 unsigned int lineNum
)
754 std
::ostringstream ss
;
755 ss
<< "Error parsing config file for parameter " << paramName
756 << "\nAt line " << lineNum
<< ": " << line
757 << "\nExpecting value of type " << expectedType
;
759 throw std
::runtime_error(ss
.str());