#include <sstream>
#include <cmath>
-#include <opencv2/opencv.hpp>
-
-#include <dlib/opencv.h>
-#include <dlib/image_processing/frontal_face_detector.h>
-#include <dlib/image_processing.h>
-#include <dlib/image_processing/render_face_detections.h>
-
#include "facial_landmark_detector.h"
#include "math_utils.h"
FacialLandmarkDetector::FacialLandmarkDetector(std::string cfgPath)
{
parseConfig(cfgPath);
- if (!webcam.open(m_cfg.cvVideoCaptureId))
- {
- throw std::runtime_error("Unable to open webcam");
- }
-
- detector = dlib::get_frontal_face_detector();
- dlib::deserialize(m_cfg.predictorPath) >> predictor;
+ // TODO: set up the UDP connection here?
}
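// A minimal sketch of the UDP setup the TODO above refers to, using plain
// POSIX sockets: OpenSeeFace emits its tracking data as UDP packets, so the
// detector only needs a bound datagram socket to receive them. The port
// would presumably come from a new config entry; osfPort is a hypothetical
// name here.
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <cstring>
#include <cstdint>
#include <stdexcept>

static int openOsfSocket(uint16_t osfPort)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);
    if (fd < 0)
    {
        throw std::runtime_error("Unable to create UDP socket");
    }

    sockaddr_in addr;
    std::memset(&addr, 0, sizeof addr);
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(osfPort);

    if (bind(fd, reinterpret_cast<const sockaddr*>(&addr), sizeof addr) < 0)
    {
        close(fd);
        throw std::runtime_error("Unable to bind UDP socket");
    }
    return fd;
}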
FacialLandmarkDetector::Params FacialLandmarkDetector::getParams(void) const
{
while (!m_stop)
{
- cv::Mat frame;
- if (!webcam.read(frame))
- {
- throw std::runtime_error("Unable to read from webcam");
- }
- cv::Mat flipped;
if (m_cfg.lateralInversion)
{
- cv::flip(frame, flipped, 1);
+ // TODO: can we handle lateral inversion here, or only in OSF?
}
- else
- {
- flipped = frame;
- }
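// One possible answer to the TODO above, if mirroring is done on this side
// rather than by the tracker: reflect every landmark about the vertical
// centre line of the camera frame. The frame width would have to be known
// (the OSF packet presumably includes the camera resolution). Note that,
// unlike flipping the video before detection, this does not re-run
// detection, so landmark index groups with left/right meaning (e.g. the
// two eyes) would also have to be swapped to match.
static void mirrorLandmarks(Point landmarks[], int count, double frameWidth)
{
    for (int i = 0; i < count; i++)
    {
        landmarks[i].x = frameWidth - landmarks[i].x;
    }
}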
- dlib::cv_image<dlib::bgr_pixel> cimg(flipped);
- if (m_cfg.showWebcamVideo)
- {
- win.set_image(cimg);
- }
+ // TODO: get the array of landmark coordinates here
+ Point landmarks[68];
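// A sketch of how the TODO above might be filled in (headers as in the
// socket sketch above), assuming Point is a plain struct with double x and
// y members, and reusing the socket from the constructor sketch. The byte
// offset of the 68 (x, y) float pairs within a packet is fixed by the
// OpenSeeFace protocol; landmarksOffset is left as a parameter to be filled
// in from that spec. This also assumes sender and receiver share the same
// float byte order (both typically little-endian).
static bool readOsfLandmarks(int fd, std::size_t landmarksOffset,
                             Point landmarks[68])
{
    uint8_t buf[2048];
    ssize_t n = recv(fd, buf, sizeof buf, 0);
    if (n < 0
        || static_cast<std::size_t>(n) < landmarksOffset + 68 * 2 * sizeof(float))
    {
        return false;
    }

    const uint8_t* p = buf + landmarksOffset;
    for (int i = 0; i < 68; i++)
    {
        float x, y;
        std::memcpy(&x, p, sizeof x); p += sizeof x;
        std::memcpy(&y, p, sizeof y); p += sizeof y;
        landmarks[i].x = x;
        landmarks[i].y = y;
    }
    return true;
}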
- std::vector<dlib::rectangle> faces = detector(cimg);
- if (faces.size() > 0)
- {
- dlib::rectangle face = faces[0];
- dlib::full_object_detection shape = predictor(cimg, face);
-
- /* The coordinates seem to be rather noisy in general.
- * We will push everything through some moving average filters
- * to reduce noise. The number of taps is determined empirically
- * until we get something good.
- * An alternative method would be to get some better dataset
- * for dlib - perhaps even to train on a custom data set just for the user.
- */
-
- // Face rotation: X direction (left-right)
- double faceXRot = calcFaceXAngle(shape);
- filterPush(m_faceXAngle, faceXRot, m_cfg.faceXAngleNumTaps);
-
- // Mouth form (smile / laugh) detection
- double mouthForm = calcMouthForm(shape);
- filterPush(m_mouthForm, mouthForm, m_cfg.mouthFormNumTaps);
-
- // Face rotation: Y direction (up-down)
- double faceYRot = calcFaceYAngle(shape, faceXRot, mouthForm);
- filterPush(m_faceYAngle, faceYRot, m_cfg.faceYAngleNumTaps);
-
- // Face rotation: Z direction (head tilt)
- double faceZRot = calcFaceZAngle(shape);
- filterPush(m_faceZAngle, faceZRot, m_cfg.faceZAngleNumTaps);
-
- // Mouth openness
- double mouthOpen = calcMouthOpenness(shape, mouthForm);
- filterPush(m_mouthOpenness, mouthOpen, m_cfg.mouthOpenNumTaps);
-
- // Eye openness
- double eyeLeftOpen = calcEyeOpenness(LEFT, shape, faceYRot);
- filterPush(m_leftEyeOpenness, eyeLeftOpen, m_cfg.leftEyeOpenNumTaps);
- double eyeRightOpen = calcEyeOpenness(RIGHT, shape, faceYRot);
- filterPush(m_rightEyeOpenness, eyeRightOpen, m_cfg.rightEyeOpenNumTaps);
-
- // TODO eyebrows?
-
- if (m_cfg.showWebcamVideo && m_cfg.renderLandmarksOnVideo)
- {
- win.clear_overlay();
- win.add_overlay(dlib::render_face_detections(shape));
- }
- }
- else
- {
- if (m_cfg.showWebcamVideo && m_cfg.renderLandmarksOnVideo)
- {
- win.clear_overlay();
- }
- }
+ /* The coordinates seem to be rather noisy in general.
+ * We will push everything through some moving average filters
+ * to reduce noise. The number of taps is determined empirically
+ * until we get something good.
+ * An alternative method would be to get a better dataset,
+ * perhaps even to train on a custom dataset just for the user.
+ */
+
+ // Face rotation: X direction (left-right)
+ double faceXRot = calcFaceXAngle(landmarks);
+ filterPush(m_faceXAngle, faceXRot, m_cfg.faceXAngleNumTaps);
- cv::waitKey(m_cfg.cvWaitKeyMs);
+ // Mouth form (smile / laugh) detection
+ double mouthForm = calcMouthForm(landmarks);
+ filterPush(m_mouthForm, mouthForm, m_cfg.mouthFormNumTaps);
+
+ // Face rotation: Y direction (up-down)
+ double faceYRot = calcFaceYAngle(landmarks, faceXRot, mouthForm);
+ filterPush(m_faceYAngle, faceYRot, m_cfg.faceYAngleNumTaps);
+
+ // Face rotation: Z direction (head tilt)
+ double faceZRot = calcFaceZAngle(landmarks);
+ filterPush(m_faceZAngle, faceZRot, m_cfg.faceZAngleNumTaps);
+
+ // Mouth openness
+ double mouthOpen = calcMouthOpenness(landmarks, mouthForm);
+ filterPush(m_mouthOpenness, mouthOpen, m_cfg.mouthOpenNumTaps);
+
+ // Eye openness
+ double eyeLeftOpen = calcEyeOpenness(LEFT, landmarks, faceYRot);
+ filterPush(m_leftEyeOpenness, eyeLeftOpen, m_cfg.leftEyeOpenNumTaps);
+ double eyeRightOpen = calcEyeOpenness(RIGHT, landmarks, faceYRot);
+ filterPush(m_rightEyeOpenness, eyeRightOpen, m_cfg.rightEyeOpenNumTaps);
+
+ // TODO eyebrows?
}
}
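// filterPush and the m_* buffers are not shown in this diff; from the
// comment above about moving-average filters, a plausible sketch is a
// bounded FIFO whose mean is read off as the filtered value:
#include <deque>

static void filterPushSketch(std::deque<double>& buf, double newVal,
                             std::size_t numTaps)
{
    buf.push_back(newVal);
    while (buf.size() > numTaps)
    {
        buf.pop_front();
    }
}

static double filterMean(const std::deque<double>& buf)
{
    if (buf.empty()) return 0;
    double sum = 0;
    for (double v : buf)
    {
        sum += v;
    }
    return sum / buf.size();
}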
double FacialLandmarkDetector::calcEyeAspectRatio(
- dlib::point& p1, dlib::point& p2,
- dlib::point& p3, dlib::point& p4,
- dlib::point& p5, dlib::point& p6) const
+ Point& p1, Point& p2,
+ Point& p3, Point& p4,
+ Point& p5, Point& p6) const
{
double eyeWidth = dist(p1, p4);
double eyeHeight1 = dist(p2, p6);
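// For reference, the quantity assembled here looks like the eye aspect
// ratio (EAR) used for blink detection (Soukupová & Čech, 2016):
//
//   EAR = (dist(p2, p6) + dist(p3, p5)) / (2 * dist(p1, p4))
//
// i.e. mean eye height over eye width. dist itself comes from math_utils.h
// and is presumably the plain Euclidean distance:
static double distSketch(const Point& a, const Point& b)
{
    return std::hypot(a.x - b.x, a.y - b.y);
}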
double FacialLandmarkDetector::calcEyeOpenness(
LeftRight eye,
- dlib::full_object_detection& shape,
+ Point landmarks[],
double faceYAngle) const
{
double eyeAspectRatio;
if (eye == LEFT)
{
- eyeAspectRatio = calcEyeAspectRatio(shape.part(42), shape.part(43), shape.part(44),
- shape.part(45), shape.part(46), shape.part(47));
+ eyeAspectRatio = calcEyeAspectRatio(landmarks[42], landmarks[43], landmarks[44],
+ landmarks[45], landmarks[46], landmarks[47]);
}
else
{
- eyeAspectRatio = calcEyeAspectRatio(shape.part(36), shape.part(37), shape.part(38),
- shape.part(39), shape.part(40), shape.part(41));
+ eyeAspectRatio = calcEyeAspectRatio(landmarks[36], landmarks[37], landmarks[38],
+ landmarks[39], landmarks[40], landmarks[41]);
}
// Apply correction due to faceYAngle
-double FacialLandmarkDetector::calcMouthForm(dlib::full_object_detection& shape) const
+double FacialLandmarkDetector::calcMouthForm(Point landmarks[]) const
{
/* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
* Compare distance between the two corners of the mouth
* the angle changes. So here we'll use the distance approach instead.
*/
- auto eye1 = centroid(shape.part(36), shape.part(37), shape.part(38),
- shape.part(39), shape.part(40), shape.part(41));
- auto eye2 = centroid(shape.part(42), shape.part(43), shape.part(44),
- shape.part(45), shape.part(46), shape.part(47));
+ auto eye1 = centroid(landmarks[36], landmarks[37], landmarks[38],
+ landmarks[39], landmarks[40], landmarks[41]);
+ auto eye2 = centroid(landmarks[42], landmarks[43], landmarks[44],
+ landmarks[45], landmarks[46], landmarks[47]);
double distEyes = dist(eye1, eye2);
- double distMouth = dist(shape.part(48), shape.part(54));
+ double distMouth = dist(landmarks[58], landmarks[62]);
double form = linearScale01(distMouth / distEyes,
m_cfg.mouthNormalThreshold,
}
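// Neither linearScale01 nor centroid appears in this diff; both come from
// math_utils.h. From the call sites, plausible sketches are: map a ratio
// lying between two thresholds onto [0, 1] with clamping, and average a
// pack of points.
#include <initializer_list>

static double linearScale01Sketch(double x, double lo, double hi)
{
    if (x <= lo) return 0.0;
    if (x >= hi) return 1.0;
    return (x - lo) / (hi - lo);
}

template <typename... Ts>
static Point centroidSketch(const Ts&... pts)
{
    static_assert(sizeof...(Ts) > 0, "centroid of zero points");
    Point c{};
    for (const Point& p : {pts...})
    {
        c.x += p.x;
        c.y += p.y;
    }
    c.x /= sizeof...(pts);
    c.y /= sizeof...(pts);
    return c;
}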
double FacialLandmarkDetector::calcMouthOpenness(
- dlib::full_object_detection& shape,
+ Point landmarks[],
double mouthForm) const
{
// Use points for the bottom of the upper lip, and top of the lower lip
// We have 3 pairs of points available, which give the mouth height
// on the left, in the middle, and on the right, resp.
// First let's try to use an average of all three.
- double heightLeft = dist(shape.part(63), shape.part(65));
- double heightMiddle = dist(shape.part(62), shape.part(66));
- double heightRight = dist(shape.part(61), shape.part(67));
+ double heightLeft = dist(landmarks[61], landmarks[63]);
+ double heightMiddle = dist(landmarks[60], landmarks[64]);
+ double heightRight = dist(landmarks[59], landmarks[65]);
double avgHeight = (heightLeft + heightMiddle + heightRight) / 3;
// Now, normalize it with the width of the mouth.
- double width = dist(shape.part(60), shape.part(64));
+ double width = dist(landmarks[58], landmarks[62]);
double normalized = avgHeight / width;
return scaled;
}
-double FacialLandmarkDetector::calcFaceXAngle(dlib::full_object_detection& shape) const
+double FacialLandmarkDetector::calcFaceXAngle(Point landmarks[]) const
{
// This function will be easier to understand if you refer to the
// diagram in faceXAngle.png
// Construct the y-axis using (1) average of four points on the nose and
- // (2) average of four points on the upper lip.
+ // (2) average of five points on the upper lip.
- auto y0 = centroid(shape.part(27), shape.part(28), shape.part(29),
- shape.part(30));
- auto y1 = centroid(shape.part(50), shape.part(51), shape.part(52),
- shape.part(62));
+ auto y0 = centroid(landmarks[27], landmarks[28], landmarks[29],
+ landmarks[30]);
+ auto y1 = centroid(landmarks[48], landmarks[49], landmarks[50],
+ landmarks[51], landmarks[52]);
// Now drop a perpendicular from the left and right edges of the face,
// and calculate the ratio between the lengths of these perpendiculars
- auto left = centroid(shape.part(14), shape.part(15), shape.part(16));
- auto right = centroid(shape.part(0), shape.part(1), shape.part(2));
+ auto left = centroid(landmarks[14], landmarks[15], landmarks[16]);
+ auto right = centroid(landmarks[0], landmarks[1], landmarks[2]);
// Constructing a perpendicular:
// Join the left/right point and the upper lip. The included angle
return theta;
}
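// solveCosineRuleAngle (math_utils.h) is not shown either. Judging from the
// call in calcFaceYAngle below, it presumably returns the angle opposite
// its first argument via the law of cosines, a^2 = b^2 + c^2 - 2bc*cos(A):
static double solveCosineRuleAngleSketch(double opposite,
                                         double adjacent1,
                                         double adjacent2)
{
    return std::acos((adjacent1 * adjacent1 + adjacent2 * adjacent2
                      - opposite * opposite)
                     / (2 * adjacent1 * adjacent2));
}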
-double FacialLandmarkDetector::calcFaceYAngle(dlib::full_object_detection& shape, double faceXAngle, double mouthForm) const
+double FacialLandmarkDetector::calcFaceYAngle(Point landmarks[], double faceXAngle, double mouthForm) const
{
// Use the nose
// angle between the two left/right points and the tip
- double c = dist(shape.part(31), shape.part(35));
- double a = dist(shape.part(30), shape.part(31));
- double b = dist(shape.part(30), shape.part(35));
+ double c = dist(landmarks[31], landmarks[35]);
+ double a = dist(landmarks[30], landmarks[31]);
+ double b = dist(landmarks[30], landmarks[35]);
double angle = solveCosineRuleAngle(c, a, b);
}
}
-double FacialLandmarkDetector::calcFaceZAngle(dlib::full_object_detection& shape) const
+double FacialLandmarkDetector::calcFaceZAngle(Point landmarks[]) const
{
// Use average of eyes and nose
- auto eyeRight = centroid(shape.part(36), shape.part(37), shape.part(38),
- shape.part(39), shape.part(40), shape.part(41));
- auto eyeLeft = centroid(shape.part(42), shape.part(43), shape.part(44),
- shape.part(45), shape.part(46), shape.part(47));
+ auto eyeRight = centroid(landmarks[36], landmarks[37], landmarks[38],
+ landmarks[39], landmarks[40], landmarks[41]);
+ auto eyeLeft = centroid(landmarks[42], landmarks[43], landmarks[44],
+ landmarks[45], landmarks[46], landmarks[47]);
- auto noseLeft = shape.part(35);
- auto noseRight = shape.part(31);
+ auto noseLeft = landmarks[35];
+ auto noseRight = landmarks[31];
- double eyeYDiff = eyeRight.y() - eyeLeft.y();
- double eyeXDiff = eyeRight.x() - eyeLeft.x();
+ double eyeYDiff = eyeRight.y - eyeLeft.y;
+ double eyeXDiff = eyeRight.x - eyeLeft.x;
double angle1 = std::atan(eyeYDiff / eyeXDiff);
- double noseYDiff = noseRight.y() - noseLeft.y();
- double noseXDiff = noseRight.x() - noseLeft.x();
+ double noseYDiff = noseRight.y - noseLeft.y;
+ double noseXDiff = noseRight.x - noseLeft.x;
double angle2 = std::atan(noseYDiff / noseXDiff);
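// The two slopes above go through std::atan(dy / dx). A hedged alternative
// is std::atan2, which handles vertically aligned points directly and
// returns the full signed angle, at no extra cost:
static double rollAngleSketch(const Point& a, const Point& b)
{
    return std::atan2(b.y - a.y, b.x - a.x);
}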
std::string paramName;
if (ss >> paramName)
{
- if (paramName == "cvVideoCaptureId")
- {
- if (!(ss >> m_cfg.cvVideoCaptureId))
- {
- throwConfigError(paramName, "int",
- line, lineNum);
- }
- }
- else if (paramName == "predictorPath")
- {
- if (!(ss >> m_cfg.predictorPath))
- {
- throwConfigError(paramName, "std::string",
- line, lineNum);
- }
- }
- else if (paramName == "faceYAngleCorrection")
+ if (paramName == "faceYAngleCorrection")
{
if (!(ss >> m_cfg.faceYAngleCorrection))
{
line, lineNum);
}
}
- else if (paramName == "showWebcamVideo")
- {
- if (!(ss >> m_cfg.showWebcamVideo))
- {
- throwConfigError(paramName, "bool",
- line, lineNum);
- }
- }
- else if (paramName == "renderLandmarksOnVideo")
- {
- if (!(ss >> m_cfg.renderLandmarksOnVideo))
- {
- throwConfigError(paramName, "bool",
- line, lineNum);
- }
- }
else if (paramName == "lateralInversion")
{
if (!(ss >> m_cfg.lateralInversion))
line, lineNum);
}
}
- else if (paramName == "cvWaitKeyMs")
- {
- if (!(ss >> m_cfg.cvWaitKeyMs))
- {
- throwConfigError(paramName, "int",
- line, lineNum);
- }
- }
else if (paramName == "eyeClosedThreshold")
{
if (!(ss >> m_cfg.eyeClosedThreshold))
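// The parser above implies a plain-text config format of one
// "parameterName value" pair per line, e.g. (values taken from the
// defaults below):
//
//   faceYAngleCorrection 10
//   lateralInversion 1
//   faceXAngleNumTaps 11
//
// Bools are read with default stream flags, so they must be written as
// 1 or 0 rather than true or false.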
// These are values that I've personally tested to work OK for my face.
// Your mileage may vary, hence the config file.
- m_cfg.cvVideoCaptureId = 0;
- m_cfg.predictorPath = "shape_predictor_68_face_landmarks.dat";
m_cfg.faceYAngleCorrection = 10;
m_cfg.eyeSmileEyeOpenThreshold = 0.6;
m_cfg.eyeSmileMouthFormThreshold = 0.75;
m_cfg.eyeSmileMouthOpenThreshold = 0.5;
- m_cfg.showWebcamVideo = true;
- m_cfg.renderLandmarksOnVideo = true;
m_cfg.lateralInversion = true;
- m_cfg.cvWaitKeyMs = 5;
m_cfg.faceXAngleNumTaps = 11;
m_cfg.faceYAngleNumTaps = 11;
m_cfg.faceZAngleNumTaps = 11;