[facial-landmarks-for-cubism.git] / src / facial_landmark_detector.cpp

/****
Copyright (c) 2020 Adrian I. Lam

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
****/

#include <stdexcept>
#include <fstream>
#include <string>
#include <sstream>
#include <cmath>

#include "facial_landmark_detector.h"
#include "math_utils.h"


/* Appends a sample to a moving-average buffer, then trims the buffer so
 * that it holds at most numTaps samples.
 *
 * buf      Sample history; the oldest sample is at the front.
 * newval   Sample appended at the back.
 * numTaps  Maximum number of samples kept. With 0 the buffer is empty
 *          when the function returns.
 */
static void filterPush(std::deque<double>& buf, double newval,
                       std::size_t numTaps)
{
    buf.push_back(newval);

    if (buf.size() <= numTaps)
    {
        return;
    }

    // Discard every surplus sample from the old end in a single erase.
    const auto surplus =
        static_cast<std::deque<double>::difference_type>(buf.size() - numTaps);
    buf.erase(buf.begin(), buf.begin() + surplus);
}

FacialLandmarkDetector::FacialLandmarkDetector(std::string cfgPath)
    : m_stop(false)
{
    parseConfig(cfgPath);

    // TODO setup UDP connection here?
}

/* Builds the current output parameters from the filter buffers and the
 * configuration. Every measured quantity is the avg() of its buffer.
 */
FacialLandmarkDetector::Params FacialLandmarkDetector::getParams() const
{
    Params result;

    // Head rotation.
    result.faceXAngle = avg(m_faceXAngle);
    // The configured offset (10 by default) corrects for the angle
    // between the computer monitor and the webcam.
    result.faceYAngle = avg(m_faceYAngle) + m_cfg.faceYAngleCorrection;
    result.faceZAngle = avg(m_faceZAngle);

    // Mouth.
    result.mouthOpenness = avg(m_mouthOpenness);
    result.mouthForm = avg(m_mouthForm);

    // Eyes. NOTE(review): the second argument to avg() is presumably the
    // value returned for an empty buffer - confirm in math_utils.h.
    const double leftRaw = avg(m_leftEyeOpenness, 1);
    const double rightRaw = avg(m_rightEyeOpenness, 1);

    // Both eyes share the mean of the two readings so that blinks stay
    // synchronized. This effectively disables winks; supporting them
    // later needs a better way to handle out-of-sync blinks, perhaps
    // behind a runtime option. (An earlier experiment forced the open
    // eye to 1 when the other read 0 and it read above 0.2, but winks
    // are difficult to detect with the dlib data set anyway.)
    const double eyeOpenness = (leftRaw + rightRaw) / 2;
    result.leftEyeOpenness = eyeOpenness;
    result.rightEyeOpenness = eyeOpenness;

    // Eye smile: eyes at or below the openness threshold while the mouth
    // is both smiling and open beyond their respective thresholds.
    const bool eyesNarrow = eyeOpenness <= m_cfg.eyeSmileEyeOpenThreshold;
    const bool mouthSmiling = result.mouthForm > m_cfg.eyeSmileMouthFormThreshold;
    const bool mouthOpen = result.mouthOpenness > m_cfg.eyeSmileMouthOpenThreshold;
    const bool eyeSmile = eyesNarrow && mouthSmiling && mouthOpen;

    result.leftEyeSmile = eyeSmile ? 1 : 0;
    result.rightEyeSmile = eyeSmile ? 1 : 0;

    // Flags passed straight through from the configuration.
    result.autoBlink = m_cfg.autoBlink;
    result.autoBreath = m_cfg.autoBreath;
    result.randomMotion = m_cfg.randomMotion;

    return result;
}

/* Asks mainLoop() to finish: sets the flag that the loop tests before
 * each iteration. This call itself does not wait for the loop to exit.
 * NOTE(review): whether this may be called from a different thread than
 * mainLoop() depends on the type of m_stop in the header - confirm.
 */
void FacialLandmarkDetector::stop()
{
    m_stop = true;
}

void FacialLandmarkDetector::mainLoop(void)
{
    while (!m_stop)
    {
        if (m_cfg.lateralInversion)
        {
            // TODO is it something we can do here? Or in OSF only?
        }

        // TODO get the array of landmark coordinates here
        Point landmarks[68];


        /* The coordinates seem to be rather noisy in general.
         * We will push everything through some moving average filters
         * to reduce noise. The number of taps is determined empirically
         * until we get something good.
         * An alternative method would be to get some better dataset -
         * perhaps even to train on a custom data set just for the user.
         */

        // Face rotation: X direction (left-right)
        double faceXRot = calcFaceXAngle(landmarks);
        filterPush(m_faceXAngle, faceXRot, m_cfg.faceXAngleNumTaps);

        // Mouth form (smile / laugh) detection
        double mouthForm = calcMouthForm(landmarks);
        filterPush(m_mouthForm, mouthForm, m_cfg.mouthFormNumTaps);

        // Face rotation: Y direction (up-down)
        double faceYRot = calcFaceYAngle(landmarks, faceXRot, mouthForm);
        filterPush(m_faceYAngle, faceYRot, m_cfg.faceYAngleNumTaps);

        // Face rotation: Z direction (head tilt)
        double faceZRot = calcFaceZAngle(landmarks);
        filterPush(m_faceZAngle, faceZRot, m_cfg.faceZAngleNumTaps);

        // Mouth openness
        double mouthOpen = calcMouthOpenness(landmarks, mouthForm);
        filterPush(m_mouthOpenness, mouthOpen, m_cfg.mouthOpenNumTaps);

        // Eye openness
        double eyeLeftOpen = calcEyeOpenness(LEFT, landmarks, faceYRot);
        filterPush(m_leftEyeOpenness, eyeLeftOpen, m_cfg.leftEyeOpenNumTaps);
        double eyeRightOpen = calcEyeOpenness(RIGHT, landmarks, faceYRot);
        filterPush(m_rightEyeOpenness, eyeRightOpen, m_cfg.rightEyeOpenNumTaps);

        // TODO eyebrows?
    }
}

/* Eye aspect ratio from six points around one eye: the sum of the two
 * spans p2-p6 and p3-p5, divided by twice the span p1-p4.
 * The result is not finite when p1 and p4 coincide.
 */
double FacialLandmarkDetector::calcEyeAspectRatio(
    Point& p1, Point& p2,
    Point& p3, Point& p4,
    Point& p5, Point& p6) const
{
    double heightSum = dist(p2, p6) + dist(p3, p5);
    double doubledWidth = 2 * dist(p1, p4);

    return heightSum / doubledWidth;
}

/* Openness of one eye, scaled by linearScale01() between
 * eyeClosedThreshold and eyeOpenThreshold.
 *
 * eye         Which eye to measure.
 * landmarks   Landmark array; indices 36..47 are read.
 * faceYAngle  Up-down face rotation in degrees, used to correct the
 *             measured aspect ratio.
 */
double FacialLandmarkDetector::calcEyeOpenness(
    LeftRight eye,
    Point landmarks[],
    double faceYAngle) const
{
    // Each eye is six consecutive landmarks: 42..47 for LEFT,
    // 36..41 otherwise.
    Point* const contour = landmarks + ((eye == LEFT) ? 42 : 36);

    const double rawRatio = calcEyeAspectRatio(contour[0], contour[1], contour[2],
                                               contour[3], contour[4], contour[5]);

    // Apply correction due to faceYAngle
    const double yAngleRad = degToRad(faceYAngle);
    const double correctedRatio = rawRatio / std::cos(yAngleRad);

    return linearScale01(correctedRatio,
                         m_cfg.eyeClosedThreshold,
                         m_cfg.eyeOpenThreshold);
}


/* Mouth form parameter: 0 for a normal mouth, 1 for fully smiling /
 * laughing.
 *
 * The distance between the two corners of the mouth (landmarks 58 and 62)
 * is compared with the distance between the two eyes (centroids of
 * landmarks 36..41 and 42..47), and the ratio is scaled between
 * mouthNormalThreshold and mouthSmileThreshold.
 *
 * An earlier approach compared the corners of the mouth with the top of
 * the upper lip - they almost lie on a straight line when smiling /
 * laughing. That only holds when facing straight at the camera; when
 * looking up / down the angle changes, hence the distance approach.
 */
double FacialLandmarkDetector::calcMouthForm(Point landmarks[]) const
{
    auto eyeCenterA = centroid(landmarks[36], landmarks[37], landmarks[38],
                               landmarks[39], landmarks[40], landmarks[41]);
    auto eyeCenterB = centroid(landmarks[42], landmarks[43], landmarks[44],
                               landmarks[45], landmarks[46], landmarks[47]);

    double mouthWidth = dist(landmarks[58], landmarks[62]);
    double eyeSpacing = dist(eyeCenterA, eyeCenterB);

    return linearScale01(mouthWidth / eyeSpacing,
                         m_cfg.mouthNormalThreshold,
                         m_cfg.mouthSmileThreshold);
}

/* Mouth openness from the gap between the lips.
 *
 * landmarks  Landmark array; indices 58..65 are read.
 * mouthForm  Smile amount from calcMouthForm(); used to compensate for
 *            the mouth getting wider when smiling / laughing.
 */
double FacialLandmarkDetector::calcMouthOpenness(
    Point landmarks[],
    double mouthForm) const
{
    // Three point pairs join the bottom of the upper lip to the top of
    // the lower lip: on the left, in the middle, and on the right.
    // Use the average of all three heights.
    double heightSum = dist(landmarks[61], landmarks[63])    // left
                       + dist(landmarks[60], landmarks[64])  // middle
                       + dist(landmarks[59], landmarks[65]); // right
    double meanHeight = heightSum / 3;

    // Normalize with the width of the mouth.
    double mouthWidth = dist(landmarks[58], landmarks[62]);
    double ratio = meanHeight / mouthWidth;

    // NOTE(review): the two trailing flags are passed to linearScale01()
    // unchanged; their meaning is defined in math_utils.h.
    double openness = linearScale01(ratio,
                                    m_cfg.mouthClosedThreshold,
                                    m_cfg.mouthOpenThreshold,
                                    true, false);

    // A smile / laugh increases the width and so shrinks the normalized
    // height; scale the result up in proportion to mouthForm.
    return openness * (1 + m_cfg.mouthOpenLaughCorrection * mouthForm);
}

/* Left-right face rotation in degrees, limited to [-30, 30].
 *
 * This function will be easier to understand if you refer to the
 * diagram in faceXAngle.png.
 */
double FacialLandmarkDetector::calcFaceXAngle(Point landmarks[]) const
{
    // The y-axis runs between (1) the average of four points on the nose
    // and (2) the average of five points on the upper lip.
    auto y0 = centroid(landmarks[27], landmarks[28], landmarks[29],
                       landmarks[30]);
    auto y1 = centroid(landmarks[48], landmarks[49], landmarks[50],
                       landmarks[51], landmarks[52]);
    double axisLength = dist(y0, y1);

    // Reference points on the left and right edges of the face.
    auto left = centroid(landmarks[14], landmarks[15], landmarks[16]);
    auto right = centroid(landmarks[0], landmarks[1], landmarks[2]);

    // Length of the perpendicular dropped from an edge point onto the
    // y-axis: join the edge point to the upper lip, obtain the included
    // angle at the lip with the cosine rule, then multiply the joining
    // line by the sine of that angle.
    auto perpendicularFrom = [&](decltype(left)& edge) -> double
    {
        double toNose = dist(edge, y0);
        double toLip = dist(y1, edge);
        double included = solveCosineRuleAngle(toNose, axisLength, toLip);
        return toLip * std::sin(included);
    };

    double perpRight = perpendicularFrom(right);
    double perpLeft = perpendicularFrom(left);

    // Model the head as a sphere and look from above.
    double degrees = radToDeg(
        std::asin((perpRight - perpLeft) / (perpRight + perpLeft)));

    if (degrees < -30)
    {
        return -30;
    }
    if (degrees > 30)
    {
        return 30;
    }
    return degrees;
}

/* Up-down face rotation, derived from the nose.
 *
 * landmarks   Landmark array; indices 30, 31 and 35 are read.
 * faceXAngle  Left-right rotation in degrees (see calcFaceXAngle()).
 * mouthForm   Smile amount (see calcMouthForm()).
 *
 * Returns a value that is 0 at faceYAngleZeroValue, -30 at
 * faceYAngleDownThreshold and 30 at faceYAngleUpThreshold.
 * NOTE(review): the (false, false) flags passed to linearScale01()
 * presumably leave the result unclamped outside those thresholds -
 * confirm in math_utils.h.
 */
double FacialLandmarkDetector::calcFaceYAngle(Point landmarks[], double faceXAngle, double mouthForm) const
{
    // Angle at the tip of the nose (30) between the two left/right
    // points (31 and 35), via the cosine rule.
    double baseSide = dist(landmarks[31], landmarks[35]);
    double tipToFirst = dist(landmarks[30], landmarks[31]);
    double tipToSecond = dist(landmarks[30], landmarks[35]);
    double noseAngle = solveCosineRuleAngle(baseSide, tipToFirst, tipToSecond);

    // This probably varies a lot from person to person. Working out the
    // trigonometry would be best, but plain linear interpolation seems
    // to work ok.

    // Correction for X rotation.
    double xRotFactor = 1 + (std::abs(faceXAngle) / 30
                             * m_cfg.faceYAngleXRotCorrection);
    // Correction for smiles / laughs, which increase the angle.
    double smileFactor = 1 - mouthForm * m_cfg.faceYAngleSmileCorrection;

    double corrected = noseAngle * xRotFactor * smileFactor;

    if (corrected >= m_cfg.faceYAngleZeroValue)
    {
        double downFraction = linearScale01(corrected,
                                            m_cfg.faceYAngleZeroValue,
                                            m_cfg.faceYAngleDownThreshold,
                                            false, false);
        return -30 * downFraction;
    }

    double upFraction = linearScale01(corrected,
                                      m_cfg.faceYAngleUpThreshold,
                                      m_cfg.faceYAngleZeroValue,
                                      false, false);
    return 30 * (1 - upFraction);
}

/* Head tilt in degrees: the mean of the inclination of the line joining
 * the two eye centroids and the inclination of the line joining nose
 * landmarks 31 and 35.
 */
double FacialLandmarkDetector::calcFaceZAngle(Point landmarks[]) const
{
    // Inclination (radians) of a line given its y and x extents.
    auto inclination = [](double yDiff, double xDiff) -> double
    {
        return std::atan(yDiff / xDiff);
    };

    auto eyeRight = centroid(landmarks[36], landmarks[37], landmarks[38],
                             landmarks[39], landmarks[40], landmarks[41]);
    auto eyeLeft  = centroid(landmarks[42], landmarks[43], landmarks[44],
                             landmarks[45], landmarks[46], landmarks[47]);
    const double eyeTilt = inclination(eyeRight.y - eyeLeft.y,
                                       eyeRight.x - eyeLeft.x);

    auto& noseRight = landmarks[31];
    auto& noseLeft  = landmarks[35];
    const double noseTilt = inclination(noseRight.y - noseLeft.y,
                                        noseRight.x - noseLeft.x);

    return radToDeg((eyeTilt + noseTilt) / 2);
}

namespace
{

/* Helper for parseConfig(): compares the parameter name found on the
 * current config line with one known name and, on a match, extracts the
 * value that follows it into the given field.
 *
 * A failed extraction is recorded in failedType rather than thrown here,
 * so that the caller can report it through throwConfigError().
 */
struct ConfigValueReader
{
    const std::string& paramName;  // name token read from the current line
    std::istringstream& stream;    // rest of the current line
    const char* failedType;        // non-null once a matched value failed to parse

    // Returns true when `name` is the parameter on this line (whether or
    // not its value parsed), false when the name does not match.
    template <typename T>
    bool operator()(const char* name, T& field, const char* typeName)
    {
        if (paramName != name)
        {
            return false;
        }
        if (!(stream >> field))
        {
            failedType = typeName;
        }
        return true;
    }
};

} // namespace

/* Loads the configuration.
 *
 * The built-in defaults are always installed first. When cfgPath is
 * non-empty the file is then read line by line; each non-blank,
 * non-comment line is "<paramName> <value>". A line whose first
 * non-whitespace character is '#' is a comment.
 *
 * Throws std::runtime_error when the file cannot be opened, when a value
 * cannot be parsed as the parameter's type (via throwConfigError()), or
 * when a parameter name is not recognized.
 */
void FacialLandmarkDetector::parseConfig(std::string cfgPath)
{
    populateDefaultConfig();
    if (cfgPath.empty())
    {
        return;
    }

    std::ifstream file(cfgPath);
    if (!file)
    {
        throw std::runtime_error("Failed to open config file");
    }

    std::string line;
    unsigned int lineNum = 0;

    while (std::getline(file, line))
    {
        lineNum++;

        // Skip blank lines and comments. The comment marker may be
        // indented; testing only line[0] would make an indented comment
        // fail as an unrecognized parameter.
        const std::string::size_type firstChar = line.find_first_not_of(" \t\r");
        if (firstChar == std::string::npos || line[firstChar] == '#')
        {
            continue;
        }

        std::istringstream ss(line);
        std::string paramName;
        if (!(ss >> paramName))
        {
            continue;
        }

        // The first matching name ends the chain (short-circuit ||).
        ConfigValueReader read = {paramName, ss, nullptr};
        const bool recognized =
            read("faceYAngleCorrection", m_cfg.faceYAngleCorrection, "double") ||
            read("eyeSmileEyeOpenThreshold", m_cfg.eyeSmileEyeOpenThreshold, "double") ||
            read("eyeSmileMouthFormThreshold", m_cfg.eyeSmileMouthFormThreshold, "double") ||
            read("eyeSmileMouthOpenThreshold", m_cfg.eyeSmileMouthOpenThreshold, "double") ||
            read("lateralInversion", m_cfg.lateralInversion, "bool") ||
            read("faceXAngleNumTaps", m_cfg.faceXAngleNumTaps, "std::size_t") ||
            read("faceYAngleNumTaps", m_cfg.faceYAngleNumTaps, "std::size_t") ||
            read("faceZAngleNumTaps", m_cfg.faceZAngleNumTaps, "std::size_t") ||
            read("mouthFormNumTaps", m_cfg.mouthFormNumTaps, "std::size_t") ||
            read("mouthOpenNumTaps", m_cfg.mouthOpenNumTaps, "std::size_t") ||
            read("leftEyeOpenNumTaps", m_cfg.leftEyeOpenNumTaps, "std::size_t") ||
            read("rightEyeOpenNumTaps", m_cfg.rightEyeOpenNumTaps, "std::size_t") ||
            read("eyeClosedThreshold", m_cfg.eyeClosedThreshold, "double") ||
            read("eyeOpenThreshold", m_cfg.eyeOpenThreshold, "double") ||
            read("mouthNormalThreshold", m_cfg.mouthNormalThreshold, "double") ||
            read("mouthSmileThreshold", m_cfg.mouthSmileThreshold, "double") ||
            read("mouthClosedThreshold", m_cfg.mouthClosedThreshold, "double") ||
            read("mouthOpenThreshold", m_cfg.mouthOpenThreshold, "double") ||
            read("mouthOpenLaughCorrection", m_cfg.mouthOpenLaughCorrection, "double") ||
            read("faceYAngleXRotCorrection", m_cfg.faceYAngleXRotCorrection, "double") ||
            read("faceYAngleSmileCorrection", m_cfg.faceYAngleSmileCorrection, "double") ||
            read("faceYAngleZeroValue", m_cfg.faceYAngleZeroValue, "double") ||
            read("faceYAngleUpThreshold", m_cfg.faceYAngleUpThreshold, "double") ||
            read("faceYAngleDownThreshold", m_cfg.faceYAngleDownThreshold, "double") ||
            read("autoBlink", m_cfg.autoBlink, "bool") ||
            read("autoBreath", m_cfg.autoBreath, "bool") ||
            read("randomMotion", m_cfg.randomMotion, "bool");

        if (read.failedType != nullptr)
        {
            throwConfigError(paramName, read.failedType, line, lineNum);
        }

        if (!recognized)
        {
            std::ostringstream oss;
            oss << "Unrecognized parameter name at line " << lineNum
                << ": " << paramName;
            throw std::runtime_error(oss.str());
        }
    }
}

void FacialLandmarkDetector::populateDefaultConfig(void)
{
    // These are values that I've personally tested to work OK for my face.
    // Your milage may vary - hence the config file.

    m_cfg.faceYAngleCorrection = 10;
    m_cfg.eyeSmileEyeOpenThreshold = 0.6;
    m_cfg.eyeSmileMouthFormThreshold = 0.75;
    m_cfg.eyeSmileMouthOpenThreshold = 0.5;
    m_cfg.lateralInversion = true;
    m_cfg.faceXAngleNumTaps = 11;
    m_cfg.faceYAngleNumTaps = 11;
    m_cfg.faceZAngleNumTaps = 11;
    m_cfg.mouthFormNumTaps = 3;
    m_cfg.mouthOpenNumTaps = 3;
    m_cfg.leftEyeOpenNumTaps = 3;
    m_cfg.rightEyeOpenNumTaps = 3;
    m_cfg.eyeClosedThreshold = 0.2;
    m_cfg.eyeOpenThreshold = 0.25;
    m_cfg.mouthNormalThreshold = 0.75;
    m_cfg.mouthSmileThreshold = 1.0;
    m_cfg.mouthClosedThreshold = 0.1;
    m_cfg.mouthOpenThreshold = 0.4;
    m_cfg.mouthOpenLaughCorrection = 0.2;
    m_cfg.faceYAngleXRotCorrection = 0.15;
    m_cfg.faceYAngleSmileCorrection = 0.075;
    m_cfg.faceYAngleZeroValue = 1.8;
    m_cfg.faceYAngleDownThreshold = 2.3;
    m_cfg.faceYAngleUpThreshold = 1.3;
    m_cfg.autoBlink = false;
    m_cfg.autoBreath = false;
    m_cfg.randomMotion = false;
}

/* Reports a config value that could not be parsed. Never returns: always
 * throws std::runtime_error with a three-line message.
 *
 * paramName     Name of the parameter whose value failed to parse.
 * expectedType  Type name shown to the user (e.g. "double").
 * line          Full text of the offending config line.
 * lineNum       Number of that line as counted by parseConfig().
 */
void FacialLandmarkDetector::throwConfigError(std::string paramName,
                                              std::string expectedType,
                                              std::string line,
                                              unsigned int lineNum)
{
    std::ostringstream message;

    message << "Error parsing config file for parameter " << paramName;
    message << "\nAt line " << lineNum << ": " << line;
    message << "\nExpecting value of type " << expectedType;

    const std::string text = message.str();
    throw std::runtime_error(text);
}
Commit	Line	Data
	1	/****
	2	Copyright (c) 2020 Adrian I. Lam
	3
	4	Permission is hereby granted, free of charge, to any person obtaining a copy
	5	of this software and associated documentation files (the "Software"), to deal
	6	in the Software without restriction, including without limitation the rights
	7	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	8	copies of the Software, and to permit persons to whom the Software is
	9	furnished to do so, subject to the following conditions:
	10
	11	The above copyright notice and this permission notice shall be included in all
	12	copies or substantial portions of the Software.
	13
	14	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	15	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	16	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	17	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	18	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	19	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	20	SOFTWARE.
	21	****/
	22
	23	#include <stdexcept>
	24	#include <fstream>
	25	#include <string>
	26	#include <sstream>
	27	#include <cmath>
	28
	29	#include "facial_landmark_detector.h"
	30	#include "math_utils.h"
	31
	32
	33	static void filterPush(std::deque<double>& buf, double newval,
	34	std::size_t numTaps)
	35	{
	36	buf.push_back(newval);
	37	while (buf.size() > numTaps)
	38	{
	39	buf.pop_front();
	40	}
	41	}
	42
	43	FacialLandmarkDetector::FacialLandmarkDetector(std::string cfgPath)
	44	: m_stop(false)
	45	{
	46	parseConfig(cfgPath);
	47
	48	// TODO setup UDP connection here?
	49	}
	50
	51	FacialLandmarkDetector::Params FacialLandmarkDetector::getParams(void) const
	52	{
	53	Params params;
	54
	55	params.faceXAngle = avg(m_faceXAngle);
	56	params.faceYAngle = avg(m_faceYAngle) + m_cfg.faceYAngleCorrection;
	57	// + 10 correct for angle between computer monitor and webcam
	58	params.faceZAngle = avg(m_faceZAngle);
	59	params.mouthOpenness = avg(m_mouthOpenness);
	60	params.mouthForm = avg(m_mouthForm);
	61
	62	double leftEye = avg(m_leftEyeOpenness, 1);
	63	double rightEye = avg(m_rightEyeOpenness, 1);
	64	// Just combine the two to get better synchronized blinks
	65	// This effectively disables winks, so if we want to
	66	// support winks in the future (see below) we will need
	67	// a better way to handle this out-of-sync blinks.
	68	double bothEyes = (leftEye + rightEye) / 2;
	69	leftEye = bothEyes;
	70	rightEye = bothEyes;
	71	// Detect winks and make them look better
	72	// Commenting out - winks are difficult to be detected by the
	73	// dlib data set anyway... maybe in the future we can
	74	// add a runtime option to enable/disable...
	75	/*if (right == 0 && left > 0.2)
	76	{
	77	left = 1;
	78	}
	79	else if (left == 0 && right > 0.2)
	80	{
	81	right = 1;
	82	}
	83	*/
	84	params.leftEyeOpenness = leftEye;
	85	params.rightEyeOpenness = rightEye;
	86
	87	if (leftEye <= m_cfg.eyeSmileEyeOpenThreshold &&
	88	rightEye <= m_cfg.eyeSmileEyeOpenThreshold &&
	89	params.mouthForm > m_cfg.eyeSmileMouthFormThreshold &&
	90	params.mouthOpenness > m_cfg.eyeSmileMouthOpenThreshold)
	91	{
	92	params.leftEyeSmile = 1;
	93	params.rightEyeSmile = 1;
	94	}
	95	else
	96	{
	97	params.leftEyeSmile = 0;
	98	params.rightEyeSmile = 0;
	99	}
	100
	101	params.autoBlink = m_cfg.autoBlink;
	102	params.autoBreath = m_cfg.autoBreath;
	103	params.randomMotion = m_cfg.randomMotion;
	104
	105	return params;
	106	}
	107
	108	void FacialLandmarkDetector::stop(void)
	109	{
	110	m_stop = true;
	111	}
	112
	113	void FacialLandmarkDetector::mainLoop(void)
	114	{
	115	while (!m_stop)
	116	{
	117	if (m_cfg.lateralInversion)
	118	{
	119	// TODO is it something we can do here? Or in OSF only?
	120	}
	121
	122	// TODO get the array of landmark coordinates here
	123	Point landmarks[68];
	124
	125
	126	/* The coordinates seem to be rather noisy in general.
	127	* We will push everything through some moving average filters
	128	* to reduce noise. The number of taps is determined empirically
	129	* until we get something good.
	130	* An alternative method would be to get some better dataset -
	131	* perhaps even to train on a custom data set just for the user.
	132	*/
	133
	134	// Face rotation: X direction (left-right)
	135	double faceXRot = calcFaceXAngle(landmarks);
	136	filterPush(m_faceXAngle, faceXRot, m_cfg.faceXAngleNumTaps);
	137
	138	// Mouth form (smile / laugh) detection
	139	double mouthForm = calcMouthForm(landmarks);
	140	filterPush(m_mouthForm, mouthForm, m_cfg.mouthFormNumTaps);
	141
	142	// Face rotation: Y direction (up-down)
	143	double faceYRot = calcFaceYAngle(landmarks, faceXRot, mouthForm);
	144	filterPush(m_faceYAngle, faceYRot, m_cfg.faceYAngleNumTaps);
	145
	146	// Face rotation: Z direction (head tilt)
	147	double faceZRot = calcFaceZAngle(landmarks);
	148	filterPush(m_faceZAngle, faceZRot, m_cfg.faceZAngleNumTaps);
	149
	150	// Mouth openness
	151	double mouthOpen = calcMouthOpenness(landmarks, mouthForm);
	152	filterPush(m_mouthOpenness, mouthOpen, m_cfg.mouthOpenNumTaps);
	153
	154	// Eye openness
	155	double eyeLeftOpen = calcEyeOpenness(LEFT, landmarks, faceYRot);
	156	filterPush(m_leftEyeOpenness, eyeLeftOpen, m_cfg.leftEyeOpenNumTaps);
	157	double eyeRightOpen = calcEyeOpenness(RIGHT, landmarks, faceYRot);
	158	filterPush(m_rightEyeOpenness, eyeRightOpen, m_cfg.rightEyeOpenNumTaps);
	159
	160	// TODO eyebrows?
	161	}
	162	}
	163
	164	double FacialLandmarkDetector::calcEyeAspectRatio(
	165	Point& p1, Point& p2,
	166	Point& p3, Point& p4,
	167	Point& p5, Point& p6) const
	168	{
	169	double eyeWidth = dist(p1, p4);
	170	double eyeHeight1 = dist(p2, p6);
	171	double eyeHeight2 = dist(p3, p5);
	172
	173	return (eyeHeight1 + eyeHeight2) / (2 * eyeWidth);
	174	}
	175
	176	double FacialLandmarkDetector::calcEyeOpenness(
	177	LeftRight eye,
	178	Point landmarks[],
	179	double faceYAngle) const
	180	{
	181	double eyeAspectRatio;
	182	if (eye == LEFT)
	183	{
	184	eyeAspectRatio = calcEyeAspectRatio(landmarks[42], landmarks[43], landmarks[44],
	185	landmarks[45], landmarks[46], landmarks[47]);
	186	}
	187	else
	188	{
	189	eyeAspectRatio = calcEyeAspectRatio(landmarks[36], landmarks[37], landmarks[38],
	190	landmarks[39], landmarks[40], landmarks[41]);
	191	}
	192
	193	// Apply correction due to faceYAngle
	194	double corrEyeAspRat = eyeAspectRatio / std::cos(degToRad(faceYAngle));
	195
	196	return linearScale01(corrEyeAspRat, m_cfg.eyeClosedThreshold, m_cfg.eyeOpenThreshold);
	197	}
	198
	199
	200
	201	double FacialLandmarkDetector::calcMouthForm(Point landmarks[]) const
	202	{
	203	/* Mouth form parameter: 0 for normal mouth, 1 for fully smiling / laughing.
	204	* Compare distance between the two corners of the mouth
	205	* to the distance between the two eyes.
	206	*/
	207
	208	/* An alternative (my initial attempt) was to compare the corners of
	209	* the mouth to the top of the upper lip - they almost lie on a
	210	* straight line when smiling / laughing. But that is only true
	211	* when facing straight at the camera. When looking up / down,
	212	* the angle changes. So here we'll use the distance approach instead.
	213	*/
	214
	215	auto eye1 = centroid(landmarks[36], landmarks[37], landmarks[38],
	216	landmarks[39], landmarks[40], landmarks[41]);
	217	auto eye2 = centroid(landmarks[42], landmarks[43], landmarks[44],
	218	landmarks[45], landmarks[46], landmarks[47]);
	219	double distEyes = dist(eye1, eye2);
	220	double distMouth = dist(landmarks[58], landmarks[62]);
	221
	222	double form = linearScale01(distMouth / distEyes,
	223	m_cfg.mouthNormalThreshold,
	224	m_cfg.mouthSmileThreshold);
	225
	226	return form;
	227	}
	228
	229	double FacialLandmarkDetector::calcMouthOpenness(
	230	Point landmarks[],
	231	double mouthForm) const
	232	{
	233	// Use points for the bottom of the upper lip, and top of the lower lip
	234	// We have 3 pairs of points available, which give the mouth height
	235	// on the left, in the middle, and on the right, resp.
	236	// First let's try to use an average of all three.
	237	double heightLeft = dist(landmarks[61], landmarks[63]);
	238	double heightMiddle = dist(landmarks[60], landmarks[64]);
	239	double heightRight = dist(landmarks[59], landmarks[65]);
	240
	241	double avgHeight = (heightLeft + heightMiddle + heightRight) / 3;
	242
	243	// Now, normalize it with the width of the mouth.
	244	double width = dist(landmarks[58], landmarks[62]);
	245
	246	double normalized = avgHeight / width;
	247
	248	double scaled = linearScale01(normalized,
	249	m_cfg.mouthClosedThreshold,
	250	m_cfg.mouthOpenThreshold,
	251	true, false);
	252
	253	// Apply correction according to mouthForm
	254	// Notice that when you smile / laugh, width is increased
	255	scaled = (1 + m_cfg.mouthOpenLaughCorrection mouthForm);
	256
	257	return scaled;
	258	}
	259
	260	double FacialLandmarkDetector::calcFaceXAngle(Point landmarks[]) const
	261	{
	262	// This function will be easier to understand if you refer to the
	263	// diagram in faceXAngle.png
	264
	265	// Construct the y-axis using (1) average of four points on the nose and
	266	// (2) average of five points on the upper lip.
	267
	268	auto y0 = centroid(landmarks[27], landmarks[28], landmarks[29],
	269	landmarks[30]);
	270	auto y1 = centroid(landmarks[48], landmarks[49], landmarks[50],
	271	landmarks[51], landmarks[52]);
	272
	273	// Now drop a perpedicular from the left and right edges of the face,
	274	// and calculate the ratio between the lengths of these perpendiculars
	275
	276	auto left = centroid(landmarks[14], landmarks[15], landmarks[16]);
	277	auto right = centroid(landmarks[0], landmarks[1], landmarks[2]);
	278
	279	// Constructing a perpendicular:
	280	// Join the left/right point and the upper lip. The included angle
	281	// can now be determined using cosine rule.
	282	// Then sine of this angle is the perpendicular divided by the newly
	283	// created line.
	284	double opp = dist(right, y0);
	285	double adj1 = dist(y0, y1);
	286	double adj2 = dist(y1, right);
	287	double angle = solveCosineRuleAngle(opp, adj1, adj2);
	288	double perpRight = adj2 * std::sin(angle);
	289
	290	opp = dist(left, y0);
	291	adj2 = dist(y1, left);
	292	angle = solveCosineRuleAngle(opp, adj1, adj2);
	293	double perpLeft = adj2 * std::sin(angle);
	294
	295	// Model the head as a sphere and look from above.
	296	double theta = std::asin((perpRight - perpLeft) / (perpRight + perpLeft));
	297
	298	theta = radToDeg(theta);
	299	if (theta < -30) theta = -30;
	300	if (theta > 30) theta = 30;
	301	return theta;
	302	}
	303
	304	double FacialLandmarkDetector::calcFaceYAngle(Point landmarks[], double faceXAngle, double mouthForm) const
	305	{
	306	// Use the nose
	307	// angle between the two left/right points and the tip
	308	double c = dist(landmarks[31], landmarks[35]);
	309	double a = dist(landmarks[30], landmarks[31]);
	310	double b = dist(landmarks[30], landmarks[35]);
	311
	312	double angle = solveCosineRuleAngle(c, a, b);
	313
	314	// This probably varies a lot from person to person...
	315
	316	// Best is probably to work out some trigonometry again,
	317	// but just linear interpolation seems to work ok...
	318
	319	// Correct for X rotation
	320	double corrAngle = angle * (1 + (std::abs(faceXAngle) / 30
	321	* m_cfg.faceYAngleXRotCorrection));
	322
	323	// Correct for smiles / laughs - this increases the angle
	324	corrAngle = (1 - mouthForm m_cfg.faceYAngleSmileCorrection);
	325
	326	if (corrAngle >= m_cfg.faceYAngleZeroValue)
	327	{
	328	return -30 * linearScale01(corrAngle,
	329	m_cfg.faceYAngleZeroValue,
	330	m_cfg.faceYAngleDownThreshold,
	331	false, false);
	332	}
	333	else
	334	{
	335	return 30 * (1 - linearScale01(corrAngle,
	336	m_cfg.faceYAngleUpThreshold,
	337	m_cfg.faceYAngleZeroValue,
	338	false, false));
	339	}
	340	}
	341
	342	double FacialLandmarkDetector::calcFaceZAngle(Point landmarks[]) const
	343	{
	344	// Use average of eyes and nose
	345
	346	auto eyeRight = centroid(landmarks[36], landmarks[37], landmarks[38],
	347	landmarks[39], landmarks[40], landmarks[41]);
	348	auto eyeLeft = centroid(landmarks[42], landmarks[43], landmarks[44],
	349	landmarks[45], landmarks[46], landmarks[47]);
	350
	351	auto noseLeft = landmarks[35];
	352	auto noseRight = landmarks[31];
	353
	354	double eyeYDiff = eyeRight.y - eyeLeft.y;
	355	double eyeXDiff = eyeRight.x - eyeLeft.x;
	356
	357	double angle1 = std::atan(eyeYDiff / eyeXDiff);
	358
	359	double noseYDiff = noseRight.y - noseLeft.y;
	360	double noseXDiff = noseRight.x - noseLeft.x;
	361
	362	double angle2 = std::atan(noseYDiff / noseXDiff);
	363
	364	return radToDeg((angle1 + angle2) / 2);
	365	}
	366
	367	void FacialLandmarkDetector::parseConfig(std::string cfgPath)
	368	{
	369	populateDefaultConfig();
	370	if (cfgPath != "")
	371	{
	372	std::ifstream file(cfgPath);
	373
	374	if (!file)
	375	{
	376	throw std::runtime_error("Failed to open config file");
	377	}
	378
	379	std::string line;
	380	unsigned int lineNum = 0;
	381
	382	while (std::getline(file, line))
	383	{
	384	lineNum++;
	385
	386	if (line[0] == '#')
	387	{
	388	continue;
	389	}
	390
	391	std::istringstream ss(line);
	392	std::string paramName;
	393	if (ss >> paramName)
	394	{
	395	if (paramName == "faceYAngleCorrection")
	396	{
	397	if (!(ss >> m_cfg.faceYAngleCorrection))
	398	{
	399	throwConfigError(paramName, "double",
	400	line, lineNum);
	401	}
	402	}
	403	else if (paramName == "eyeSmileEyeOpenThreshold")
	404	{
	405	if (!(ss >> m_cfg.eyeSmileEyeOpenThreshold))
	406	{
	407	throwConfigError(paramName, "double",
	408	line, lineNum);
	409	}
	410	}
	411	else if (paramName == "eyeSmileMouthFormThreshold")
	412	{
	413	if (!(ss >> m_cfg.eyeSmileMouthFormThreshold))
	414	{
	415	throwConfigError(paramName, "double",
	416	line, lineNum);
	417	}
	418	}
	419	else if (paramName == "eyeSmileMouthOpenThreshold")
	420	{
	421	if (!(ss >> m_cfg.eyeSmileMouthOpenThreshold))
	422	{
	423	throwConfigError(paramName, "double",
	424	line, lineNum);
	425	}
	426	}
	427	else if (paramName == "lateralInversion")
	428	{
	429	if (!(ss >> m_cfg.lateralInversion))
	430	{
	431	throwConfigError(paramName, "bool",
	432	line, lineNum);
	433	}
	434	}
	435	else if (paramName == "faceXAngleNumTaps")
	436	{
	437	if (!(ss >> m_cfg.faceXAngleNumTaps))
	438	{
	439	throwConfigError(paramName, "std::size_t",
	440	line, lineNum);
	441	}
	442	}
	443	else if (paramName == "faceYAngleNumTaps")
	444	{
	445	if (!(ss >> m_cfg.faceYAngleNumTaps))
	446	{
	447	throwConfigError(paramName, "std::size_t",
	448	line, lineNum);
	449	}
	450	}
	451	else if (paramName == "faceZAngleNumTaps")
	452	{
	453	if (!(ss >> m_cfg.faceZAngleNumTaps))
	454	{
	455	throwConfigError(paramName, "std::size_t",
	456	line, lineNum);
	457	}
	458	}
	459	else if (paramName == "mouthFormNumTaps")
	460	{
	461	if (!(ss >> m_cfg.mouthFormNumTaps))
	462	{
	463	throwConfigError(paramName, "std::size_t",
	464	line, lineNum);
	465	}
	466	}
	467	else if (paramName == "mouthOpenNumTaps")
	468	{
	469	if (!(ss >> m_cfg.mouthOpenNumTaps))
	470	{
	471	throwConfigError(paramName, "std::size_t",
	472	line, lineNum);
	473	}
	474	}
	475	else if (paramName == "leftEyeOpenNumTaps")
	476	{
	477	if (!(ss >> m_cfg.leftEyeOpenNumTaps))
	478	{
	479	throwConfigError(paramName, "std::size_t",
	480	line, lineNum);
	481	}
	482	}
	483	else if (paramName == "rightEyeOpenNumTaps")
	484	{
	485	if (!(ss >> m_cfg.rightEyeOpenNumTaps))
	486	{
	487	throwConfigError(paramName, "std::size_t",
	488	line, lineNum);
	489	}
	490	}
	491	else if (paramName == "eyeClosedThreshold")
	492	{
	493	if (!(ss >> m_cfg.eyeClosedThreshold))
	494	{
	495	throwConfigError(paramName, "double",
	496	line, lineNum);
	497	}
	498	}
	499	else if (paramName == "eyeOpenThreshold")
	500	{
	501	if (!(ss >> m_cfg.eyeOpenThreshold))
	502	{
	503	throwConfigError(paramName, "double",
	504	line, lineNum);
	505	}
	506	}
	507	else if (paramName == "mouthNormalThreshold")
	508	{
	509	if (!(ss >> m_cfg.mouthNormalThreshold))
	510	{
	511	throwConfigError(paramName, "double",
	512	line, lineNum);
	513	}
	514	}
	515	else if (paramName == "mouthSmileThreshold")
	516	{
	517	if (!(ss >> m_cfg.mouthSmileThreshold))
	518	{
	519	throwConfigError(paramName, "double",
	520	line, lineNum);
	521	}
	522	}
	523	else if (paramName == "mouthClosedThreshold")
	524	{
	525	if (!(ss >> m_cfg.mouthClosedThreshold))
	526	{
	527	throwConfigError(paramName, "double",
	528	line, lineNum);
	529	}
	530	}
	531	else if (paramName == "mouthOpenThreshold")
	532	{
	533	if (!(ss >> m_cfg.mouthOpenThreshold))
	534	{
	535	throwConfigError(paramName, "double",
	536	line, lineNum);
	537	}
	538	}
	539	else if (paramName == "mouthOpenLaughCorrection")
	540	{
	541	if (!(ss >> m_cfg.mouthOpenLaughCorrection))
	542	{
	543	throwConfigError(paramName, "double",
	544	line, lineNum);
	545	}
	546	}
	547	else if (paramName == "faceYAngleXRotCorrection")
	548	{
	549	if (!(ss >> m_cfg.faceYAngleXRotCorrection))
	550	{
	551	throwConfigError(paramName, "double",
	552	line, lineNum);
	553	}
	554	}
	555	else if (paramName == "faceYAngleSmileCorrection")
	556	{
	557	if (!(ss >> m_cfg.faceYAngleSmileCorrection))
	558	{
	559	throwConfigError(paramName, "double",
	560	line, lineNum);
	561	}
	562	}
	563	else if (paramName == "faceYAngleZeroValue")
	564	{
	565	if (!(ss >> m_cfg.faceYAngleZeroValue))
	566	{
	567	throwConfigError(paramName, "double",
	568	line, lineNum);
	569	}
	570	}
	571	else if (paramName == "faceYAngleUpThreshold")
	572	{
	573	if (!(ss >> m_cfg.faceYAngleUpThreshold))
	574	{
	575	throwConfigError(paramName, "double",
	576	line, lineNum);
	577	}
	578	}
	579	else if (paramName == "faceYAngleDownThreshold")
	580	{
	581	if (!(ss >> m_cfg.faceYAngleDownThreshold))
	582	{
	583	throwConfigError(paramName, "double",
	584	line, lineNum);
	585	}
	586	}
	587	else if (paramName == "autoBlink")
	588	{
	589	if (!(ss >> m_cfg.autoBlink))
	590	{
	591	throwConfigError(paramName, "bool",
	592	line, lineNum);
	593	}
	594	}
	595	else if (paramName == "autoBreath")
	596	{
	597	if (!(ss >> m_cfg.autoBreath))
	598	{
	599	throwConfigError(paramName, "bool",
	600	line, lineNum);
	601	}
	602	}
	603	else if (paramName == "randomMotion")
	604	{
	605	if (!(ss >> m_cfg.randomMotion))
	606	{
	607	throwConfigError(paramName, "bool",
	608	line, lineNum);
	609	}
	610	}
	611	else
	612	{
	613	std::ostringstream oss;
	614	oss << "Unrecognized parameter name at line " << lineNum
	615	<< ": " << paramName;
	616	throw std::runtime_error(oss.str());
	617	}
	618	}
	619	}
	620	}
	621	}
	622
	623	void FacialLandmarkDetector::populateDefaultConfig(void)
	624	{
	625	// These are values that I've personally tested to work OK for my face.
	626	// Your milage may vary - hence the config file.
	627
	628	m_cfg.faceYAngleCorrection = 10;
	629	m_cfg.eyeSmileEyeOpenThreshold = 0.6;
	630	m_cfg.eyeSmileMouthFormThreshold = 0.75;
	631	m_cfg.eyeSmileMouthOpenThreshold = 0.5;
	632	m_cfg.lateralInversion = true;
	633	m_cfg.faceXAngleNumTaps = 11;
	634	m_cfg.faceYAngleNumTaps = 11;
	635	m_cfg.faceZAngleNumTaps = 11;
	636	m_cfg.mouthFormNumTaps = 3;
	637	m_cfg.mouthOpenNumTaps = 3;
	638	m_cfg.leftEyeOpenNumTaps = 3;
	639	m_cfg.rightEyeOpenNumTaps = 3;
	640	m_cfg.eyeClosedThreshold = 0.2;
	641	m_cfg.eyeOpenThreshold = 0.25;
	642	m_cfg.mouthNormalThreshold = 0.75;
	643	m_cfg.mouthSmileThreshold = 1.0;
	644	m_cfg.mouthClosedThreshold = 0.1;
	645	m_cfg.mouthOpenThreshold = 0.4;
	646	m_cfg.mouthOpenLaughCorrection = 0.2;
	647	m_cfg.faceYAngleXRotCorrection = 0.15;
	648	m_cfg.faceYAngleSmileCorrection = 0.075;
	649	m_cfg.faceYAngleZeroValue = 1.8;
	650	m_cfg.faceYAngleDownThreshold = 2.3;
	651	m_cfg.faceYAngleUpThreshold = 1.3;
	652	m_cfg.autoBlink = false;
	653	m_cfg.autoBreath = false;
	654	m_cfg.randomMotion = false;
	655	}
	656
	657	void FacialLandmarkDetector::throwConfigError(std::string paramName,
	658	std::string expectedType,
	659	std::string line,
	660	unsigned int lineNum)
	661	{
	662	std::ostringstream ss;
	663	ss << "Error parsing config file for parameter " << paramName
	664	<< "\nAt line " << lineNum << ": " << line
	665	<< "\nExpecting value of type " << expectedType;
	666
	667	throw std::runtime_error(ss.str());
	668	}
	669