From cb483d3b7886b05435837a83ec6d2ab9987aa7ea Mon Sep 17 00:00:00 2001 From: Adrian Iain Lam Date: Sat, 29 May 2021 01:43:13 +0100 Subject: [PATCH] Use OpenSeeFace to control Live2D model --- config.txt | 29 ++++--- include/facial_landmark_detector.h | 19 +++-- src/facial_landmark_detector.cpp | 151 +++++++++++++++++++++++++++---------- src/math_utils.h | 6 +- 4 files changed, 145 insertions(+), 60 deletions(-) diff --git a/config.txt b/config.txt index 9830251..d9cc7b4 100644 --- a/config.txt +++ b/config.txt @@ -8,6 +8,10 @@ # very first character of the line, i.e. without any preceeding whitespace. +## Section 0: OpenSeeFace connection parameters +osfIpAddress 127.0.0.1 +osfPort 11573 + ## Section 1: Cubism params calculation control # # These values control how the facial landmarks are translated into @@ -15,9 +19,6 @@ # to person. The following values seem to work OK for my face, but # your milage may vary. -# If 1, laterally invert the image (create a mirror image); if 0, don't invert -lateralInversion 1 - # Section 1.0: Live2D automatic functionality # Set 1 to enable, 0 to disable. # If these are set, the automatic functionality in Live2D will be enabled. @@ -60,16 +61,13 @@ faceYAngleSmileCorrection 0.075 # Section 1.2: Eye control # This is mainly calculated based on the eye aspect ratio (eye height -# divided by eye width). Note that currently an average of the values -# of both eyes is applied - mainly due to two reasons: (1) the dlib -# dataset I'm using fails to detect winks for me, and (2) if this is -# not done, I frequently get asynchronous blinks which just looks ugly. +# divided by eye width). 
# Maximum eye aspect ratio when the eye is closed -eyeClosedThreshold 0.2 +eyeClosedThreshold 0.18 # Minimum eye aspect ratio when the eye is open -eyeOpenThreshold 0.25 +eyeOpenThreshold 0.21 # Max eye aspect ratio to switch to a closed "smiley eye" eyeSmileEyeOpenThreshold 0.6 @@ -82,6 +80,13 @@ eyeSmileMouthFormThreshold 0.75 # "Mouth open" is 1 when fully open, and 0 when closed eyeSmileMouthOpenThreshold 0.5 +# Enable winks (experimental) +# Winks may or may not work well on your face, depending on the dataset. +# If all you get is ugly asynchronous blinks, consider setting this to +# zero instead. +# Also, this does not seem to work very well when wearing glasses. +winkEnable 1 + # Section 1.3: Mouth control # Two parameters are passed to Cubism to control the mouth: @@ -118,9 +123,9 @@ mouthOpenLaughCorrection 0.2 # but it will also cause more lag between your movement and the movement # of the avatar, and quick movements (e.g. blinks) may be completely missed. -faceXAngleNumTaps 11 -faceYAngleNumTaps 11 -faceZAngleNumTaps 11 +faceXAngleNumTaps 7 +faceYAngleNumTaps 7 +faceZAngleNumTaps 7 mouthFormNumTaps 3 mouthOpenNumTaps 3 leftEyeOpenNumTaps 3 diff --git a/include/facial_landmark_detector.h b/include/facial_landmark_detector.h index 85f3512..3956c2f 100644 --- a/include/facial_landmark_detector.h +++ b/include/facial_landmark_detector.h @@ -1,10 +1,10 @@ // -*- mode: c++ -*- -#ifndef __FACIAL_LANDMARK_DETECTOR_H__ -#define __FACIAL_LANDMARK_DETECTOR_H__ +#ifndef FACIAL_LANDMARK_DETECTOR_H +#define FACIAL_LANDMARK_DETECTOR_H /**** -Copyright (c) 2020 Adrian I. Lam +Copyright (c) 2020-2021 Adrian I. Lam Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -58,11 +58,12 @@ public: bool autoBreath; bool randomMotion; // TODO eyebrows currently not supported... 
- // I'd like to include them, but the dlib detection is very + // I'd like to include them, but the dlib / OSF detection is very // noisy and inaccurate (at least for my face). }; FacialLandmarkDetector(std::string cfgPath); + ~FacialLandmarkDetector(); Params getParams(void) const; @@ -71,6 +72,9 @@ public: void mainLoop(void); private: + FacialLandmarkDetector(const FacialLandmarkDetector&) = delete; + FacialLandmarkDetector& operator=(const FacialLandmarkDetector &) = delete; + enum LeftRight : bool { LEFT, @@ -79,6 +83,9 @@ private: bool m_stop; + int m_sock; + static const int m_faceId = 0; // Only support one face for now + double calcEyeAspectRatio(Point& p1, Point& p2, Point& p3, Point& p4, Point& p5, Point& p6) const; @@ -115,11 +122,12 @@ private: struct Config { + std::string osfIpAddress; + int osfPort; double faceYAngleCorrection; double eyeSmileEyeOpenThreshold; double eyeSmileMouthFormThreshold; double eyeSmileMouthOpenThreshold; - bool lateralInversion; std::size_t faceXAngleNumTaps; std::size_t faceYAngleNumTaps; std::size_t faceZAngleNumTaps; @@ -139,6 +147,7 @@ private: double faceYAngleZeroValue; double faceYAngleUpThreshold; double faceYAngleDownThreshold; + bool winkEnable; bool autoBlink; bool autoBreath; bool randomMotion; diff --git a/src/facial_landmark_detector.cpp b/src/facial_landmark_detector.cpp index da24999..b953240 100644 --- a/src/facial_landmark_detector.cpp +++ b/src/facial_landmark_detector.cpp @@ -1,5 +1,5 @@ /**** -Copyright (c) 2020 Adrian I. Lam +Copyright (c) 2020-2021 Adrian I. Lam Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,12 @@ SOFTWARE. 
#include #include +#include +#include +#include +#include +#include + #include "facial_landmark_detector.h" #include "math_utils.h" @@ -45,7 +51,27 @@ FacialLandmarkDetector::FacialLandmarkDetector(std::string cfgPath) { parseConfig(cfgPath); - // TODO setup UDP connection here? + struct sockaddr_in addr; + addr.sin_family = AF_INET; + addr.sin_port = htons(m_cfg.osfPort); + addr.sin_addr.s_addr = inet_addr(m_cfg.osfIpAddress.c_str()); + + m_sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (m_sock < 0) + { + throw std::runtime_error("Cannot create UDP socket"); + } + + int ret = bind(m_sock, (struct sockaddr *)&addr, sizeof addr); + if (ret != 0) + { + throw std::runtime_error("Cannot bind socket"); + } +} + +FacialLandmarkDetector::~FacialLandmarkDetector() +{ + close(m_sock); } FacialLandmarkDetector::Params FacialLandmarkDetector::getParams(void) const @@ -61,26 +87,34 @@ FacialLandmarkDetector::Params FacialLandmarkDetector::getParams(void) const double leftEye = avg(m_leftEyeOpenness, 1); double rightEye = avg(m_rightEyeOpenness, 1); - // Just combine the two to get better synchronized blinks - // This effectively disables winks, so if we want to - // support winks in the future (see below) we will need - // a better way to handle this out-of-sync blinks. - double bothEyes = (leftEye + rightEye) / 2; - leftEye = bothEyes; - rightEye = bothEyes; - // Detect winks and make them look better - // Commenting out - winks are difficult to be detected by the - // dlib data set anyway... maybe in the future we can - // add a runtime option to enable/disable... 
- /*if (right == 0 && left > 0.2) + bool sync = !m_cfg.winkEnable; + + if (m_cfg.winkEnable) { - left = 1; + if (rightEye < 0.1 && leftEye > 0.2) + { + leftEye = 1; + rightEye = 0; + } + else if (leftEye < 0.1 && rightEye > 0.2) + { + leftEye = 0; + rightEye = 1; + } + else + { + sync = true; + } } - else if (left == 0 && right > 0.2) + + if (sync) { - right = 1; + // Combine the two to get better synchronized blinks + double bothEyes = (leftEye + rightEye) / 2; + leftEye = bothEyes; + rightEye = bothEyes; } - */ + params.leftEyeOpenness = leftEye; params.rightEyeOpenness = rightEye; @@ -114,14 +148,34 @@ void FacialLandmarkDetector::mainLoop(void) { while (!m_stop) { - if (m_cfg.lateralInversion) - { - // TODO is it something we can do here? Or in OSF only? - } + // Read UDP packet from OSF + static const int nPoints = 68; + static const int packetFrameSize = 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4 + + 4 * 4 + 4 * 68 + 4 * 2 * 68 + 4 * 3 * 70 + 4 * 14; + + static const int landmarksOffset = 8 + 4 + 2 * 4 + 2 * 4 + 1 + 4 + 3 * 4 + 3 * 4 + + 4 * 4 + 4 * 68; - // TODO get the array of landmark coordinates here - Point landmarks[68]; + uint8_t buf[packetFrameSize]; + ssize_t recvSize = recv(m_sock, buf, sizeof buf, 0); + if (recvSize != packetFrameSize) continue; + // Note: This is dependent on endianness, and we would assume that + // the OSF instance is run on a machine with the same endianness + // as our current machine. + int recvFaceId = *(int *)(buf + 8); + if (recvFaceId != m_faceId) continue; // We only support one face + + Point landmarks[nPoints]; + + for (int i = 0; i < nPoints; i++) + { + float x = *(float *)(buf + landmarksOffset + i * 2 * sizeof(float)); + float y = *(float *)(buf + landmarksOffset + (i * 2 + 1) * sizeof(float)); + + landmarks[i].x = x; + landmarks[i].y = y; + } /* The coordinates seem to be rather noisy in general. 
* We will push everything through some moving average filters @@ -157,7 +211,8 @@ void FacialLandmarkDetector::mainLoop(void) double eyeRightOpen = calcEyeOpenness(RIGHT, landmarks, faceYRot); filterPush(m_rightEyeOpenness, eyeRightOpen, m_cfg.rightEyeOpenNumTaps); - // TODO eyebrows? + // Eyebrows: the landmark detection doesn't work very well for my face, + // so I've not implemented them. } } @@ -392,7 +447,23 @@ void FacialLandmarkDetector::parseConfig(std::string cfgPath) std::string paramName; if (ss >> paramName) { - if (paramName == "faceYAngleCorrection") + if (paramName == "osfIpAddress") + { + if (!(ss >> m_cfg.osfIpAddress)) + { + throwConfigError(paramName, "std::string", + line, lineNum); + } + } + else if (paramName == "osfPort") + { + if (!(ss >> m_cfg.osfPort)) + { + throwConfigError(paramName, "int", + line, lineNum); + } + } + else if (paramName == "faceYAngleCorrection") { if (!(ss >> m_cfg.faceYAngleCorrection)) { @@ -424,14 +495,6 @@ void FacialLandmarkDetector::parseConfig(std::string cfgPath) line, lineNum); } } - else if (paramName == "lateralInversion") - { - if (!(ss >> m_cfg.lateralInversion)) - { - throwConfigError(paramName, "bool", - line, lineNum); - } - } else if (paramName == "faceXAngleNumTaps") { if (!(ss >> m_cfg.faceXAngleNumTaps)) @@ -504,6 +567,14 @@ void FacialLandmarkDetector::parseConfig(std::string cfgPath) line, lineNum); } } + else if (paramName == "winkEnable") + { + if (!(ss >> m_cfg.winkEnable)) + { + throwConfigError(paramName, "bool", + line, lineNum); + } + } else if (paramName == "mouthNormalThreshold") { if (!(ss >> m_cfg.mouthNormalThreshold)) @@ -629,16 +700,16 @@ void FacialLandmarkDetector::populateDefaultConfig(void) m_cfg.eyeSmileEyeOpenThreshold = 0.6; m_cfg.eyeSmileMouthFormThreshold = 0.75; m_cfg.eyeSmileMouthOpenThreshold = 0.5; - m_cfg.lateralInversion = true; - m_cfg.faceXAngleNumTaps = 11; - m_cfg.faceYAngleNumTaps = 11; - m_cfg.faceZAngleNumTaps = 11; + m_cfg.faceXAngleNumTaps = 7; + 
m_cfg.faceYAngleNumTaps = 7; + m_cfg.faceZAngleNumTaps = 7; m_cfg.mouthFormNumTaps = 3; m_cfg.mouthOpenNumTaps = 3; m_cfg.leftEyeOpenNumTaps = 3; m_cfg.rightEyeOpenNumTaps = 3; - m_cfg.eyeClosedThreshold = 0.2; - m_cfg.eyeOpenThreshold = 0.25; + m_cfg.eyeClosedThreshold = 0.18; + m_cfg.eyeOpenThreshold = 0.21; + m_cfg.winkEnable = true; m_cfg.mouthNormalThreshold = 0.75; m_cfg.mouthSmileThreshold = 1.0; m_cfg.mouthClosedThreshold = 0.1; diff --git a/src/math_utils.h b/src/math_utils.h index f6b3d19..24e53ab 100644 --- a/src/math_utils.h +++ b/src/math_utils.h @@ -1,10 +1,10 @@ // -*- mode: c++ -*- -#ifndef __FACE_DETECTOR_MATH_UTILS_H__ -#define __FACE_DETECTOR_MATH_UTILS_H__ +#ifndef FACE_DETECTOR_MATH_UTILS_H +#define FACE_DETECTOR_MATH_UTILS_H /**** -Copyright (c) 2020 Adrian I. Lam +Copyright (c) 2020-2021 Adrian I. Lam Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal -- 2.7.4