이전에 파이썬으로 블라즈핸드 돌렸는데
이번에는 C++로 적용
아래는 블라즈팜, 블라즈핸즈 돌려보는 단일 코드
#include <opencv2/opencv.hpp>
#include <vector>
std::vector<std::array<int, 2>> HAND_CONNECTIONS = {
{0, 1}, {1, 2}, {2, 3}, {3, 4},
{5, 6}, {6, 7}, {7, 8},
{9, 10}, {10, 11}, {11, 12},
{13, 14}, {14, 15}, {15, 16},
{17, 18}, {18, 19}, {19, 20},
{0, 5}, {5, 9}, {9, 13}, {13, 17}, {0, 17}
};
// var and funcs for blazepalm
struct PalmDetection {
float ymin;
float xmin;
float ymax;
float xmax;
cv::Point2d kp_arr[7];
float score;
};
int blazeHandSize = 256;
int blazePalmSize = 128;
float palmMinScoreThresh = 0.7;
float palmMinNMSThresh = 0.4;
int palmMinNumKeyPoints = 7;
void ResizeAndPad(
cv::Mat& srcimg, cv::Mat& img256,
cv::Mat& img128, float& scale, cv::Scalar& pad
);
std::vector<PalmDetection> PredictPalmDetections(cv::Mat& img, cv::Mat& frame);
PalmDetection GetPalmDetection(cv::Mat regressor, cv::Mat classificator,
int stride, int anchor_count, int column, int row, int anchor, int offset);
float sigmoid(float x);
std::vector<PalmDetection> DenormalizePalmDetections(std::vector<PalmDetection> detections, int width, int height, cv::Scalar pad);
void DrawPalmDetections(cv::Mat& img, std::vector<PalmDetection> denormDets);
std::vector<PalmDetection> FilteringDets(std::vector<PalmDetection> detections, int width, int height);
std::vector<cv::Rect> convertHandRects(std::vector<PalmDetection> filteredDets);
void DrawRects(cv::Mat& img, std::vector<cv::Rect> rects);
void GetHandImages(cv::Mat& img, std::vector<cv::Rect> rects, std::vector<cv::Mat>& handImgs);
std::vector<cv::Mat> PredictHandDetections(std::vector<cv::Mat>& imgs);
std::vector<cv::Mat> DenormalizeHandLandmarks(std::vector<cv::Mat> imgs_landmarks, std::vector<cv::Rect> rects);
void DrawHandDetections(cv::Mat img, std::vector<cv::Mat> denorm_imgs_landmarks);
cv::dnn::Net blazePalm = cv::dnn::readNetFromONNX("c:/blazepalm_old.onnx");
cv::dnn::Net blazeHand = cv::dnn::readNetFromONNX("c:/blazehand.onnx");
float blazeHandDScale = 3;
int webcamWidth = 640;
int webcamHeight = 480;
cv::Mat img256;
cv::Mat img128;
float scale;
cv::Scalar pad;
int main() {
cv::Mat frame;
cv::VideoCapture cap(0);
if (!cap.isOpened()) {
std::cout << "Failed to open camera" << std::endl;
return -1;
}
while (true) {
bool ret = cap.read(frame);
if (!ret)
{
break;
}
ResizeAndPad(frame, img256, img128, scale, pad);
//UE_LOG(LogTemp, Log, TEXT("scale value: %f, pad value: (%f, %f)"), scale, pad[0], pad[1]);
std::vector<PalmDetection> normDets = PredictPalmDetections(img128, frame);
std::vector<PalmDetection> denormDets = DenormalizePalmDetections(normDets, webcamWidth, webcamHeight, pad);
std::vector<PalmDetection> filteredDets = FilteringDets(denormDets, webcamWidth, webcamHeight);
std::vector<cv::Rect> handRects = convertHandRects(filteredDets);
std::vector<cv::Mat> handImgs;
GetHandImages(frame, handRects, handImgs);
std::vector<cv::Mat> imgs_landmarks = PredictHandDetections(handImgs);
std::vector<cv::Mat> denorm_imgs_landmarks = DenormalizeHandLandmarks(imgs_landmarks, handRects);
std::string dets_size_str = "filtered dets : " + std::to_string(filteredDets.size()) + ", norm dets : " + std::to_string(normDets.size()) + ", denorm dets : " + std::to_string(denormDets.size());
cv::putText(frame, dets_size_str, cv::Point(30, 30), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 2);
DrawRects(frame, handRects);
DrawPalmDetections(frame, filteredDets);
DrawHandDetections(frame, denorm_imgs_landmarks);
cv::imshow("Camera Streaming", frame);
cv::imshow("img256", img256);
cv::imshow("img128", img128);
if (cv::waitKey(1) == 'q') {
break;
}
}
cap.release();
cv::destroyAllWindows();
return 0;
}
void ResizeAndPad(
cv::Mat& srcimg, cv::Mat& img256,
cv::Mat& img128, float& scale, cv::Scalar& pad
)
{
float h1, w1;
int padw, padh;
cv::Size size0 = srcimg.size();
if (size0.height >= size0.width) {
h1 = 256;
w1 = 256 * size0.width / size0.height;
padh = 0;
padw = static_cast<int>(256 - w1);
scale = size0.width / static_cast<float>(w1);
}
else {
h1 = 256 * size0.height / size0.width;
w1 = 256;
padh = static_cast<int>(256 - h1);
padw = 0;
scale = size0.height / static_cast<float>(h1);
}
//UE_LOG(LogTemp, Log, TEXT("scale value: %f, size0.height: %d, size0.width : %d, h1 : %f"), scale, size0.height, size0.width, h1);
int padh1 = static_cast<int>(padh / 2);
int padh2 = static_cast<int>(padh / 2) + static_cast<int>(padh % 2);
int padw1 = static_cast<int>(padw / 2);
int padw2 = static_cast<int>(padw / 2) + static_cast<int>(padw % 2);
pad = cv::Scalar(static_cast<float>(padh1 * scale), static_cast<float>(padw1 * scale));
cv::resize(srcimg, img256, cv::Size(w1, h1));
cv::copyMakeBorder(img256, img256, padh1, padh2, padw1, padw2, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
cv::resize(img256, img128, cv::Size(128, 128));
cv::cvtColor(img256, img256, cv::COLOR_BGR2RGB);
cv::cvtColor(img128, img128, cv::COLOR_BGR2RGB);
}
std::vector<PalmDetection> PredictPalmDetections(cv::Mat& img, cv::Mat& frame)
{
std::vector<PalmDetection> beforeNMSResults;
std::vector<PalmDetection> afterNMSResults;
std::vector<float> scores;
std::vector<int> indices;
std::vector<cv::Rect> boundingBoxes;
cv::Mat tensor;
img.convertTo(tensor, CV_32F);
tensor = tensor / 127.5 - 1;
cv::Mat blob = cv::dnn::blobFromImage(tensor, 1.0, tensor.size(), 0, false, false, CV_32F);
std::vector<cv::String> outNames(2);
outNames[0] = "regressors";
outNames[1] = "classificators";
blazePalm.setInput(blob);
std::vector<cv::Mat> outputs;
blazePalm.forward(outputs, outNames);
cv::Mat classificator = outputs[0];
cv::Mat regressor = outputs[1];
for (int y = 0; y < 16; ++y) {
for (int x = 0; x < 16; ++x) {
for (int a = 0; a < 2; ++a) {
PalmDetection res = GetPalmDetection(regressor, classificator, 8, 2, x, y, a, 0);
if (res.score != 0)
{
beforeNMSResults.push_back(res);
cv::Point2d startPt = cv::Point2d(res.xmin * 128, res.ymin * 128);
cv::Point2d endPt = cv::Point2d(res.xmax * 128, res.ymax * 128);
boundingBoxes.push_back(cv::Rect(startPt, endPt));
scores.push_back(res.score);
}
}
}
}
for (int y = 0; y < 8; ++y) {
for (int x = 0; x < 8; ++x) {
for (int a = 0; a < 6; ++a) {
PalmDetection res = GetPalmDetection(regressor, classificator, 16, 6, x, y, a, 512);
if (res.score != 0)
{
beforeNMSResults.push_back(res);
cv::Point2d startPt = cv::Point2d(res.xmin * 128, res.ymin * 128);
cv::Point2d endPt = cv::Point2d(res.xmax * 128, res.ymax * 128);
boundingBoxes.push_back(cv::Rect(startPt, endPt));
scores.push_back(res.score);
}
}
}
}
std::vector<PalmDetection> denormBeforeNMSResults = DenormalizePalmDetections(beforeNMSResults, 640, 480, cv::Scalar(80, 0));
for (auto& denormBeforeNMSResult : denormBeforeNMSResults)
{
cv::Point2d startPt = cv::Point2d(denormBeforeNMSResult.xmin, denormBeforeNMSResult.ymin);
cv::Point2d endPt = cv::Point2d(denormBeforeNMSResult.xmax, denormBeforeNMSResult.ymax);
//cv::rectangle(frame, cv::Rect(startPt, endPt), cv::Scalar(0, 0, 255), 1);
std::string score_str = std::to_string(static_cast<int>(denormBeforeNMSResult.score * 100));
//cv::putText(frame, score_str, cv::Point(startPt.x, startPt.y - 10), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 0, 255), 2);
/*
for (int i = 0; i < palmMinNumKeyPoints; i++)
cv::circle(frame, denormBeforeNMSResult.kp_arr[i], 3, cv::Scalar(0, 0, 255), -1);
*/
}
cv::dnn::NMSBoxes(boundingBoxes, scores, palmMinScoreThresh, palmMinNMSThresh, indices);
for (int i = 0; i < indices.size(); i++) {
int idx = indices[i];
afterNMSResults.push_back(beforeNMSResults[idx]);
}
std::vector<PalmDetection> denormAfterNMSResults = DenormalizePalmDetections(afterNMSResults, 640, 480, cv::Scalar(80, 0));
for (auto& denormAfterNMSResult : denormAfterNMSResults)
{
cv::Point2d startPt = cv::Point2d(denormAfterNMSResult.xmin, denormAfterNMSResult.ymin);
cv::Point2d endPt = cv::Point2d(denormAfterNMSResult.xmax, denormAfterNMSResult.ymax);
cv::rectangle(frame, cv::Rect(startPt, endPt), cv::Scalar(0, 255, 255), 2);
for (int i = 0; i < palmMinNumKeyPoints; i++)
cv::circle(frame, denormAfterNMSResult.kp_arr[i], 6, cv::Scalar(0, 255, 255), -1);
}
return afterNMSResults;
}
PalmDetection GetPalmDetection(cv::Mat regressor, cv::Mat classificator,
int stride, int anchor_count, int column, int row, int anchor, int offset) {
PalmDetection res;
int index = (int(row * 128 / stride) + column) * anchor_count + anchor + offset;
float origin_score = regressor.at<float>(0, index, 0);
float score = sigmoid(origin_score);
if (score < palmMinScoreThresh) return res;
float x = classificator.at<float>(0, index, 0);
float y = classificator.at<float>(0, index, 1);
float w = classificator.at<float>(0, index, 2);
float h = classificator.at<float>(0, index, 3);
x += (column + 0.5) * stride - w / 2;
y += (row + 0.5) * stride - h / 2;
res.ymin = (y) / blazePalmSize;
res.xmin = (x) / blazePalmSize;
res.ymax = (y + h) / blazePalmSize;
res.xmax = (x + w) / blazePalmSize;
//std::cout << "score : " << score << ", coord : " << res.ymin << ", " << res.xmin << ", " << res.ymax << ", " << res.xmax << std::endl;
//if ((res.ymin < 0) || (res.xmin < 0) || (res.xmax > 1) || (res.ymax > 1)) return res;
res.score = score;
std::vector<cv::Point2d> kpts;
for (int key_id = 0; key_id < palmMinNumKeyPoints; key_id++)
{
float kpt_x = classificator.at<float>(0, index, 4 + key_id * 2);
float kpt_y = classificator.at<float>(0, index, 5 + key_id * 2);
kpt_x += (column + 0.5) * stride;
kpt_x = kpt_x / blazePalmSize;
kpt_y += (row + 0.5) * stride;
kpt_y = kpt_y / blazePalmSize;
//UE_LOG(LogTemp, Log, TEXT("kpt id(%d) : (%f, %f)"), key_id, kpt_x, kpt_y);
res.kp_arr[key_id] = cv::Point2d(kpt_x, kpt_y);
}
return res;
}
float sigmoid(float x) {
return 1 / (1 + exp(-x));
}
std::vector<PalmDetection> DenormalizePalmDetections(std::vector<PalmDetection> detections, int width, int height, cv::Scalar pad)
{
std::vector<PalmDetection> denormDets;
int scale = 0;
if (width > height)
scale = width;
else
scale = height;
for (auto& det : detections)
{
PalmDetection denormDet;
denormDet.ymin = det.ymin * scale - pad[0];
denormDet.xmin = det.xmin * scale - pad[1];
denormDet.ymax = det.ymax * scale - pad[0];
denormDet.xmax = det.xmax * scale - pad[1];
denormDet.score = det.score;
for (int i = 0; i < palmMinNumKeyPoints; i++)
{
cv::Point2d pt_new = cv::Point2d(det.kp_arr[i].x * scale - pad[1], det.kp_arr[i].y * scale - pad[0]);
//UE_LOG(LogTemp, Log, TEXT("denorm kpt id(%d) : (%f, %f)"), i, pt_new.x, pt_new.y);
denormDet.kp_arr[i] = pt_new;
}
denormDets.push_back(denormDet);
}
return denormDets;
}
void DrawPalmDetections(cv::Mat& img, std::vector<PalmDetection> denormDets)
{
for (auto& denormDet : denormDets)
{
cv::Point2d startPt = cv::Point2d(denormDet.xmin, denormDet.ymin);
cv::Point2d endPt = cv::Point2d(denormDet.xmax, denormDet.ymax);
cv::rectangle(img, cv::Rect(startPt, endPt), cv::Scalar(255, 0, 0), 1);
for (int i = 0; i < palmMinNumKeyPoints; i++)
cv::circle(img, denormDet.kp_arr[i], 5, cv::Scalar(255, 0, 0), -1);
std::string score_str = std::to_string(static_cast<int>(denormDet.score * 100)) + "%";
cv::putText(img, score_str, cv::Point(startPt.x, startPt.y - 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 255), 2);
}
}
std::vector<PalmDetection> FilteringDets(std::vector<PalmDetection> detections, int width, int height)
{
std::vector<PalmDetection> filteredDets;
for (auto& denormDet : detections)
{
cv::Point2d startPt = cv::Point2d(denormDet.xmin, denormDet.ymin);
if (startPt.x < 10 || startPt.y < 10)
continue;
if (startPt.x > width || startPt.y > height)
continue;
int w = denormDet.xmax - denormDet.xmin;
int y = denormDet.ymax - denormDet.ymin;
if ((w * y < 40 * 40) || (w * y > (width * 0.7) * (height * 0.7)))
continue;
filteredDets.push_back(denormDet);
}
return filteredDets;
}
std::vector<cv::Rect> convertHandRects(std::vector<PalmDetection> filteredDets)
{
std::vector<cv::Rect> handRects;
for (auto& det : filteredDets)
{
int width = det.xmax - det.xmin;
int height = det.ymax - det.ymin;
int large_length = 0;
int center_x = static_cast<int>((det.xmax + det.xmin) / 2);
int center_y = static_cast<int>((det.ymax + det.ymin) / 2);
if (width > height)
large_length = height * blazeHandDScale;
else
large_length = height * blazeHandDScale;
int start_x = center_x - static_cast<int>(large_length / 2);
int start_y = center_y - static_cast<int>(large_length / 2) - large_length * 0.1;
int end_x = center_x + static_cast<int>(large_length / 2);
int end_y = center_y + static_cast<int>(large_length / 2) - large_length * 0.1;
start_x = std::max(start_x, 0);
start_y = std::max(start_y, 0);
end_x = std::min(end_x, webcamWidth);
end_y = std::min(end_y, webcamHeight);
cv::Point2d startPt = cv::Point2d(start_x, start_y);
cv::Point2d endPt = cv::Point2d(end_x, end_y);
cv::Rect handRect(startPt, endPt);
handRects.push_back(handRect);
}
return handRects;
}
void DrawRects(cv::Mat& img, std::vector<cv::Rect> rects)
{
for (auto& rect : rects)
{
cv::rectangle(img, rect, cv::Scalar(255, 255, 255), 1);
}
}
void GetHandImages(cv::Mat& img, std::vector<cv::Rect> rects, std::vector<cv::Mat>& handImgs)
{
for (auto& rect : rects)
{
//std::cout << "img size : " << img.size() << ", rect : " << rect << std::endl;
//cv::Mat roi(cv::Size(rect.width, rect.height), img.type(), cv::Scalar(0, 0, 0))
cv::Mat roi = img(rect).clone();
cv::cvtColor(roi, roi, cv::COLOR_BGR2RGB);
cv::resize(roi, roi, cv::Size(blazeHandSize, blazeHandSize));
handImgs.push_back(roi);
}
}
std::vector<cv::Mat> PredictHandDetections(std::vector<cv::Mat>& imgs)
{
std::vector<cv::Mat> imgs_landmarks;
for (auto& img : imgs)
{
cv::Mat tensor;
img.convertTo(tensor, CV_32F);
tensor = tensor / blazeHandSize;
cv::Mat blob = cv::dnn::blobFromImage(tensor, 1.0, tensor.size(), 0, false, false, CV_32F);
std::vector<cv::String> outNames(3);
outNames[0] = "hand_flag";
outNames[1] = "handedness";
outNames[2] = "landmarks";
blazeHand.setInput(blob);
std::vector<cv::Mat> outputs;
blazeHand.forward(outputs, outNames);
cv::Mat handflag = outputs[0];
cv::Mat handedness = outputs[1];
cv::Mat landmarks = outputs[2];
imgs_landmarks.push_back(landmarks);
}
return imgs_landmarks;
}
std::vector<cv::Mat> DenormalizeHandLandmarks(std::vector<cv::Mat> imgs_landmarks, std::vector<cv::Rect> rects)
{
std::vector<cv::Mat> denorm_imgs_landmarks;
for (int i = 0; i < imgs_landmarks.size(); i++)
{
cv::Mat landmarks = imgs_landmarks.at(i);
cv::Rect rect = rects.at(i);
cv::Mat squeezed = landmarks.reshape(0, landmarks.size[1]);
//std::cout << "squeezed size: " << squeezed.size[0] << "," << squeezed.size[1] << std::endl;
for (int j = 0; j < squeezed.size[0]; j++)
{
squeezed.at<float>(j, 0) = squeezed.at<float>(j, 0) * rect.width + rect.x;
squeezed.at<float>(j, 1) = squeezed.at<float>(j, 1) * rect.height + rect.y;
squeezed.at<float>(j, 2) = squeezed.at<float>(j, 2) * rect.height * -1;
}
denorm_imgs_landmarks.push_back(squeezed);
}
//std::cout << std::endl;
return denorm_imgs_landmarks;
}
void DrawHandDetections(cv::Mat img, std::vector<cv::Mat> denorm_imgs_landmarks)
{
for (auto& denorm_landmarks : denorm_imgs_landmarks)
{
for (int i = 0; i < denorm_landmarks.size[0]; i++)
{
float x = denorm_landmarks.at<float>(i, 0);
float y = denorm_landmarks.at<float>(i, 1);
cv::Point pt(x, y);
cv::circle(img, pt, 5, cv::Scalar(0, 255, 0), -1);
}
for (auto& connection : HAND_CONNECTIONS)
{
int startPtIdx = connection[0];
int endPtIdx = connection[1];
float startPtX = denorm_landmarks.at<float>(startPtIdx, 0);
float startPtY = denorm_landmarks.at<float>(startPtIdx, 1);
float endPtX = denorm_landmarks.at<float>(endPtIdx, 0);
float endPtY = denorm_landmarks.at<float>(endPtIdx, 1);
cv::line(img, cv::Point(startPtX, startPtY), cv::Point(endPtX, endPtY), cv::Scalar(255, 255, 255), 3);
}
}
}
언리얼에 적용 결과
블라즈 해더 cpp
// Fill out your copyright notice in the Description page of Project Settings.
#pragma once
#include "PreOpenCVHeaders.h"
#include <opencv2/opencv.hpp>
#include "PostOpenCVHeaders.h"
#include "CoreMinimal.h"
/**
*
*/
class HANDDESKTOP_API Blaze
{
public:
Blaze();
~Blaze();
cv::dnn::Net blazePalm;
cv::dnn::Net blazeHand;
// var and funcs for blazepalm
struct PalmDetection {
float ymin;
float xmin;
float ymax;
float xmax;
cv::Point2d kp_arr[7];
float score;
};
int blazeHandSize = 256;
int blazePalmSize = 128;
float palmMinScoreThresh = 0.7;
float palmMinNMSThresh = 0.4;
int palmMinNumKeyPoints = 7;
void ResizeAndPad(
cv::Mat& srcimg, cv::Mat& img256,
cv::Mat& img128, float& scale, cv::Scalar& pad
);
// funcs for blaze palm
std::vector<PalmDetection> PredictPalmDetections(cv::Mat& img);
PalmDetection GetPalmDetection(cv::Mat regressor, cv::Mat classificator,
int stride, int anchor_count, int column, int row, int anchor, int offset);
float sigmoid(float x);
std::vector<PalmDetection> DenormalizePalmDetections(std::vector<PalmDetection> detections, int width, int height, cv::Scalar pad);
void DrawPalmDetections(cv::Mat& img, std::vector<Blaze::PalmDetection> denormDets);
void DrawDetsInfo(cv::Mat& img, std::vector<Blaze::PalmDetection> filteredDets, std::vector<Blaze::PalmDetection> normDets, std::vector<Blaze::PalmDetection> denormDets);
std::vector<PalmDetection> FilteringDets(std::vector<PalmDetection> detections, int width, int height);
// vars and funcs for blaze hand
std::vector<std::array<int, 2>> HAND_CONNECTIONS = {
{0, 1}, {1, 2}, {2, 3}, {3, 4},
{5, 6}, {6, 7}, {7, 8},
{9, 10}, {10, 11}, {11, 12},
{13, 14}, {14, 15}, {15, 16},
{17, 18}, {18, 19}, {19, 20},
{0, 5}, {5, 9}, {9, 13}, {13, 17}, {0, 17}
};
float blazeHandDScale = 3;
int webcamWidth = 640;
int webcamHeight = 480;
std::vector<cv::Rect> convertHandRects(std::vector<PalmDetection> filteredDets);
void DrawRects(cv::Mat& img, std::vector<cv::Rect> rects);
void GetHandImages(cv::Mat& img, std::vector<cv::Rect> rects, std::vector<cv::Mat>& handImgs);
std::vector<cv::Mat> PredictHandDetections(std::vector<cv::Mat>& imgs);
std::vector<cv::Mat> DenormalizeHandLandmarks(std::vector<cv::Mat> imgs_landmarks, std::vector<cv::Rect> rects);
void DrawHandDetections(cv::Mat img, std::vector<cv::Mat> denorm_imgs_landmarks);
};
// Fill out your copyright notice in the Description page of Project Settings.
#include "Blaze.h"
Blaze::Blaze()
{
this->blazePalm = cv::dnn::readNet("c:/blazepalm_old.onnx");
this->blazeHand = cv::dnn::readNet("c:/blazehand.onnx");
}
Blaze::~Blaze()
{
}
void Blaze::ResizeAndPad(
cv::Mat& srcimg, cv::Mat& img256,
cv::Mat& img128, float& scale, cv::Scalar& pad
)
{
float h1, w1;
int padw, padh;
cv::Size size0 = srcimg.size();
if (size0.height >= size0.width) {
h1 = 256;
w1 = 256 * size0.width / size0.height;
padh = 0;
padw = static_cast<int>(256 - w1);
scale = size0.width / static_cast<float>(w1);
}
else {
h1 = 256 * size0.height / size0.width;
w1 = 256;
padh = static_cast<int>(256 - h1);
padw = 0;
scale = size0.height / static_cast<float>(h1);
}
//UE_LOG(LogTemp, Log, TEXT("scale value: %f, size0.height: %d, size0.width : %d, h1 : %f"), scale, size0.height, size0.width, h1);
int padh1 = static_cast<int>(padh / 2);
int padh2 = static_cast<int>(padh / 2) + static_cast<int>(padh % 2);
int padw1 = static_cast<int>(padw / 2);
int padw2 = static_cast<int>(padw / 2) + static_cast<int>(padw % 2);
pad = cv::Scalar(static_cast<float>(padh1 * scale), static_cast<float>(padw1 * scale));
cv::resize(srcimg, img256, cv::Size(w1, h1));
cv::copyMakeBorder(img256, img256, padh1, padh2, padw1, padw2, cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
cv::resize(img256, img128, cv::Size(128, 128));
cv::cvtColor(img256, img256, cv::COLOR_BGR2RGB);
cv::cvtColor(img128, img128, cv::COLOR_BGR2RGB);
}
std::vector<Blaze::PalmDetection> Blaze::PredictPalmDetections(cv::Mat& img)
{
std::vector<Blaze::PalmDetection> beforeNMSResults;
std::vector<Blaze::PalmDetection> afterNMSResults;
std::vector<float> scores;
std::vector<int> indices;
std::vector<cv::Rect> boundingBoxes;
cv::Mat tensor;
img.convertTo(tensor, CV_32F);
tensor = tensor / 127.5 - 1;
cv::Mat blob = cv::dnn::blobFromImage(tensor, 1.0, tensor.size(), 0, false, false, CV_32F);
std::vector<cv::String> outNames(2);
outNames[0] = "regressors";
outNames[1] = "classificators";
blazePalm.setInput(blob);
std::vector<cv::Mat> outputs;
blazePalm.forward(outputs, outNames);
cv::Mat classificator = outputs[0];
cv::Mat regressor = outputs[1];
for (int y = 0; y < 16; ++y) {
for (int x = 0; x < 16; ++x) {
for (int a = 0; a < 2; ++a) {
PalmDetection res = GetPalmDetection(regressor, classificator, 8, 2, x, y, a, 0);
if (res.score != 0)
{
beforeNMSResults.push_back(res);
cv::Point2d startPt = cv::Point2d(res.xmin * blazePalmSize, res.ymin * blazePalmSize);
cv::Point2d endPt = cv::Point2d(res.xmax * blazePalmSize, res.ymax * blazePalmSize);
boundingBoxes.push_back(cv::Rect(startPt, endPt));
scores.push_back(res.score);
}
}
}
}
for (int y = 0; y < 8; ++y) {
for (int x = 0; x < 8; ++x) {
for (int a = 0; a < 6; ++a) {
PalmDetection res = GetPalmDetection(regressor, classificator, 16, 6, x, y, a, 512);
if (res.score != 0)
{
beforeNMSResults.push_back(res);
cv::Point2d startPt = cv::Point2d(res.xmin * blazePalmSize, res.ymin * blazePalmSize);
cv::Point2d endPt = cv::Point2d(res.xmax * blazePalmSize, res.ymax * blazePalmSize);
boundingBoxes.push_back(cv::Rect(startPt, endPt));
scores.push_back(res.score);
}
}
}
}
cv::dnn::NMSBoxes(boundingBoxes, scores, palmMinScoreThresh, palmMinNMSThresh, indices);
for (int i = 0; i < indices.size(); i++) {
int idx = indices[i];
afterNMSResults.push_back(beforeNMSResults[idx]);
}
return afterNMSResults;
}
Blaze::PalmDetection Blaze::GetPalmDetection(cv::Mat regressor, cv::Mat classificator,
int stride, int anchor_count, int column, int row, int anchor, int offset) {
Blaze::PalmDetection res;
int index = (int(row * 128 / stride) + column) * anchor_count + anchor + offset;
float origin_score = regressor.at<float>(0, index, 0);
float score = sigmoid(origin_score);
if (score < palmMinScoreThresh) return res;
float x = classificator.at<float>(0, index, 0);
float y = classificator.at<float>(0, index, 1);
float w = classificator.at<float>(0, index, 2);
float h = classificator.at<float>(0, index, 3);
x += (column + 0.5) * stride - w / 2;
y += (row + 0.5) * stride - h / 2;
res.ymin = (y) / blazePalmSize;
res.xmin = (x) / blazePalmSize;
res.ymax = (y + h) / blazePalmSize;
res.xmax = (x + w) / blazePalmSize;
//if ((res.ymin < 0) || (res.xmin < 0) || (res.xmax > 1) || (res.ymax > 1)) return res;
res.score = score;
std::vector<cv::Point2d> kpts;
for (int key_id = 0; key_id < palmMinNumKeyPoints; key_id++)
{
float kpt_x = classificator.at<float>(0, index, 4 + key_id * 2);
float kpt_y = classificator.at<float>(0, index, 5 + key_id * 2);
kpt_x += (column + 0.5) * stride;
kpt_x = kpt_x / blazePalmSize;
kpt_y += (row + 0.5) * stride;
kpt_y = kpt_y / blazePalmSize;
//UE_LOG(LogTemp, Log, TEXT("kpt id(%d) : (%f, %f)"), key_id, kpt_x, kpt_y);
res.kp_arr[key_id] = cv::Point2d(kpt_x, kpt_y);
}
return res;
}
float Blaze::sigmoid(float x) {
return 1 / (1 + exp(-x));
}
std::vector<Blaze::PalmDetection> Blaze::DenormalizePalmDetections(std::vector<Blaze::PalmDetection> detections, int width, int height, cv::Scalar pad)
{
std::vector<Blaze::PalmDetection> denormDets;
int scale = 0;
if (width > height)
scale = width;
else
scale = height;
for (auto& det : detections)
{
Blaze::PalmDetection denormDet;
denormDet.ymin = det.ymin * scale - pad[0];
denormDet.xmin = det.xmin * scale - pad[1];
denormDet.ymax = det.ymax * scale - pad[0];
denormDet.xmax = det.xmax * scale - pad[1];
denormDet.score = det.score;
for (int i = 0; i < palmMinNumKeyPoints; i++)
{
cv::Point2d pt_new = cv::Point2d(det.kp_arr[i].x * scale - pad[1], det.kp_arr[i].y * scale - pad[0]);
//UE_LOG(LogTemp, Log, TEXT("denorm kpt id(%d) : (%f, %f)"), i, pt_new.x, pt_new.y);
denormDet.kp_arr[i] = pt_new;
}
denormDets.push_back(denormDet);
}
return denormDets;
}
void Blaze::DrawPalmDetections(cv::Mat& img, std::vector<Blaze::PalmDetection> denormDets)
{
for (auto& denormDet : denormDets)
{
cv::Point2d startPt = cv::Point2d(denormDet.xmin, denormDet.ymin);
cv::Point2d endPt = cv::Point2d(denormDet.xmax, denormDet.ymax);
cv::rectangle(img, cv::Rect(startPt, endPt), cv::Scalar(255, 0, 0), 1);
for (int i = 0; i < palmMinNumKeyPoints; i++)
cv::circle(img, denormDet.kp_arr[i], 5, cv::Scalar(255, 0, 0), -1);
std::string score_str = std::to_string(static_cast<int>(denormDet.score * 100))+ "%";
cv::putText(img, score_str, cv::Point(startPt.x, startPt.y - 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 255), 2);
}
}
void Blaze::DrawDetsInfo(cv::Mat& img, std::vector<Blaze::PalmDetection> filteredDets, std::vector<Blaze::PalmDetection> normDets, std::vector<Blaze::PalmDetection> denormDets)
{
//puttext num of filtered/denorm dets
std::string dets_size_str = "filtered dets : " + std::to_string(filteredDets.size()) + ", norm dets : " + std::to_string(normDets.size()) + ", denorm dets : " + std::to_string(denormDets.size());
cv::putText(img, dets_size_str, cv::Point(30, 30), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 2);
for (int i = 0; i < filteredDets.size(); i++)
{
auto& det = filteredDets.at(i);
std::ostringstream oss;
oss << "filteredDets : (" << det.xmin << ", " << det.ymin << "),(" <<
det.xmax << ", " << det.ymax << ")";
std::string det_str = oss.str();
cv::putText(img, det_str, cv::Point(30, 50 + 20 * i), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(0, 0, 255), 2);
}
}
std::vector<Blaze::PalmDetection> Blaze::FilteringDets(std::vector<Blaze::PalmDetection> detections, int width, int height)
{
std::vector<Blaze::PalmDetection> filteredDets;
for (auto& denormDet : detections)
{
cv::Point2d startPt = cv::Point2d(denormDet.xmin, denormDet.ymin);
if (startPt.x < 10 || startPt.y < 10)
continue;
if (startPt.x > width || startPt.y > height)
continue;
int w = denormDet.xmax - denormDet.xmin;
int y = denormDet.ymax - denormDet.ymin;
if ((w * y < 40 * 40 )|| (w * y > (width * 0.7) * (height * 0.7)))
continue;
filteredDets.push_back(denormDet);
}
return filteredDets;
}
std::vector<cv::Rect> Blaze::convertHandRects(std::vector<PalmDetection> filteredDets)
{
std::vector<cv::Rect> handRects;
for (auto& det : filteredDets)
{
int width = det.xmax - det.xmin;
int height = det.ymax - det.ymin;
int large_length = 0;
int center_x = static_cast<int>((det.xmax + det.xmin) / 2);
int center_y = static_cast<int>((det.ymax + det.ymin) / 2);
if (width > height)
large_length = height * blazeHandDScale;
else
large_length = height * blazeHandDScale;
int start_x = center_x - static_cast<int>(large_length / 2);
int start_y = center_y - static_cast<int>(large_length / 2) - large_length * 0.1;
int end_x = center_x + static_cast<int>(large_length / 2);
int end_y = center_y + static_cast<int>(large_length / 2) - large_length * 0.1;
start_x = std::max(start_x, 0);
start_y = std::max(start_y, 0);
end_x = std::min(end_x, webcamWidth);
end_y = std::min(end_y, webcamHeight);
cv::Point2d startPt = cv::Point2d(start_x, start_y);
cv::Point2d endPt = cv::Point2d(end_x, end_y);
cv::Rect handRect(startPt, endPt);
handRects.push_back(handRect);
}
return handRects;
}
void Blaze::DrawRects(cv::Mat& img, std::vector<cv::Rect> rects)
{
for (auto& rect : rects)
{
cv::rectangle(img, rect, cv::Scalar(255, 255, 255), 1);
}
}
void Blaze::GetHandImages(cv::Mat& img, std::vector<cv::Rect> rects, std::vector<cv::Mat>& handImgs)
{
for (auto& rect : rects)
{
//std::cout << "img size : " << img.size() << ", rect : " << rect << std::endl;
//cv::Mat roi(cv::Size(rect.width, rect.height), img.type(), cv::Scalar(0, 0, 0))
cv::Mat roi = img(rect).clone();
cv::cvtColor(roi, roi, cv::COLOR_BGR2RGB);
cv::resize(roi, roi, cv::Size(blazeHandSize, blazeHandSize));
handImgs.push_back(roi);
}
}
std::vector<cv::Mat> Blaze::PredictHandDetections(std::vector<cv::Mat>& imgs)
{
std::vector<cv::Mat> imgs_landmarks;
for (auto& img : imgs)
{
cv::Mat tensor;
img.convertTo(tensor, CV_32F);
tensor = tensor / blazeHandSize;
cv::Mat blob = cv::dnn::blobFromImage(tensor, 1.0, tensor.size(), 0, false, false, CV_32F);
std::vector<cv::String> outNames(3);
outNames[0] = "hand_flag";
outNames[1] = "handedness";
outNames[2] = "landmarks";
blazeHand.setInput(blob);
std::vector<cv::Mat> outputs;
blazeHand.forward(outputs, outNames);
cv::Mat handflag = outputs[0];
cv::Mat handedness = outputs[1];
cv::Mat landmarks = outputs[2];
imgs_landmarks.push_back(landmarks);
}
return imgs_landmarks;
}
std::vector<cv::Mat> Blaze::DenormalizeHandLandmarks(std::vector<cv::Mat> imgs_landmarks, std::vector<cv::Rect> rects)
{
std::vector<cv::Mat> denorm_imgs_landmarks;
for (int i = 0; i < imgs_landmarks.size(); i++)
{
cv::Mat landmarks = imgs_landmarks.at(i);
cv::Rect rect = rects.at(i);
cv::Mat squeezed = landmarks.reshape(0, landmarks.size[1]);
//std::cout << "squeezed size: " << squeezed.size[0] << "," << squeezed.size[1] << std::endl;
for (int j = 0; j < squeezed.size[0]; j++)
{
squeezed.at<float>(j, 0) = squeezed.at<float>(j, 0) * rect.width + rect.x;
squeezed.at<float>(j, 1) = squeezed.at<float>(j, 1) * rect.height + rect.y;
squeezed.at<float>(j, 2) = squeezed.at<float>(j, 2) * rect.height * -1;
}
denorm_imgs_landmarks.push_back(squeezed);
}
//std::cout << std::endl;
return denorm_imgs_landmarks;
}
void Blaze::DrawHandDetections(cv::Mat img, std::vector<cv::Mat> denorm_imgs_landmarks)
{
for (auto& denorm_landmarks : denorm_imgs_landmarks)
{
for (int i = 0; i < denorm_landmarks.size[0]; i++)
{
float x = denorm_landmarks.at<float>(i, 0);
float y = denorm_landmarks.at<float>(i, 1);
cv::Point pt(x, y);
cv::circle(img, pt, 5, cv::Scalar(0, 255, 0), -1);
}
for (auto& connection : HAND_CONNECTIONS)
{
int startPtIdx = connection[0];
int endPtIdx = connection[1];
float startPtX = denorm_landmarks.at<float>(startPtIdx, 0);
float startPtY = denorm_landmarks.at<float>(startPtIdx, 1);
float endPtX = denorm_landmarks.at<float>(endPtIdx, 0);
float endPtY = denorm_landmarks.at<float>(endPtIdx, 1);
cv::line(img, cv::Point(startPtX, startPtY), cv::Point(endPtX, endPtY), cv::Scalar(255, 255, 255), 3);
}
}
}
데스크탑 게임모드베이스 헤더, cpp
// Fill out your copyright notice in the Description page of Project Settings.
#pragma once
#include "Blaze.h"
#include "Windows/AllowWindowsPlatformTypes.h"
#include <Windows.h>
#include "Windows/HideWindowsPlatformTypes.h"
#include "PreOpenCVHeaders.h"
#include <opencv2/opencv.hpp>
#include "PostOpenCVHeaders.h"
#include "CoreMinimal.h"
#include "GameFramework/GameModeBase.h"
#include "DesktopGameModeBase.generated.h"
/**
*
*/
UCLASS()
class HANDDESKTOP_API ADesktopGameModeBase : public AGameModeBase
{
GENERATED_BODY()
protected:
// Called when the game starts or when spawned
virtual void BeginPlay() override;
public:
UFUNCTION(BlueprintCallable)
void ReadFrame();
int monitorWidth = 1920;
int monitorHeight = 1080;
UPROPERTY(EditAnywhere, BlueprintReadWrite)
UTexture2D* imageTextureScreen1;
UPROPERTY(EditAnywhere, BlueprintReadWrite)
UTexture2D* imageTextureScreen2;
UPROPERTY(EditAnywhere, BlueprintReadWrite)
UTexture2D* imageTextureScreen3;
cv::Mat imageScreen1;
cv::Mat imageScreen2;
cv::Mat imageScreen3;
void ScreensToCVMats();
void CVMatsToTextures();
int webcamWidth = 640;
int webcamHeight = 480;
cv::VideoCapture capture;
cv::Mat webcamImage;
UPROPERTY(EditAnywhere, BlueprintReadWrite)
UTexture2D* webcamTexture;
void MatToTexture2D(const cv::Mat InMat);
//var and functions with blaze
Blaze blaze;
cv::Mat img256;
cv::Mat img128;
float scale;
cv::Scalar pad;
};
// Fill out your copyright notice in the Description page of Project Settings.
#include "DesktopGameModeBase.h"
void ADesktopGameModeBase::BeginPlay()
{
Super::BeginPlay();
blaze = Blaze();
capture = cv::VideoCapture(0);
if (!capture.isOpened())
{
UE_LOG(LogTemp, Log, TEXT("Open Webcam failed"));
return;
}
else
{
UE_LOG(LogTemp, Log, TEXT("Open Webcam Success"));
}
capture.set(cv::CAP_PROP_FRAME_WIDTH, webcamWidth);
capture.set(cv::CAP_PROP_FRAME_HEIGHT, webcamHeight);
webcamTexture = UTexture2D::CreateTransient(monitorWidth, monitorHeight, PF_B8G8R8A8);
imageScreen1 = cv::Mat(monitorHeight, monitorWidth, CV_8UC4);
imageScreen2 = cv::Mat(monitorHeight, monitorWidth, CV_8UC4);
imageScreen3 = cv::Mat(monitorHeight, monitorWidth, CV_8UC4);
imageTextureScreen1 = UTexture2D::CreateTransient(monitorWidth, monitorHeight, PF_B8G8R8A8);
imageTextureScreen2 = UTexture2D::CreateTransient(monitorWidth, monitorHeight, PF_B8G8R8A8);
imageTextureScreen3 = UTexture2D::CreateTransient(monitorWidth, monitorHeight, PF_B8G8R8A8);
}
void ADesktopGameModeBase::ReadFrame()
{
if (!capture.isOpened())
{
return;
}
capture.read(webcamImage);
/*
get filtered detections
*/
blaze.ResizeAndPad(webcamImage, img256, img128, scale, pad);
//UE_LOG(LogTemp, Log, TEXT("scale value: %f, pad value: (%f, %f)"), scale, pad[0], pad[1]);
std::vector<Blaze::PalmDetection> normDets = blaze.PredictPalmDetections(img128);
std::vector<Blaze::PalmDetection> denormDets = blaze.DenormalizePalmDetections(normDets, webcamWidth, webcamHeight, pad);
std::vector<Blaze::PalmDetection> filteredDets = blaze.FilteringDets(denormDets, webcamWidth, webcamHeight);
std::vector<cv::Rect> handRects = blaze.convertHandRects(filteredDets);
std::vector<cv::Mat> handImgs;
blaze.GetHandImages(webcamImage, handRects, handImgs);
std::vector<cv::Mat> imgs_landmarks = blaze.PredictHandDetections(handImgs);
std::vector<cv::Mat> denorm_imgs_landmarks = blaze.DenormalizeHandLandmarks(imgs_landmarks, handRects);
//draw hand rects/ plam detection/ dets info/ hand detection
blaze.DrawRects(webcamImage, handRects);
blaze.DrawPalmDetections(webcamImage, filteredDets);
blaze.DrawDetsInfo(webcamImage, filteredDets, normDets, denormDets);
blaze.DrawHandDetections(webcamImage, denorm_imgs_landmarks);
//cv::mat to utexture2d
MatToTexture2D(webcamImage);
/*
모니터 시각화
*/
ScreensToCVMats();
CVMatsToTextures();
}
void ADesktopGameModeBase::MatToTexture2D(const cv::Mat InMat)
{
if (InMat.type() == CV_8UC3)//example for pre-conversion of Mat
{
cv::Mat resizedImage;
cv::resize(InMat, resizedImage, cv::Size(monitorWidth, monitorHeight));
cv::Mat bgraImage;
//if the Mat is in BGR space, convert it to BGRA. There is no three channel texture in UE (at least with eight bit)
cv::cvtColor(resizedImage, bgraImage, cv::COLOR_BGR2BGRA);
//actually copy the data to the new texture
FTexture2DMipMap& Mip = webcamTexture->GetPlatformData()->Mips[0];
void* Data = Mip.BulkData.Lock(LOCK_READ_WRITE);//lock the texture data
FMemory::Memcpy(Data, bgraImage.data, bgraImage.total() * bgraImage.elemSize());//copy the data
Mip.BulkData.Unlock();
webcamTexture->PostEditChange();
webcamTexture->UpdateResource();
}
else if (InMat.type() == CV_8UC4)
{
//actually copy the data to the new texture
FTexture2DMipMap& Mip = webcamTexture->GetPlatformData()->Mips[0];
void* Data = Mip.BulkData.Lock(LOCK_READ_WRITE);//lock the texture data
FMemory::Memcpy(Data, InMat.data, InMat.total() * InMat.elemSize());//copy the data
Mip.BulkData.Unlock();
webcamTexture->PostEditChange();
webcamTexture->UpdateResource();
}
//if the texture hasnt the right pixel format, abort.
webcamTexture->PostEditChange();
webcamTexture->UpdateResource();
}
void ADesktopGameModeBase::ScreensToCVMats()
{
HDC hScreenDC = GetDC(NULL);
HDC hMemoryDC = CreateCompatibleDC(hScreenDC);
int screenWidth = GetDeviceCaps(hScreenDC, HORZRES);
int screenHeight = GetDeviceCaps(hScreenDC, VERTRES);
HBITMAP hBitmap = CreateCompatibleBitmap(hScreenDC, screenWidth, screenHeight);
HBITMAP hOldBitmap = (HBITMAP)SelectObject(hMemoryDC, hBitmap);
//screen 1
BitBlt(hMemoryDC, 0, 0, screenWidth, screenHeight, hScreenDC, 0, 0, SRCCOPY);
GetBitmapBits(hBitmap, imageScreen1.total() * imageScreen1.elemSize(), imageScreen1.data);
//screen 2
BitBlt(hMemoryDC, 0, 0, screenWidth, screenHeight, hScreenDC, 1920, 0, SRCCOPY);
GetBitmapBits(hBitmap, imageScreen2.total() * imageScreen2.elemSize(), imageScreen2.data);
//screen 3
BitBlt(hMemoryDC, 0, 0, screenWidth, screenHeight, hScreenDC, 3840, 0, SRCCOPY);
GetBitmapBits(hBitmap, imageScreen3.total() * imageScreen3.elemSize(), imageScreen3.data);
SelectObject(hMemoryDC, hOldBitmap);
DeleteDC(hScreenDC);
DeleteDC(hMemoryDC);
DeleteObject(hBitmap);
DeleteObject(hOldBitmap);
}
void ADesktopGameModeBase::CVMatsToTextures()
{
for (int i = 0; i < 3; i++)
{
if (i == 0)
{
FTexture2DMipMap& Mip = imageTextureScreen1->GetPlatformData()->Mips[0];
void* Data = Mip.BulkData.Lock(LOCK_READ_WRITE);//lock the texture data
FMemory::Memcpy(Data, imageScreen1.data, imageScreen1.total() * imageScreen1.elemSize());//copy the data
Mip.BulkData.Unlock();
imageTextureScreen1->PostEditChange();
imageTextureScreen1->UpdateResource();
}
else if (i == 1)
{
FTexture2DMipMap& Mip = imageTextureScreen2->GetPlatformData()->Mips[0];
void* Data = Mip.BulkData.Lock(LOCK_READ_WRITE);//lock the texture data
FMemory::Memcpy(Data, imageScreen2.data, imageScreen2.total() * imageScreen2.elemSize());//copy the data
Mip.BulkData.Unlock();
imageTextureScreen2->PostEditChange();
imageTextureScreen2->UpdateResource();
}
else if (i == 2)
{
FTexture2DMipMap& Mip = imageTextureScreen3->GetPlatformData()->Mips[0];
void* Data = Mip.BulkData.Lock(LOCK_READ_WRITE);//lock the texture data
FMemory::Memcpy(Data, imageScreen3.data, imageScreen3.total() * imageScreen3.elemSize());//copy the data
Mip.BulkData.Unlock();
imageTextureScreen3->PostEditChange();
imageTextureScreen3->UpdateResource();
}
}
}
'컴퓨터과학 > 언리얼' 카테고리의 다른 글
HandDesktop12 - 손 관절 회전 방향 구하기(fail?) (0) | 2024.02.01 |
---|---|
HandDesktop11 - 손 메시 제어하기 (0) | 2024.01.31 |
HandDesktop09 - 기존 문제들 원인 찾음, blaze 갈아엎기, 블라즈팜 문제해결 (0) | 2024.01.26 |
HandDesktop08 - blaze hand 사용하기 (0) | 2024.01.26 |
HandDesktop07 - blaze hand 사용 준비하기 (0) | 2024.01.26 |