Blame - source/use_case/object_detection/src/DetectorPostProcessing.cc - ml/ethos-u/ml-embedded-evaluation-kit

blob: e97e6b334062df5c26575ca675b17749a7323d24 [file] [log] [blame]

Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2022 Arm Limited. All rights reserved.
				3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
				17	#include "DetectorPostProcessing.hpp"
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	18	#include "PlatformMath.hpp"
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	19
				20	#include <algorithm>
				21	#include <cmath>
				22
				23	namespace arm {
				24	namespace app {
				25	namespace object_detection {
				26
				27	DetectorPostprocessing::DetectorPostprocessing(
				28	const float threshold,
				29	const float nms,
				30	int numClasses,
				31	int topN)
				32	: m_threshold(threshold),
				33	m_nms(nms),
				34	m_numClasses(numClasses),
				35	m_topN(topN)
				36	{}
				37
				38	void DetectorPostprocessing::RunPostProcessing(
				39	uint8_t* imgIn,
				40	uint32_t imgRows,
				41	uint32_t imgCols,
				42	TfLiteTensor* modelOutput0,
				43	TfLiteTensor* modelOutput1,
				44	std::vector<DetectionResult>& resultsOut)
				45	{
				46	/* init postprocessing */
				47	Network net {
				48	.inputWidth = static_cast<int>(imgCols),
				49	.inputHeight = static_cast<int>(imgRows),
				50	.numClasses = m_numClasses,
				51	.branches = {
				52	Branch {
				53	.resolution = static_cast<int>(imgCols/32),
				54	.numBox = 3,
				55	.anchor = anchor1,
				56	.modelOutput = modelOutput0->data.int8,
				57	.scale = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->scale->data[0],
				58	.zeroPoint = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->zero_point->data[0],
				59	.size = modelOutput0->bytes
				60	},
				61	Branch {
				62	.resolution = static_cast<int>(imgCols/16),
				63	.numBox = 3,
				64	.anchor = anchor2,
				65	.modelOutput = modelOutput1->data.int8,
				66	.scale = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->scale->data[0],
				67	.zeroPoint = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->zero_point->data[0],
				68	.size = modelOutput1->bytes
				69	}
				70	},
				71	.topN = m_topN
				72	};
				73	/* End init */
				74
				75	/* Start postprocessing */
				76	int originalImageWidth = originalImageSize;
				77	int originalImageHeight = originalImageSize;
				78
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	79	std::forward_list<image::Detection> detections;
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	80	GetNetworkBoxes(net, originalImageWidth, originalImageHeight, m_threshold, detections);
				81
				82	/* Do nms */
				83	CalculateNMS(detections, net.numClasses, m_nms);
				84
				85	for (auto& it: detections) {
				86	float xMin = it.bbox.x - it.bbox.w / 2.0f;
				87	float xMax = it.bbox.x + it.bbox.w / 2.0f;
				88	float yMin = it.bbox.y - it.bbox.h / 2.0f;
				89	float yMax = it.bbox.y + it.bbox.h / 2.0f;
				90
				91	if (xMin < 0) {
				92	xMin = 0;
				93	}
				94	if (yMin < 0) {
				95	yMin = 0;
				96	}
				97	if (xMax > originalImageWidth) {
				98	xMax = originalImageWidth;
				99	}
				100	if (yMax > originalImageHeight) {
				101	yMax = originalImageHeight;
				102	}
				103
				104	float boxX = xMin;
				105	float boxY = yMin;
				106	float boxWidth = xMax - xMin;
				107	float boxHeight = yMax - yMin;
				108
				109	for (int j = 0; j < net.numClasses; ++j) {
				110	if (it.prob[j] > 0) {
				111
				112	DetectionResult tmpResult = {};
				113	tmpResult.m_normalisedVal = it.prob[j];
				114	tmpResult.m_x0 = boxX;
				115	tmpResult.m_y0 = boxY;
				116	tmpResult.m_w = boxWidth;
				117	tmpResult.m_h = boxHeight;
				118
				119	resultsOut.push_back(tmpResult);
				120
				121	/* TODO: Instead of draw on the image, return the boxes and draw on the LCD */
				122	DrawBoxOnImage(imgIn, originalImageWidth, originalImageHeight, boxX, boxY, boxWidth, boxHeight);;
				123	}
				124	}
				125	}
				126	}
				127
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	128
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	129	void DetectorPostprocessing::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	130	{
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	131	std::forward_list<image::Detection>::iterator it;
				132	std::forward_list<image::Detection>::iterator last_it;
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	133	for ( it = detections.begin(); it != detections.end(); ++it ) {
				134	if(it->objectness > det.objectness)
				135	break;
				136	last_it = it;
				137	}
				138	if(it != detections.begin()) {
				139	detections.emplace_after(last_it, det);
				140	detections.pop_front();
				141	}
				142	}
				143
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	144	void DetectorPostprocessing::GetNetworkBoxes(Network& net, int imageWidth, int imageHeight, float threshold, std::forward_list<image::Detection>& detections)
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	145	{
				146	int numClasses = net.numClasses;
				147	int num = 0;
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	148	auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	149	return pa.objectness < pb.objectness;
				150	};
				151	for (size_t i = 0; i < net.branches.size(); ++i) {
				152	int height = net.branches[i].resolution;
				153	int width = net.branches[i].resolution;
				154	int channel = net.branches[i].numBox*(5+numClasses);
				155
				156	for (int h = 0; h < net.branches[i].resolution; h++) {
				157	for (int w = 0; w < net.branches[i].resolution; w++) {
				158	for (int anc = 0; anc < net.branches[i].numBox; anc++) {
				159
				160	/* Objectness score */
				161	int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	162	float objectness = math::MathUtils::SigmoidF32(((float)net.branches[i].modelOutput[bbox_obj_offset] - net.branches[i].zeroPoint) * net.branches[i].scale);
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	163
				164	if(objectness > threshold) {
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	165	image::Detection det;
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	166	det.objectness = objectness;
				167	/* Get bbox prediction data for each anchor, each feature point */
				168	int bbox_x_offset = bbox_obj_offset -4;
				169	int bbox_y_offset = bbox_x_offset + 1;
				170	int bbox_w_offset = bbox_x_offset + 2;
				171	int bbox_h_offset = bbox_x_offset + 3;
				172	int bbox_scores_offset = bbox_x_offset + 5;
				173
				174	det.bbox.x = ((float)net.branches[i].modelOutput[bbox_x_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
				175	det.bbox.y = ((float)net.branches[i].modelOutput[bbox_y_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
				176	det.bbox.w = ((float)net.branches[i].modelOutput[bbox_w_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
				177	det.bbox.h = ((float)net.branches[i].modelOutput[bbox_h_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
				178
				179
				180	float bbox_x, bbox_y;
				181
				182	/* Eliminate grid sensitivity trick involved in YOLOv4 */
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	183	bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
				184	bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	185	det.bbox.x = (bbox_x + w) / width;
				186	det.bbox.y = (bbox_y + h) / height;
				187
				188	det.bbox.w = exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
				189	det.bbox.h = exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
				190
				191	for (int s = 0; s < numClasses; s++) {
Richard Burton	ed35a6f	2022-02-14 11:55:35 +0000	[diff] [blame^]	192	float sig = math::MathUtils::SigmoidF32(((float)net.branches[i].modelOutput[bbox_scores_offset + s] - net.branches[i].zeroPoint) * net.branches[i].scale)*objectness;
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	193	det.prob.emplace_back((sig > threshold) ? sig : 0);
				194	}
				195
				196	/* Correct_YOLO_boxes */
				197	det.bbox.x *= imageWidth;
				198	det.bbox.w *= imageWidth;
				199	det.bbox.y *= imageHeight;
				200	det.bbox.h *= imageHeight;
				201
				202	if (num < net.topN \|\| net.topN <=0) {
				203	detections.emplace_front(det);
				204	num += 1;
				205	} else if (num == net.topN) {
				206	detections.sort(det_objectness_comparator);
				207	InsertTopNDetections(detections,det);
				208	num += 1;
				209	} else {
				210	InsertTopNDetections(detections,det);
				211	}
				212	}
				213	}
				214	}
				215	}
				216	}
				217	if(num > net.topN)
				218	num -=1;
				219	}
				220
Isabella Gottardi	3107aa2	2022-01-27 16:39:37 +0000	[diff] [blame]	221	void DetectorPostprocessing::DrawBoxOnImage(uint8_t* imgIn, int imWidth, int imHeight, int boxX,int boxY, int boxWidth, int boxHeight)
				222	{
				223	auto CheckAndFixOffset = [](int im_width,int im_height,int& offset) {
				224	if ( (offset) >= im_widthim_heightchannelsImageDisplayed) {
				225	offset = im_width * im_height * channelsImageDisplayed -1;
				226	}
				227	else if ( (offset) < 0) {
				228	offset = 0;
				229	}
				230	};
				231
				232	/* Consistency checks */
				233	if (!imgIn) {
				234	return;
				235	}
				236
				237	int offset=0;
				238	for (int i=0; i < boxWidth; i++) {
				239	/* Draw two horizontal lines */
				240	for (int line=0; line < 2; line++) {
				241	/top/
				242	offset =(i + (boxY + line)imWidth + boxX) channelsImageDisplayed; /* channelsImageDisplayed for rgb or grayscale*/
				243	CheckAndFixOffset(imWidth,imHeight,offset);
				244	imgIn[offset] = 0xFF;
				245	/bottom/
				246	offset = (i + (boxY + boxHeight - line)imWidth + boxX) channelsImageDisplayed;
				247	CheckAndFixOffset(imWidth,imHeight,offset);
				248	imgIn[offset] = 0xFF;
				249	}
				250	}
				251
				252	for (int i=0; i < boxHeight; i++) {
				253	/* Draw two vertical lines */
				254	for (int line=0; line < 2; line++) {
				255	/left/
				256	offset = ((i + boxY)imWidth + boxX + line)channelsImageDisplayed;
				257	CheckAndFixOffset(imWidth,imHeight,offset);
				258	imgIn[offset] = 0xFF;
				259	/right/
				260	offset = ((i + boxY)imWidth + boxX + boxWidth - line)channelsImageDisplayed;
				261	CheckAndFixOffset(imWidth,imHeight, offset);
				262	imgIn[offset] = 0xFF;
				263	}
				264	}
				265
				266	}
				267
				268	} /* namespace object_detection */
				269	} /* namespace app */
				270	} /* namespace arm */