// blob: e781b62bf23bcb82ea68140dbd5c27e37e0d8235 [file] [log] [blame]
/*
* Copyright (c) 2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "DetectorPostProcessing.hpp"
#include <algorithm>
#include <cmath>
#include <stdint.h>
#include <forward_list>
/* Box described by the coordinates of its four edges. */
struct boxabs {
    float left;
    float right;
    float top;
    float bot;
};
/* One output head of the detector together with what is needed to decode it. */
struct branch {
    int resolution;     /* output grid is resolution x resolution cells */
    int num_box;        /* number of anchor boxes evaluated per grid cell */
    float *anchor;      /* anchor sizes, read as (width, height) pairs: anchor[2*a], anchor[2*a + 1] */
    int8_t *tf_output;  /* quantised output values for this head (not owned by the struct) */
    float scale;        /* dequantisation: real = (quantised - zero_point) * scale */
    int zero_point;     /* quantisation zero point, see scale */
    size_t size;        /* size value supplied by the creator of the branch */
    float scale_x_y;    /* only referenced by commented-out decoding code in this file */
};
/* Description of the whole detector used while decoding its outputs. */
struct network {
    int input_w;      /* network input width; anchor widths are divided by it */
    int input_h;      /* network input height; anchor heights are divided by it */
    int num_classes;  /* number of class scores stored per anchor box */
    int num_branch;   /* number of entries in branchs */
    branch *branchs;  /* array of output heads (not owned by the struct) */
    int topN;         /* keep at most this many detections; <= 0 keeps all of them */
};
/* Box given by its centre (x, y) and its full width / height (w, h). */
struct box {
    float x;
    float y;
    float w;
    float h;
};
/* A single candidate detection produced while decoding the network output. */
struct detection {
    box bbox;          /* centre/size box of the detection */
    float *prob;       /* per-class scores; allocated with calloc, released by free_dets */
    float objectness;  /* objectness score of the anchor that produced the detection */
};
/* Class index used by CompareProbs; CalcNMS sets it before every per-class sort. */
static int sort_class = 0;
static void free_dets(std::forward_list<detection> &dets){
std::forward_list<detection>::iterator it;
for ( it = dets.begin(); it != dets.end(); ++it ){
free(it->prob);
}
}
/*
 * Logistic function 1 / (1 + e^-x).
 *
 * std::exp is called on a float so the float overload is selected and the
 * whole expression stays in single precision instead of being silently
 * promoted to double.
 *
 * @param x  input value
 * @return   value in [0, 1]; saturates to 0 or 1 for inputs of large magnitude
 */
float sigmoid(float x)
{
    return 1.f / (1.f + std::exp(-x));
}
static bool det_objectness_comparator(detection &pa, detection &pb)
{
return pa.objectness < pb.objectness;
}
static void insert_topN_det(std::forward_list<detection> &dets, detection det)
{
std::forward_list<detection>::iterator it;
std::forward_list<detection>::iterator last_it;
for ( it = dets.begin(); it != dets.end(); ++it ){
if(it->objectness > det.objectness)
break;
last_it = it;
}
if(it != dets.begin()){
dets.emplace_after(last_it, det);
free(dets.begin()->prob);
dets.pop_front();
}
else{
free(det.prob);
}
}
/*
 * Decode the quantised output of every branch into a list of detections.
 *
 * For each grid cell and anchor the tensor holds (5 + num_classes) values in
 * the order x, y, w, h, objectness, class scores. An anchor becomes a
 * detection only when its objectness (after sigmoid) exceeds thresh. Box
 * coordinates are scaled to image_w x image_h. Class scores are multiplied
 * by the objectness and zeroed when they do not exceed thresh.
 *
 * When net->topN > 0 only the topN detections with the highest objectness are
 * kept (the list is then sorted by ascending objectness); when topN <= 0 all
 * detections are kept in reverse order of discovery.
 *
 * @param net      network description (branches, class count, topN)
 * @param image_w  width the boxes are scaled to
 * @param image_h  height the boxes are scaled to
 * @param thresh   threshold applied to objectness and to the class scores
 * @param num      out: number of detections in the returned list
 * @return         list of detections; each prob buffer is owned by the caller
 *                 and must be released with free()
 */
static std::forward_list<detection> get_network_boxes(network *net, int image_w, int image_h, float thresh, int *num)
{
    std::forward_list<detection> dets;
    const int num_classes = net->num_classes;
    /* topN <= 0 disables the limit on the number of detections. */
    const bool limit_active = net->topN > 0;
    *num = 0;

    for (int i = 0; i < net->num_branch; ++i) {
        const branch &br = net->branchs[i];
        const int height = br.resolution;
        const int width = br.resolution;
        const int channel = br.num_box * (5 + num_classes);

        /* Convert one quantised tensor element to its real value. */
        const auto dequantise = [&br](int offset) -> float {
            return ((float)br.tf_output[offset] - br.zero_point) * br.scale;
        };

        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                for (int anc = 0; anc < br.num_box; anc++) {
                    // objectness score
                    const int bbox_obj_offset = h * width * channel + w * channel + anc * (num_classes + 5) + 4;
                    const float objectness = sigmoid(dequantise(bbox_obj_offset));
                    if (!(objectness > thresh)) {
                        continue;
                    }

                    detection det;
                    det.prob = (float*)calloc(num_classes, sizeof(float));
                    if (det.prob == nullptr && num_classes > 0) {
                        /* Allocation failed: drop this candidate instead of writing through a null pointer. */
                        continue;
                    }
                    det.objectness = objectness;

                    // bbox prediction data for this anchor and grid cell
                    const int bbox_x_offset = bbox_obj_offset - 4;
                    const int bbox_y_offset = bbox_x_offset + 1;
                    const int bbox_w_offset = bbox_x_offset + 2;
                    const int bbox_h_offset = bbox_x_offset + 3;
                    const int bbox_scores_offset = bbox_x_offset + 5;

                    const float raw_x = dequantise(bbox_x_offset);
                    const float raw_y = dequantise(bbox_y_offset);
                    const float raw_w = dequantise(bbox_w_offset);
                    const float raw_h = dequantise(bbox_h_offset);

                    /* Centre: cell-relative offset plus cell index, normalised by the grid size. */
                    det.bbox.x = (sigmoid(raw_x) + w) / width;
                    det.bbox.y = (sigmoid(raw_y) + h) / height;
                    /* Size: exponential of the raw value times the anchor size, normalised by the network input size. */
                    det.bbox.w = exp(raw_w) * br.anchor[anc*2] / net->input_w;
                    det.bbox.h = exp(raw_h) * br.anchor[anc*2+1] / net->input_h;

                    for (int s = 0; s < num_classes; s++) {
                        det.prob[s] = sigmoid(dequantise(bbox_scores_offset + s)) * objectness;
                        det.prob[s] = (det.prob[s] > thresh) ? det.prob[s] : 0;
                    }

                    // scale the normalised box to the target image size
                    det.bbox.x *= image_w;
                    det.bbox.w *= image_w;
                    det.bbox.y *= image_h;
                    det.bbox.h *= image_h;

                    if (!limit_active || *num < net->topN) {
                        dets.emplace_front(det);
                        *num += 1;
                    } else if (*num == net->topN) {
                        /* List just became full: sort once, then keep it sorted.
                         * *num is bumped to topN + 1 to mark "already sorted". */
                        dets.sort(det_objectness_comparator);
                        insert_topN_det(dets, det);
                        *num += 1;
                    } else {
                        insert_topN_det(dets, det);
                    }
                }
            }
        }
    }

    /* Undo the "already sorted" marker. Only applies when a limit is active;
     * without this guard the count was reported one too low for topN <= 0. */
    if (limit_active && *num > net->topN)
        *num -= 1;
    return dets;
}
// init part
/*
 * Build a branch descriptor from its individual settings.
 *
 * @param resolution  grid size of the output head
 * @param num_box     number of anchor boxes per grid cell
 * @param anchor      anchor sizes (not copied; must outlive the branch)
 * @param tf_output   quantised output values (not copied)
 * @param size        size value stored alongside the output pointer
 * @param scale       quantisation scale
 * @param zero_point  quantisation zero point
 * @return            fully initialised branch
 */
static branch create_brach(int resolution, int num_box, float *anchor, int8_t *tf_output, size_t size, float scale, int zero_point)
{
    /* Value-initialise so that no member is returned with an indeterminate value. */
    branch b{};
    b.resolution = resolution;
    b.num_box = num_box;
    b.anchor = anchor;
    b.tf_output = tf_output;
    b.size = size;
    b.scale = scale;
    b.zero_point = zero_point;
    /* No parameter supplies scale_x_y. 1 is the neutral value for the
     * (currently commented-out) formula x * s - (s - 1) / 2 in the decoder. */
    b.scale_x_y = 1.0f;
    return b;
}
/*
 * Bundle the detector settings into a network descriptor.
 * The branch array is referenced, not copied.
 */
static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch* branchs, int topN)
{
    /* Initialisers follow the member order of network. */
    network description = {input_w, input_h, num_classes, num_branch, branchs, topN};
    return description;
}
// NMS part
/*
 * Length of the overlap of two 1-D intervals given by centre and width.
 * The result is negative when the intervals are disjoint (its magnitude is
 * then the gap between them).
 */
static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
{
    const float half_1 = width1/2;
    const float half_2 = width2/2;

    /* Start of the overlap: the larger of the two lower bounds. */
    const float overlap_begin = std::max(x2_center - half_2, x1_center - half_1);
    /* End of the overlap: the smaller of the two upper bounds. */
    const float overlap_end = std::min(x2_center + half_2, x1_center + half_1);

    return overlap_end - overlap_begin;
}
/*
 * Area of the intersection of two centre/size boxes.
 * Returns 0 when the boxes do not overlap on either axis.
 */
static float CalcBoxIntersect(box box1, box box2)
{
    const float overlap_w = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
    const float overlap_h = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);

    /* A negative extent on any axis means the boxes are disjoint. */
    const bool disjoint = (overlap_w < 0) || (overlap_h < 0);
    return disjoint ? 0 : overlap_w*overlap_h;
}
/* Area covered by at least one of the two boxes: both areas minus the shared part. */
static float CalcBoxUnion(box box1, box box2)
{
    const float shared_area = CalcBoxIntersect(box1, box2);
    return box1.w*box1.h + box2.w*box2.h - shared_area;
}
/*
 * Intersection-over-union of two boxes.
 * Returns 0 when the intersection or the union area is exactly 0.
 */
static float CalcBoxIOU(box box1, box box2)
{
    const float shared_area = CalcBoxIntersect(box1, box2);
    if (shared_area != 0) {
        const float covered_area = CalcBoxUnion(box1, box2);
        if (covered_area != 0) {
            return shared_area / covered_area;
        }
    }
    return 0;
}
static bool CompareProbs(detection &prob1, detection &prob2)
{
return prob1.prob[sort_class] > prob2.prob[sort_class];
}
static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
{
int k;
for (k = 0; k < classes; ++k) {
sort_class = k;
detections.sort(CompareProbs);
for (std::forward_list<detection>::iterator it=detections.begin(); it != detections.end(); ++it){
if (it->prob[k] == 0) continue;
for (std::forward_list<detection>::iterator itc=std::next(it, 1); itc != detections.end(); ++itc){
if (itc->prob[k] == 0) continue;
if (CalcBoxIOU(it->bbox, itc->bbox) > iou_threshold) {
itc->prob[k] = 0;
}
}
}
}
}
/*
 * Clamp *offset into the range [0, im_w * im_h * FORMAT_MULTIPLY_FACTOR - 1].
 * A null offset pointer is ignored.
 */
static inline void check_and_fix_offset(int im_w,int im_h,int *offset)
{
    if (offset == nullptr) {
        return;
    }

    /* auto keeps whatever arithmetic type the FORMAT_MULTIPLY_FACTOR macro produces. */
    const auto buffer_len = im_w*im_h*FORMAT_MULTIPLY_FACTOR;
    if (*offset >= buffer_len) {
        *offset = buffer_len -1;
    } else if (*offset < 0) {
        *offset = 0;
    }
}
/*
 * Draw the outline of a box, two pixels thick, into the image buffer.
 *
 * Every written position is first clamped into the buffer by
 * check_and_fix_offset, and a single byte at that position is set to 0xFF.
 * A null image pointer makes the call a no-op.
 *
 * @param img_in  image buffer of im_w x im_h pixels
 * @param bx, by  top-left corner of the box
 * @param bw, bh  width and height of the box
 */
static void DrawBoxOnImage(uint8_t *img_in,int im_w,int im_h,int bx,int by,int bw,int bh)
{
    if (img_in == nullptr) {
        return;
    }

    /* Clamp a buffer offset and set the byte stored there. */
    const auto mark = [&](int offset) {
        check_and_fix_offset(im_w, im_h, &offset);
        img_in[offset] = 0xFF;
    };

    /* Horizontal edges: for every column, two rows at the top and two at the bottom. */
    for (int col = 0; col < bw; ++col) {
        for (int thick = 0; thick < 2; ++thick) {
            mark((col + (by + thick)*im_w + bx)*FORMAT_MULTIPLY_FACTOR);
            mark((col + (by + bh - thick)*im_w + bx)*FORMAT_MULTIPLY_FACTOR);
        }
    }

    /* Vertical edges: for every row, two columns on the left and two on the right. */
    for (int row = 0; row < bh; ++row) {
        for (int thick = 0; thick < 2; ++thick) {
            mark(((row + by)*im_w + bx + thick)*FORMAT_MULTIPLY_FACTOR);
            mark(((row + by)*im_w + bx + bw - thick)*FORMAT_MULTIPLY_FACTOR);
        }
    }
}
void arm::app::RunPostProcessing(uint8_t *img_in,TfLiteTensor* model_output[2],std::vector<arm::app::DetectionResult> & results_out)
{
TfLiteTensor* output[2] = {nullptr,nullptr};
int input_w = INPUT_IMAGE_WIDTH;
int input_h = INPUT_IMAGE_HEIGHT;
for(int anchor=0;anchor<2;anchor++)
{
output[anchor] = model_output[anchor];
}
/* init postprocessing */
int num_classes = 1;
int num_branch = 2;
int topN = 0;
branch* branchs = (branch*)calloc(num_branch, sizeof(branch));
/*NOTE: anchors are different for any given input model size, estimated during training phase */
float anchor1[] = {38, 77, 47, 97, 61, 126};
float anchor2[] = {14, 26, 19, 37, 28, 55 };
branchs[0] = create_brach(INPUT_IMAGE_WIDTH/32, 3, anchor1, output[0]->data.int8, output[0]->bytes, ((TfLiteAffineQuantization*)(output[0]->quantization.params))->scale->data[0], ((TfLiteAffineQuantization*)(output[0]->quantization.params))->zero_point->data[0]);
branchs[1] = create_brach(INPUT_IMAGE_WIDTH/16, 3, anchor2, output[1]->data.int8, output[1]->bytes, ((TfLiteAffineQuantization*)(output[1]->quantization.params))->scale->data[0],((TfLiteAffineQuantization*)(output[1]->quantization.params))->zero_point->data[0]);
network net = creat_network(input_w, input_h, num_classes, num_branch, branchs,topN);
/* end init */
/* start postprocessing */
int nboxes=0;
float thresh = .5;//50%
float nms = .45;
int orig_image_width = ORIGINAL_IMAGE_WIDTH;
int orig_image_height = ORIGINAL_IMAGE_HEIGHT;
std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);
/* do nms */
CalcNMS(dets, net.num_classes, nms);
uint8_t temp_unsuppressed_counter = 0;
int j;
for (std::forward_list<detection>::iterator it=dets.begin(); it != dets.end(); ++it){
float xmin = it->bbox.x - it->bbox.w / 2.0f;
float xmax = it->bbox.x + it->bbox.w / 2.0f;
float ymin = it->bbox.y - it->bbox.h / 2.0f;
float ymax = it->bbox.y + it->bbox.h / 2.0f;
if (xmin < 0) xmin = 0;
if (ymin < 0) ymin = 0;
if (xmax > orig_image_width) xmax = orig_image_width;
if (ymax > orig_image_height) ymax = orig_image_height;
float bx = xmin;
float by = ymin;
float bw = xmax - xmin;
float bh = ymax - ymin;
for (j = 0; j < net.num_classes; ++j) {
if (it->prob[j] > 0) {
arm::app::DetectionResult tmp_result = {};
tmp_result.m_normalisedVal = it->prob[j];
tmp_result.m_x0=bx;
tmp_result.m_y0=by;
tmp_result.m_w=bw;
tmp_result.m_h=bh;
results_out.push_back(tmp_result);
DrawBoxOnImage(img_in,orig_image_width,orig_image_height,bx,by,bw,bh);
temp_unsuppressed_counter++;
}
}
}
free_dets(dets);
free(branchs);
}
/*
 * Convert an interleaved 3-byte-per-pixel RGB image to one byte per pixel.
 *
 * Each output value is the weighted sum 0.299*R + 0.587*G + 0.114*B,
 * truncated to an integer and limited to UINT8_MAX.
 *
 * @param rgb   source buffer, im_w * im_h * 3 bytes
 * @param gray  destination buffer, im_w * im_h bytes
 * @param im_w  image width in pixels
 * @param im_h  image height in pixels
 */
void arm::app::RgbToGrayscale(const uint8_t *rgb,uint8_t *gray, int im_w,int im_h)
{
    const float weight_r = 0.299;
    const float weight_g = 0.587;
    const float weight_b = 0.114;

    const int pixel_count = im_w*im_h;
    for (int px = 0; px < pixel_count; ++px) {
        const uint8_t *src = rgb + px*3;
        const uint32_t luma = src[0]*weight_r + src[1]*weight_g + src[2]*weight_b;
        /*clip if need */
        gray[px] = (luma <= UINT8_MAX) ? static_cast<uint8_t>(luma) : static_cast<uint8_t>(UINT8_MAX);
    }
}