Face detection demo from Emza Visual Sense
Signed-off-by: Michael Levit <michaell@emza-vs.com>

Change-Id: I7958b05b5dbe9a785e0f8a241b716c17a9ca976f
diff --git a/source/use_case/object_detection/src/DetectorPostProcessing.cc b/source/use_case/object_detection/src/DetectorPostProcessing.cc
new file mode 100755
index 0000000..e781b62
--- /dev/null
+++ b/source/use_case/object_detection/src/DetectorPostProcessing.cc
@@ -0,0 +1,447 @@
+/*

+ * Copyright (c) 2022 Arm Limited. All rights reserved.

+ * SPDX-License-Identifier: Apache-2.0

+ *

+ * Licensed under the Apache License, Version 2.0 (the "License");

+ * you may not use this file except in compliance with the License.

+ * You may obtain a copy of the License at

+ *

+ *     http://www.apache.org/licenses/LICENSE-2.0

+ *

+ * Unless required by applicable law or agreed to in writing, software

+ * distributed under the License is distributed on an "AS IS" BASIS,

+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

+ * See the License for the specific language governing permissions and

+ * limitations under the License.

+ */

+#include "DetectorPostProcessing.hpp"

+#include <algorithm>

+#include <cmath>

+#include <stdint.h>

+#include <forward_list>

+

+

+/* Four float edge values (left, right, top, bottom); not referenced elsewhere in this hunk. */
+struct boxabs {
+    float left;
+    float right;
+    float top;
+    float bot;
+};

+

+

+/*
+ * One output branch of the detector: the raw int8 tensor together with the
+ * quantisation parameters and anchor table used to decode it.
+ */
+struct branch {
+    int resolution;     /* the grid is resolution x resolution cells */
+    int num_box;        /* anchors evaluated per grid cell */
+    float *anchor;      /* (w, h) pairs, read as anchor[2*a] and anchor[2*a+1] */
+    int8_t *tf_output;  /* quantised tensor data; never freed by this file */
+    float scale;        /* dequantisation scale */
+    int zero_point;     /* dequantisation zero point */
+    size_t size;        /* supplied by the caller; RunPostProcessing passes the tensor byte count */
+    float scale_x_y;    /* only referenced by commented-out code */
+};

+

+/* Decoder configuration shared by every branch of the model. */
+struct network {
+    int input_w;      /* divisor applied to anchor widths */
+    int input_h;      /* divisor applied to anchor heights */
+    int num_classes;  /* class scores stored per anchor */
+    int num_branch;   /* number of entries in branchs */
+    branch *branchs;  /* branch array supplied by the caller */
+    int topN;         /* maximum detections kept; <= 0 keeps all of them */
+};

+

+

+/* Rectangle given as centre (x, y) plus width and height. */
+struct box {
+    float x;
+    float y;
+    float w;
+    float h;
+};

+

+/* One candidate detection produced by the decoder. */
+struct detection {
+    box bbox;          /* centre/size rectangle */
+    float *prob;       /* per-class scores; calloc'd in get_network_boxes, released with free() */
+    float objectness;  /* sigmoid of the dequantised objectness value */
+};

+

+

+

+/* Class index read by CompareProbs; CalcNMS assigns it before each sort. */
+static int sort_class;

+

+/* Release the prob buffer of every detection in the list; the list nodes themselves are left in place. */
+static void free_dets(std::forward_list<detection> &dets){
+    for (detection &entry : dets) {
+        free(entry.prob);
+    }
+}

+

+/* Logistic function: returns 1 / (1 + e^-x). */
+float sigmoid(float x)
+{
+    const auto denominator = 1.f + exp(-x);
+    return 1.f / denominator;
+}

+

+/* Ordering predicate: true when pa has the lower objectness, giving an ascending sort. */
+static bool det_objectness_comparator(detection &pa, detection &pb)
+{
+    return pb.objectness > pa.objectness;
+}

+

+/*
+ * Offer det to a list that is sorted by ascending objectness.
+ *
+ * If the head of the list does not have a higher objectness than det, det is
+ * inserted at its sorted position and the head (lowest objectness) is freed
+ * and removed, so the list length is unchanged. Otherwise det is rejected
+ * and its prob buffer is freed. An empty list also rejects det.
+ */
+static void insert_topN_det(std::forward_list<detection> &dets, detection det)
+{
+    /* Walk to the last node whose objectness does not exceed det's. */
+    auto prev = dets.before_begin();
+    for (auto cur = dets.begin(); cur != dets.end(); ++cur) {
+        if (cur->objectness > det.objectness) {
+            break;
+        }
+        prev = cur;
+    }
+
+    if (prev == dets.before_begin()) {
+        /* No node can be displaced: discard the candidate. */
+        free(det.prob);
+        return;
+    }
+
+    dets.emplace_after(prev, det);
+    free(dets.begin()->prob);
+    dets.pop_front();
+}

+

+/*
+ * Decode the quantised output of every branch into candidate detections.
+ *
+ * For each grid cell and anchor the objectness value is dequantised and
+ * passed through sigmoid(); entries scoring above thresh become a detection
+ * whose box (centre + size) is scaled to image_w x image_h. Class scores are
+ * sigmoid(class value) * objectness, set to 0 when they do not exceed thresh.
+ *
+ * net      decoder configuration and branch tensors
+ * image_w  width used to scale the normalised boxes
+ * image_h  height used to scale the normalised boxes
+ * thresh   objectness / class-score threshold
+ * num      out: number of detections in the returned list
+ *
+ * When net->topN > 0 only the topN detections with the highest objectness
+ * are kept; with topN <= 0 every detection is kept. Each returned detection
+ * owns a calloc'd prob array that the caller releases (see free_dets).
+ */
+static std::forward_list<detection> get_network_boxes(network *net, int image_w, int image_h, float thresh, int *num)
+{
+    std::forward_list<detection> dets;
+    const int num_classes = net->num_classes;
+    const int stride = num_classes + 5;   /* x, y, w, h, objectness, then the class scores */
+    bool sorted = false;                  /* list is sorted once, when the topN limit is first hit */
+    *num = 0;
+
+    for (int i = 0; i < net->num_branch; ++i) {
+        const branch &br = net->branchs[i];
+        const int height = br.resolution;
+        const int width = br.resolution;
+        const int channel = br.num_box * stride;
+
+        /* Nothing to decode, or the tensor is smaller than the grid it is
+         * supposed to hold: skip the branch rather than read out of bounds. */
+        if (height <= 0 || channel <= 0 || !br.tf_output ||
+            static_cast<size_t>(height) * width * channel > br.size) {
+            continue;
+        }
+
+        /* Map one quantised tensor element back to a real value. */
+        auto dequant = [&br](int offset) -> float {
+            return ((float)br.tf_output[offset] - br.zero_point) * br.scale;
+        };
+
+        for (int h = 0; h < height; h++) {
+            for (int w = 0; w < width; w++) {
+                for (int anc = 0; anc < br.num_box; anc++) {
+                    const int bbox_x_offset = h * width * channel + w * channel + anc * stride;
+
+                    // objectness score
+                    const float objectness = sigmoid(dequant(bbox_x_offset + 4));
+                    if (!(objectness > thresh)) {
+                        continue;
+                    }
+
+                    detection det;
+                    det.prob = (float*)calloc(num_classes, sizeof(float));
+                    if (!det.prob && num_classes > 0) {
+                        /* Allocation failed: drop this candidate instead of writing through null. */
+                        continue;
+                    }
+                    det.objectness = objectness;
+
+                    /* Box centre: sigmoid offset inside the cell, normalised by the grid size. */
+                    det.bbox.x = (sigmoid(dequant(bbox_x_offset)) + w) / width;
+                    det.bbox.y = (sigmoid(dequant(bbox_x_offset + 1)) + h) / height;
+
+                    /* Box size: anchor scaled by exp(value), normalised by the network input size. */
+                    det.bbox.w = exp(dequant(bbox_x_offset + 2)) * br.anchor[anc*2] / net->input_w;
+                    det.bbox.h = exp(dequant(bbox_x_offset + 3)) * br.anchor[anc*2+1] / net->input_h;
+
+                    for (int s = 0; s < num_classes; s++) {
+                        const float score = sigmoid(dequant(bbox_x_offset + 5 + s)) * objectness;
+                        det.prob[s] = (score > thresh) ? score : 0;
+                    }
+
+                    /* Scale the normalised box to the caller's image size. */
+                    det.bbox.x *= image_w;
+                    det.bbox.w *= image_w;
+                    det.bbox.y *= image_h;
+                    det.bbox.h *= image_h;
+
+                    if (net->topN <= 0 || *num < net->topN) {
+                        dets.emplace_front(det);
+                        *num += 1;
+                    } else {
+                        /* Limit reached: keep the list sorted and let the new
+                         * candidate replace the weakest entry if it is better. */
+                        if (!sorted) {
+                            dets.sort(det_objectness_comparator);
+                            sorted = true;
+                        }
+                        insert_topN_det(dets, det);
+                    }
+                }
+            }
+        }
+    }
+    return dets;
+}

+

+// init part

+

+/*
+ * Bundle the parameters of one output branch into a branch value.
+ *
+ * scale_x_y has no corresponding argument; the struct is zero-initialised
+ * first so that the returned value never carries an indeterminate member.
+ */
+static branch create_brach(int resolution, int num_box, float *anchor, int8_t *tf_output, size_t size, float scale, int zero_point)
+{
+    branch b = {};
+    b.resolution = resolution;
+    b.num_box = num_box;
+    b.anchor = anchor;
+    b.tf_output = tf_output;
+    b.size = size;
+    b.scale = scale;
+    b.zero_point = zero_point;
+    return b;
+}

+

+/* Build a network value from its six fields; the branch array is referenced, not copied. */
+static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch* branchs, int topN)
+{
+    /* Initialisers follow the member order of struct network. */
+    network net = { input_w, input_h, num_classes, num_branch, branchs, topN };
+    return net;
+}

+

+// NMS part

+

+/*
+ * Signed overlap of two 1-D segments, each given by its centre and width.
+ * The result is (smaller right end) - (larger left end); it is negative
+ * when the segments do not touch.
+ */
+static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
+{
+    const float start_a = x1_center - width1/2;
+    const float start_b = x2_center - width2/2;
+    const float overlap_start = (start_a > start_b) ? start_a : start_b;
+
+    const float end_a = x1_center + width1/2;
+    const float end_b = x2_center + width2/2;
+    const float overlap_end = (end_a < end_b) ? end_a : end_b;
+
+    return overlap_end - overlap_start;
+}

+

+

+/* Area shared by two centre/size boxes; 0 when they are separated on either axis. */
+static float CalcBoxIntersect(box box1, box box2)
+{
+    const float overlap_w = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
+    if (overlap_w < 0) {
+        return 0;
+    }
+
+    const float overlap_h = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);
+    if (overlap_h < 0) {
+        return 0;
+    }
+
+    return overlap_w*overlap_h;
+}

+

+

+/* Area covered by either box: the two box areas minus their intersection. */
+static float CalcBoxUnion(box box1, box box2)
+{
+    const float shared_area = CalcBoxIntersect(box1, box2);
+    return box1.w*box1.h + box2.w*box2.h - shared_area;
+}

+

+

+/* Intersection-over-union of two boxes; 0 when either the intersection or the union is 0. */
+static float CalcBoxIOU(box box1, box box2)
+{
+    const float shared_area = CalcBoxIntersect(box1, box2);
+    if (shared_area == 0) {
+        return 0;
+    }
+
+    const float total_area = CalcBoxUnion(box1, box2);
+    return (total_area == 0) ? 0 : shared_area / total_area;
+}

+

+

+/* Ordering predicate for a descending sort on the score of class sort_class. */
+static bool CompareProbs(detection &prob1, detection &prob2)
+{
+    const int cls = sort_class;
+    return prob2.prob[cls] < prob1.prob[cls];
+}

+

+

+/*
+ * Per-class non-maximum suppression.
+ *
+ * For every class the list is sorted by descending class score; each
+ * detection with a non-zero score then zeroes that class score on every later
+ * detection whose box IOU with it exceeds iou_threshold. The list is
+ * reordered in place and no element is removed.
+ */
+static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
+{
+    for (int cls = 0; cls < classes; ++cls) {
+        sort_class = cls;   /* read by CompareProbs during the sort */
+        detections.sort(CompareProbs);
+
+        for (auto keep = detections.begin(); keep != detections.end(); ++keep) {
+            if (keep->prob[cls] == 0) {
+                continue;
+            }
+            for (auto other = std::next(keep, 1); other != detections.end(); ++other) {
+                if (other->prob[cls] != 0 && CalcBoxIOU(keep->bbox, other->bbox) > iou_threshold) {
+                    other->prob[cls] = 0;
+                }
+            }
+        }
+    }
+}

+

+

+/*
+ * Clamp *offset into [0, im_w*im_h*FORMAT_MULTIPLY_FACTOR - 1].
+ * A null offset pointer makes the call a no-op.
+ */
+static void inline check_and_fix_offset(int im_w,int im_h,int *offset)
+{
+    if (offset == nullptr) {
+        return;
+    }
+
+    const auto limit = im_w*im_h*FORMAT_MULTIPLY_FACTOR;
+    if (*offset >= limit) {
+        *offset = limit - 1;
+    } else if (*offset < 0) {
+        *offset = 0;
+    }
+}

+

+

+/*
+ * Paint a two-pixel-thick rectangle outline into img_in by writing 0xFF.
+ *
+ * img_in      image buffer of im_w x im_h pixels with FORMAT_MULTIPLY_FACTOR
+ *             bytes per pixel; only the first byte of each pixel is written.
+ *             A null pointer makes the call a no-op.
+ * bx, by      top-left corner of the box, in pixels
+ * bw, bh      width and height of the box, in pixels
+ *
+ * Every pixel is bounds-checked on x and y separately and skipped when it
+ * lies outside the image. Clamping the flat byte offset instead is not
+ * enough: an x equal to im_w still yields an in-range offset, but one that
+ * belongs to the first column of the next row.
+ */
+static void DrawBoxOnImage(uint8_t *img_in,int im_w,int im_h,int bx,int by,int bw,int bh)
+{
+    if (!img_in) {
+        return;
+    }
+
+    /* Set one pixel, ignoring coordinates outside the image. */
+    auto paint = [=](int x, int y) {
+        if (x < 0 || x >= im_w || y < 0 || y >= im_h) {
+            return;
+        }
+        img_in[(x + y*im_w)*FORMAT_MULTIPLY_FACTOR] = 0xFF;  /* FORMAT_MULTIPLY_FACTOR for rgb or grayscale*/
+    };
+
+    /* draw two lines per edge */
+    for (int line = 0; line < 2; line++) {
+        for (int i = 0; i < bw; i++) {
+            paint(bx + i, by + line);        /* top */
+            paint(bx + i, by + bh - line);   /* bottom */
+        }
+        for (int i = 0; i < bh; i++) {
+            paint(bx + line, by + i);        /* left */
+            paint(bx + bw - line, by + i);   /* right */
+        }
+    }
+}

+

+

+/*
+ * Decode the two detector output tensors into detection results and draw
+ * every resulting box onto img_in.
+ *
+ * img_in        image the boxes are drawn on, treated as ORIGINAL_IMAGE_WIDTH x
+ *               ORIGINAL_IMAGE_HEIGHT pixels; when null nothing is drawn
+ * model_output  the two int8 output tensors; index 0 is decoded on an
+ *               INPUT_IMAGE_WIDTH/32 grid, index 1 on an INPUT_IMAGE_WIDTH/16 grid
+ * results_out   one entry is appended per surviving (box, class) pair
+ *
+ * Steps: describe both branches, decode them (get_network_boxes), suppress
+ * overlapping boxes (CalcNMS), clip each box to the image, then append it to
+ * results_out and draw it. The function returns without appending anything
+ * when a tensor or its affine quantisation data is missing.
+ */
+void arm::app::RunPostProcessing(uint8_t *img_in,TfLiteTensor* model_output[2],std::vector<arm::app::DetectionResult> & results_out)
+{
+    /* init postprocessing */
+    const int num_classes = 1;
+    const int num_branch = 2;
+    const int topN = 0;   /* <= 0: keep every detection above the threshold */
+
+    if (!model_output) {
+        return;
+    }
+
+    /*NOTE: anchors are different for any given input model size, estimated during training phase */
+    float anchor1[] = {38, 77, 47, 97, 61, 126};
+    float anchor2[] = {14, 26, 19, 37, 28, 55 };
+    float *anchors[num_branch] = {anchor1, anchor2};
+    const int grid_divisor[num_branch] = {32, 16};
+
+    /* The branch descriptors live on the stack: nothing to allocate or free. */
+    branch branchs[num_branch];
+    for (int i = 0; i < num_branch; i++) {
+        TfLiteTensor *tensor = model_output[i];
+        if (!tensor) {
+            return;
+        }
+        const TfLiteAffineQuantization *quant =
+            static_cast<const TfLiteAffineQuantization*>(tensor->quantization.params);
+        if (!quant || !quant->scale || !quant->zero_point) {
+            return;
+        }
+        branchs[i] = create_brach(INPUT_IMAGE_WIDTH/grid_divisor[i], 3, anchors[i], tensor->data.int8, tensor->bytes, quant->scale->data[0], quant->zero_point->data[0]);
+    }
+
+    network net = creat_network(INPUT_IMAGE_WIDTH, INPUT_IMAGE_HEIGHT, num_classes, num_branch, branchs, topN);
+    /* end init */
+
+    /* start postprocessing */
+    int nboxes = 0;
+    const float thresh = .5;//50%
+    const float nms = .45;
+    const int orig_image_width = ORIGINAL_IMAGE_WIDTH;
+    const int orig_image_height = ORIGINAL_IMAGE_HEIGHT;
+    std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);
+
+    /* do nms */
+    CalcNMS(dets, net.num_classes, nms);
+
+    for (const detection &det : dets) {
+        /* Convert centre/size to corners and clip them to the image. */
+        float xmin = det.bbox.x - det.bbox.w / 2.0f;
+        float xmax = det.bbox.x + det.bbox.w / 2.0f;
+        float ymin = det.bbox.y - det.bbox.h / 2.0f;
+        float ymax = det.bbox.y + det.bbox.h / 2.0f;
+
+        if (xmin < 0) xmin = 0;
+        if (ymin < 0) ymin = 0;
+        if (xmax > orig_image_width) xmax = orig_image_width;
+        if (ymax > orig_image_height) ymax = orig_image_height;
+
+        const float bx = xmin;
+        const float by = ymin;
+        const float bw = xmax - xmin;
+        const float bh = ymax - ymin;
+
+        for (int j = 0; j < net.num_classes; ++j) {
+            /* A zero score means the class was below threshold or suppressed by NMS. */
+            if (det.prob[j] > 0) {
+                arm::app::DetectionResult tmp_result = {};
+
+                tmp_result.m_normalisedVal = det.prob[j];
+                tmp_result.m_x0=bx;
+                tmp_result.m_y0=by;
+                tmp_result.m_w=bw;
+                tmp_result.m_h=bh;
+
+                results_out.push_back(tmp_result);
+
+                DrawBoxOnImage(img_in,orig_image_width,orig_image_height,bx,by,bw,bh);
+            }
+        }
+    }
+
+    free_dets(dets);
+}

+

+/*
+ * Convert an interleaved 3-byte-per-pixel RGB image to 8-bit grayscale using
+ * the weights 0.299 R + 0.587 G + 0.114 B.
+ *
+ * rgb   input, read as im_w*im_h*3 bytes
+ * gray  output, written as im_w*im_h bytes
+ */
+void arm::app::RgbToGrayscale(const uint8_t *rgb,uint8_t *gray, int im_w,int im_h)
+{
+    const float weight_r = 0.299;
+    const float weight_g = 0.587;
+    const float weight_b = 0.114;
+    const int pixel_count = im_w*im_h;
+
+    for (int px = 0; px < pixel_count; px++) {
+        const uint8_t *src = rgb + px*3;
+        const uint32_t luma = src[0]*weight_r + src[1]*weight_g + src[2]*weight_b;
+        /* Saturate to the 8-bit range. */
+        gray[px] = (luma <= UINT8_MAX) ? luma : UINT8_MAX;
+    }
+}

+