source/use_case/object_detection/src/DetectorPostProcessing.cc - ml/ethos-u/ml-embedded-evaluation-kit - Gitiles

 /*
  * Copyright (c) 2022 Arm Limited. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #include "DetectorPostProcessing.hpp"
 #include <algorithm>
 #include <cmath>
 #include <stdint.h>
 #include <forward_list>


 /**
  * Four float fields named after the edges of a rectangle.
  * Not referenced by any other code visible in this file.
  */
 struct boxabs {
     float left;
     float right;
     float top;
     float bot;
 };


 /**
  * Description of one output branch (feature map) of the detector.
  */
 struct branch {
     int resolution;      /* Grid is resolution x resolution cells (used as both height and width). */
     int num_box;         /* Number of anchor boxes predicted per grid cell. */
     float *anchor;       /* Anchor sizes read as (w, h) pairs: anchor[2*k], anchor[2*k+1]. Not owned. */
     int8_t *tf_output;   /* Quantised output data; dequantised as (value - zero_point) * scale. */
     float scale;         /* Quantisation scale applied to tf_output values. */
     int zero_point;      /* Quantisation zero point subtracted from tf_output values. */
     size_t size;         /* Set from the tensor's byte count by the caller. */
     float scale_x_y;     /* Only referenced by a commented-out formula in get_network_boxes(). */
 };

 /**
  * Parameters of the whole detector as consumed by get_network_boxes().
  */
 struct network {
     int input_w;         /* Divisor applied to anchor widths. */
     int input_h;         /* Divisor applied to anchor heights. */
     int num_classes;     /* Number of class scores stored after each box's 5 leading values. */
     int num_branch;      /* Number of entries in branchs. */
     branch *branchs;     /* Array of num_branch branch descriptions. Not owned. */
     int topN;            /* Keep at most this many detections; <= 0 means keep all. */
 };


 /**
  * Bounding box: (x, y) is the box centre, (w, h) its full width and height.
  */
 struct box {
     float x;
     float y;
     float w;
     float h;
 };

 /**
  * One candidate detection produced by get_network_boxes().
  */
 struct detection {
     box bbox;            /* Centre/size box, scaled to the image size passed to get_network_boxes(). */
     float *prob;         /* calloc'ed array of per-class scores; released with free(). */
     float objectness;    /* Sigmoid of the dequantised objectness value. */
 };


 /* Class index read by CompareProbs(); CalcNMS() assigns it before each per-class sort. */
 static int sort_class;

 static void free_dets(std::forward_list<detection> &dets){
     std::forward_list<detection>::iterator it;
     for ( it = dets.begin(); it != dets.end(); ++it ){
         free(it->prob);
     }
 }

 /**
  * Logistic function 1 / (1 + e^-x), evaluated in single precision.
  *
  * std::exp is used so that the float overload is selected; an unqualified
  * exp() can resolve to the double version and force a float->double->float
  * round trip on every call.
  *
  * @param x  Input value.
  * @return   Value in [0, 1]. For very negative x, e^-x overflows float and
  *           the result is exactly 0.
  */
 float sigmoid(float x)
 {
     return 1.f / (1.f + std::exp(-x));
 }

 static bool det_objectness_comparator(detection &pa, detection &pb)
 {
     return pa.objectness < pb.objectness;
 }

 static void insert_topN_det(std::forward_list<detection> &dets, detection det)
 {
     std::forward_list<detection>::iterator it;
     std::forward_list<detection>::iterator last_it;
     for ( it = dets.begin(); it != dets.end(); ++it ){
         if(it->objectness > det.objectness)
             break;
         last_it = it;
     }
     if(it != dets.begin()){
         dets.emplace_after(last_it, det);
         free(dets.begin()->prob);
         dets.pop_front();
     }
     else{
         free(det.prob);
     }
 }

 /**
  * Decodes the quantised output of every branch into a list of detections.
  *
  * Each grid cell stores num_box records of (x, y, w, h, objectness, class
  * scores...). A record becomes a detection when the sigmoid of its
  * dequantised objectness exceeds thresh. Box centres and sizes are scaled to
  * image_w x image_h. Class scores are sigmoid(score) * objectness, zeroed
  * when not above thresh.
  *
  * When net->topN > 0 only the topN detections with the highest objectness
  * are kept; topN <= 0 keeps all of them.
  *
  * @param net      Network description; branch data is only read.
  * @param image_w  Width the box x / w values are scaled to.
  * @param image_h  Height the box y / h values are scaled to.
  * @param thresh   Threshold for both objectness and class scores.
  * @param num      Out: number of detections in the returned list.
  * @return         Detections; each prob array is calloc'ed and must be freed
  *                 by the caller.
  */
 static std::forward_list<detection> get_network_boxes(network *net, int image_w, int image_h, float thresh, int *num)
 {
     std::forward_list<detection> dets;
     const int num_classes = net->num_classes;
     const int box_stride = num_classes + 5;   /* x, y, w, h, objectness, then class scores */
     *num = 0;

     for (int i = 0; i < net->num_branch; ++i) {
         const branch &br = net->branchs[i];
         const int height  = br.resolution;
         const int width   = br.resolution;
         const int channel = br.num_box * box_stride;

         if (height <= 0 || channel <= 0) {
             continue;   /* nothing to scan in this branch */
         }

         /* Never index past the tensor: skip a branch whose buffer is missing
          * or smaller than the grid layout requires (one byte per int8 value). */
         const size_t values_needed = static_cast<size_t>(height) * static_cast<size_t>(width) * static_cast<size_t>(channel);
         if (br.tf_output == nullptr || br.size < values_needed) {
             continue;
         }

         /* Dequantise one int8 value of this branch. */
         auto dequant = [&br](int offset) -> float {
             return ((float)br.tf_output[offset] - br.zero_point) * br.scale;
         };

         for (int h = 0; h < height; h++) {
             for (int w = 0; w < width; w++) {
                 for (int anc = 0; anc < br.num_box; anc++) {

                     /* Start of this anchor's record within the cell. */
                     const int bbox_x_offset = h * width * channel + w * channel + anc * box_stride;
                     const int bbox_y_offset = bbox_x_offset + 1;
                     const int bbox_w_offset = bbox_x_offset + 2;
                     const int bbox_h_offset = bbox_x_offset + 3;
                     const int bbox_obj_offset = bbox_x_offset + 4;
                     const int bbox_scores_offset = bbox_x_offset + 5;

                     const float objectness = sigmoid(dequant(bbox_obj_offset));
                     if (!(objectness > thresh)) {
                         continue;
                     }

                     detection det;
                     det.prob = static_cast<float*>(calloc(num_classes, sizeof(float)));
                     if (det.prob == nullptr && num_classes > 0) {
                         /* Out of memory: drop this candidate instead of writing through a null pointer. */
                         continue;
                     }
                     det.objectness = objectness;

                     /* Centre: sigmoid offset inside the cell, normalised by the grid size.
                      * (The scale_x_y grid-sensitivity adjustment is not applied.) */
                     det.bbox.x = (sigmoid(dequant(bbox_x_offset)) + w) / width;
                     det.bbox.y = (sigmoid(dequant(bbox_y_offset)) + h) / height;

                     /* Size: exponential scaling of the anchor, normalised by the network input size. */
                     det.bbox.w = exp(dequant(bbox_w_offset)) * br.anchor[anc*2] / net->input_w;
                     det.bbox.h = exp(dequant(bbox_h_offset)) * br.anchor[anc*2+1] / net->input_h;

                     for (int s = 0; s < num_classes; s++) {
                         const float score = sigmoid(dequant(bbox_scores_offset + s)) * objectness;
                         det.prob[s] = (score > thresh) ? score : 0;
                     }

                     /* Scale the normalised box to the requested image size. */
                     det.bbox.x *= image_w;
                     det.bbox.w *= image_w;
                     det.bbox.y *= image_h;
                     det.bbox.h *= image_h;

                     if (*num < net->topN || net->topN <= 0) {
                         dets.emplace_front(det);
                         *num += 1;
                     }
                     else if (*num == net->topN) {
                         /* List just became full: sort once so insert_topN_det can keep it ordered. */
                         dets.sort(det_objectness_comparator);
                         insert_topN_det(dets, det);
                         *num += 1;
                     } else {
                         insert_topN_det(dets, det);
                     }
                 }
             }
         }
     }

     /* In top-N mode the counter overshoots by one to mark the "already sorted"
      * state; undo that. With topN <= 0 the counter is already exact. */
     if (net->topN > 0 && *num > net->topN)
         *num = net->topN;
     return dets;
 }

 // init part

 /**
  * Builds a branch description from its individual fields.
  *
  * @param resolution  Grid size of the branch (same for both axes).
  * @param num_box     Anchor boxes per grid cell.
  * @param anchor      Anchor (w, h) pairs; the pointer is stored, not copied.
  * @param tf_output   Quantised output data; the pointer is stored, not copied.
  * @param size        Size of tf_output as reported by the caller.
  * @param scale       Quantisation scale.
  * @param zero_point  Quantisation zero point.
  * @return            The populated branch.
  */
 static branch create_brach(int resolution, int num_box, float *anchor, int8_t *tf_output, size_t size, float scale, int zero_point)
 {
     branch b;
     b.resolution = resolution;
     b.num_box = num_box;
     b.anchor = anchor;
     b.tf_output = tf_output;
     b.size = size;
     b.scale = scale;
     b.zero_point = zero_point;
     /* Previously left uninitialised. 1 is the neutral value for the
      * (currently disabled) "sigmoid * s - (s - 1) / 2" adjustment. */
     b.scale_x_y = 1.0f;
     return b;
 }

 /**
  * Bundles the detector parameters into a network value.
  *
  * @param input_w      Network input width.
  * @param input_h      Network input height.
  * @param num_classes  Number of classes.
  * @param num_branch   Number of entries in branchs.
  * @param branchs      Branch array; the pointer is stored, not copied.
  * @param topN         Maximum number of detections to keep (<= 0: no limit).
  * @return             The populated network.
  */
 static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch* branchs, int topN)
 {
     /* Initialisers follow the member order of network. */
     network net = { input_w, input_h, num_classes, num_branch, branchs, topN };
     return net;
 }

 // NMS part

 /**
  * Signed overlap of two 1-D segments given by centre and width.
  *
  * @param x1_center  Centre of the first segment.
  * @param width1     Width of the first segment.
  * @param x2_center  Centre of the second segment.
  * @param width2     Width of the second segment.
  * @return Length of the common part; negative when the segments are apart.
  */
 static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
 {
     const float half1 = width1/2;
     const float half2 = width2/2;

     const float start1 = x1_center - half1;
     const float start2 = x2_center - half2;
     const float end1 = x1_center + half1;
     const float end2 = x2_center + half2;

     /* The overlap runs from the later start to the earlier end. */
     const float overlap_start = (start1 > start2) ? start1 : start2;
     const float overlap_end = (end1 < end2) ? end1 : end2;

     return overlap_end - overlap_start;
 }


 /**
  * Area shared by two centre/size boxes.
  *
  * @return Intersection area, or 0 when the boxes are apart on either axis.
  */
 static float CalcBoxIntersect(box box1, box box2)
 {
     const float overlap_w = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
     if (overlap_w < 0) {
         return 0.f;
     }

     const float overlap_h = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);
     return (overlap_h < 0) ? 0.f : overlap_w*overlap_h;
 }


 /**
  * Area covered by at least one of two boxes:
  * area(box1) + area(box2) - intersection.
  */
 static float CalcBoxUnion(box box1, box box2)
 {
     const float shared_area = CalcBoxIntersect(box1, box2);
     return box1.w*box1.h + box2.w*box2.h - shared_area;
 }


 /**
  * Intersection-over-union of two boxes.
  *
  * @return intersection / union, or 0 when either quantity is 0.
  */
 static float CalcBoxIOU(box box1, box box2)
 {
     const float overlap_area = CalcBoxIntersect(box1, box2);
     if (overlap_area == 0) {
         return 0.f;
     }

     const float combined_area = CalcBoxUnion(box1, box2);
     /* Guard the division against a zero union. */
     return (combined_area == 0) ? 0.f : overlap_area / combined_area;
 }


 static bool CompareProbs(detection &prob1, detection &prob2)
 {
     return prob1.prob[sort_class] > prob2.prob[sort_class];
 }


 static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
 {
     int k;

     for (k = 0; k < classes; ++k) {
         sort_class = k;
         detections.sort(CompareProbs);

         for (std::forward_list<detection>::iterator it=detections.begin(); it != detections.end(); ++it){
             if (it->prob[k] == 0) continue;
             for (std::forward_list<detection>::iterator itc=std::next(it, 1); itc != detections.end(); ++itc){
                 if (itc->prob[k] == 0) continue;
                 if (CalcBoxIOU(it->bbox, itc->bbox) > iou_threshold) {
                     itc->prob[k] = 0;
                 }
             }
         }
     }
 }


 /**
  * Clamps a buffer offset into [0, im_w*im_h*FORMAT_MULTIPLY_FACTOR - 1].
  *
  * @param im_w    Image width in pixels.
  * @param im_h    Image height in pixels.
  * @param offset  In/out offset; a null pointer is ignored.
  */
 static void inline check_and_fix_offset(int im_w,int im_h,int *offset)
 {
     if (offset == nullptr) {
         return;
     }

     const auto buffer_len = im_w*im_h*FORMAT_MULTIPLY_FACTOR;

     if (*offset >= buffer_len) {
         *offset = buffer_len - 1;
         return;
     }

     if (*offset < 0) {
         *offset = 0;
     }
 }


 /**
  * Draws a rectangle outline, two pixels thick, into an image buffer.
  *
  * Each outline pixel has the byte at (y*im_w + x)*FORMAT_MULTIPLY_FACTOR set
  * to 0xFF. Pixels outside the image are skipped. Clamping the linear offset
  * alone lets a box that touches the right border wrap into column 0 of the
  * next row, and a box that touches the bottom border repeatedly paint the
  * last byte of the buffer.
  *
  * @param img_in  Image buffer; nothing is drawn when null.
  * @param im_w    Image width in pixels.
  * @param im_h    Image height in pixels.
  * @param bx      Left column of the box.
  * @param by      Top row of the box.
  * @param bw      Box width in pixels.
  * @param bh      Box height in pixels.
  */
 static void DrawBoxOnImage(uint8_t *img_in,int im_w,int im_h,int bx,int by,int bw,int bh)
 {
     if (!img_in || im_w <= 0 || im_h <= 0) {
         return;
     }

     /* Mark one pixel, ignoring coordinates that fall outside the image. */
     auto plot = [&](int x, int y) {
         if (x < 0 || x >= im_w || y < 0 || y >= im_h) {
             return;
         }
         int offset = (y*im_w + x)*FORMAT_MULTIPLY_FACTOR;
         check_and_fix_offset(im_w,im_h,&offset);   /* defensive; a no-op for in-range pixels */
         img_in[offset] = 0xFF;  /* FORMAT_MULTIPLY_FACTOR for rgb or grayscale */
     };

     /* Horizontal edges: rows by, by+1 (top) and by+bh, by+bh-1 (bottom). */
     for (int i = 0; i < bw; i++) {
         for (int line = 0; line < 2; line++) {
             plot(bx + i, by + line);
             plot(bx + i, by + bh - line);
         }
     }

     /* Vertical edges: columns bx, bx+1 (left) and bx+bw, bx+bw-1 (right). */
     for (int i = 0; i < bh; i++) {
         for (int line = 0; line < 2; line++) {
             plot(bx + line, by + i);
             plot(bx + bw - line, by + i);
         }
     }
 }


 void arm::app::RunPostProcessing(uint8_t *img_in,TfLiteTensor* model_output[2],std::vector<arm::app::DetectionResult> & results_out)
 {

     TfLiteTensor* output[2] = {nullptr,nullptr};
     int input_w = INPUT_IMAGE_WIDTH;
     int input_h = INPUT_IMAGE_HEIGHT;

     for(int anchor=0;anchor<2;anchor++)
     {
          output[anchor] = model_output[anchor];
     }

     /* init postprocessing 	 */
     int num_classes = 1;
     int num_branch = 2;
     int topN = 0;

     branch* branchs = (branch*)calloc(num_branch, sizeof(branch));

     /*NOTE: anchors are different for any given input model size, estimated during training phase */
     float anchor1[] = {38, 77, 47, 97, 61, 126};
     float anchor2[] = {14, 26, 19, 37, 28, 55 };


     branchs[0] = create_brach(INPUT_IMAGE_WIDTH/32, 3, anchor1, output[0]->data.int8, output[0]->bytes, ((TfLiteAffineQuantization*)(output[0]->quantization.params))->scale->data[0], ((TfLiteAffineQuantization*)(output[0]->quantization.params))->zero_point->data[0]);

     branchs[1] = create_brach(INPUT_IMAGE_WIDTH/16, 3, anchor2, output[1]->data.int8, output[1]->bytes, ((TfLiteAffineQuantization*)(output[1]->quantization.params))->scale->data[0],((TfLiteAffineQuantization*)(output[1]->quantization.params))->zero_point->data[0]);

     network net = creat_network(input_w, input_h, num_classes, num_branch, branchs,topN);
     /* end init */

     /* start postprocessing */
     int nboxes=0;
     float thresh = .5;//50%
     float nms = .45;
     int orig_image_width = ORIGINAL_IMAGE_WIDTH;
     int orig_image_height = ORIGINAL_IMAGE_HEIGHT;
     std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);
     /* do nms */
     CalcNMS(dets, net.num_classes, nms);
     uint8_t temp_unsuppressed_counter = 0;
     int j;
     for (std::forward_list<detection>::iterator it=dets.begin(); it != dets.end(); ++it){
         float xmin = it->bbox.x - it->bbox.w / 2.0f;
         float xmax = it->bbox.x + it->bbox.w / 2.0f;
         float ymin = it->bbox.y - it->bbox.h / 2.0f;
         float ymax = it->bbox.y + it->bbox.h / 2.0f;

         if (xmin < 0) xmin = 0;
         if (ymin < 0) ymin = 0;
         if (xmax > orig_image_width) xmax = orig_image_width;
         if (ymax > orig_image_height) ymax = orig_image_height;

         float bx = xmin;
         float by = ymin;
         float bw = xmax - xmin;
         float bh = ymax - ymin;

         for (j = 0; j <  net.num_classes; ++j) {
             if (it->prob[j] > 0) {

                 arm::app::DetectionResult tmp_result = {};

                 tmp_result.m_normalisedVal = it->prob[j];
                 tmp_result.m_x0=bx;
                 tmp_result.m_y0=by;
                 tmp_result.m_w=bw;
                 tmp_result.m_h=bh;

                 results_out.push_back(tmp_result);

                 DrawBoxOnImage(img_in,orig_image_width,orig_image_height,bx,by,bw,bh);

                 temp_unsuppressed_counter++;
             }
         }
     }

     free_dets(dets);
     free(branchs);

 }

 /**
  * Converts an interleaved 3-bytes-per-pixel image to one byte per pixel.
  *
  * Each output value is 0.299*c0 + 0.587*c1 + 0.114*c2 of the pixel's three
  * bytes, truncated to an integer and capped at UINT8_MAX.
  *
  * @param rgb   Source buffer, 3 bytes per pixel, im_w*im_h pixels.
  * @param gray  Destination buffer, im_w*im_h bytes.
  * @param im_w  Image width in pixels.
  * @param im_h  Image height in pixels.
  */
 void arm::app::RgbToGrayscale(const uint8_t *rgb,uint8_t *gray, int im_w,int im_h)
 {
     const float weight_r = 0.299;
     const float weight_g = 0.587;
     const float weight_b = 0.114;
     const int pixel_count = im_w*im_h;

     for (int px = 0; px < pixel_count; ++px) {
         const uint8_t *src = &rgb[px*3];
         const uint32_t luma = src[0]*weight_r + src[1]*weight_g + src[2]*weight_b;

         /* clip if needed */
         gray[px] = (luma <= UINT8_MAX) ? luma : UINT8_MAX;
     }
 }
	/*
	* Copyright (c) 2022 Arm Limited. All rights reserved.
	* SPDX-License-Identifier: Apache-2.0
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	#include "DetectorPostProcessing.hpp"
	#include <algorithm>
	#include <cmath>
	#include <stdint.h>
	#include <forward_list>


	typedef struct boxabs {
	float left, right, top, bot;
	} boxabs;


	typedef struct branch {
	int resolution;
	int num_box;
	float *anchor;
	int8_t *tf_output;
	float scale;
	int zero_point;
	size_t size;
	float scale_x_y;
	} branch;

	typedef struct network {
	int input_w;
	int input_h;
	int num_classes;
	int num_branch;
	branch *branchs;
	int topN;
	} network;


	typedef struct box {
	float x, y, w, h;
	} box;

	typedef struct detection{
	box bbox;
	float *prob;
	float objectness;
	} detection;



	static int sort_class;

	static void free_dets(std::forward_list<detection> &dets){
	std::forward_list<detection>::iterator it;
	for ( it = dets.begin(); it != dets.end(); ++it ){
	free(it->prob);
	}
	}

	float sigmoid(float x)
	{
	return 1.f/(1.f + exp(-x));
	}

	static bool det_objectness_comparator(detection &pa, detection &pb)
	{
	return pa.objectness < pb.objectness;
	}

	static void insert_topN_det(std::forward_list<detection> &dets, detection det)
	{
	std::forward_list<detection>::iterator it;
	std::forward_list<detection>::iterator last_it;
	for ( it = dets.begin(); it != dets.end(); ++it ){
	if(it->objectness > det.objectness)
	break;
	last_it = it;
	}
	if(it != dets.begin()){
	dets.emplace_after(last_it, det);
	free(dets.begin()->prob);
	dets.pop_front();
	}
	else{
	free(det.prob);
	}
	}

	static std::forward_list<detection> get_network_boxes(network net, int image_w, int image_h, float thresh, int num)
	{
	std::forward_list<detection> dets;
	int i;
	int num_classes = net->num_classes;
	*num = 0;

	for (i = 0; i < net->num_branch; ++i) {
	int height = net->branchs[i].resolution;
	int width = net->branchs[i].resolution;
	int channel = net->branchs[i].num_box*(5+num_classes);

	for (int h = 0; h < net->branchs[i].resolution; h++) {
	for (int w = 0; w < net->branchs[i].resolution; w++) {
	for (int anc = 0; anc < net->branchs[i].num_box; anc++) {

	// objectness score
	int bbox_obj_offset = h * width * channel + w * channel + anc * (num_classes + 5) + 4;
	float objectness = sigmoid(((float)net->branchs[i].tf_output[bbox_obj_offset] - net->branchs[i].zero_point) * net->branchs[i].scale);

	if(objectness > thresh){
	detection det;
	det.prob = (float*)calloc(num_classes, sizeof(float));
	det.objectness = objectness;
	//get bbox prediction data for each anchor, each feature point
	int bbox_x_offset = bbox_obj_offset -4;
	int bbox_y_offset = bbox_x_offset + 1;
	int bbox_w_offset = bbox_x_offset + 2;
	int bbox_h_offset = bbox_x_offset + 3;
	int bbox_scores_offset = bbox_x_offset + 5;
	//int bbox_scores_step = 1;
	det.bbox.x = ((float)net->branchs[i].tf_output[bbox_x_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
	det.bbox.y = ((float)net->branchs[i].tf_output[bbox_y_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
	det.bbox.w = ((float)net->branchs[i].tf_output[bbox_w_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
	det.bbox.h = ((float)net->branchs[i].tf_output[bbox_h_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;


	float bbox_x, bbox_y;

	// Eliminate grid sensitivity trick involved in YOLOv4
	bbox_x = sigmoid(det.bbox.x); //* net->branchs[i].scale_x_y - (net->branchs[i].scale_x_y - 1) / 2;
	bbox_y = sigmoid(det.bbox.y); //* net->branchs[i].scale_x_y - (net->branchs[i].scale_x_y - 1) / 2;
	det.bbox.x = (bbox_x + w) / width;
	det.bbox.y = (bbox_y + h) / height;

	det.bbox.w = exp(det.bbox.w) * net->branchs[i].anchor[anc*2] / net->input_w;
	det.bbox.h = exp(det.bbox.h) * net->branchs[i].anchor[anc*2+1] / net->input_h;

	for (int s = 0; s < num_classes; s++) {
	det.prob[s] = sigmoid(((float)net->branchs[i].tf_output[bbox_scores_offset + s] - net->branchs[i].zero_point) * net->branchs[i].scale)*objectness;
	det.prob[s] = (det.prob[s] > thresh) ? det.prob[s] : 0;
	}

	//correct_yolo_boxes
	det.bbox.x *= image_w;
	det.bbox.w *= image_w;
	det.bbox.y *= image_h;
	det.bbox.h *= image_h;

	if (*num < net->topN \|\| net->topN <=0){
	dets.emplace_front(det);
	*num += 1;
	}
	else if(*num == net->topN){
	dets.sort(det_objectness_comparator);
	insert_topN_det(dets,det);
	*num += 1;
	}else{
	insert_topN_det(dets,det);
	}
	}
	}
	}
	}
	}
	if(*num > net->topN)
	*num -=1;
	return dets;
	}

	// init part

	static branch create_brach(int resolution, int num_box, float anchor, int8_t tf_output, size_t size, float scale, int zero_point)
	{
	branch b;
	b.resolution = resolution;
	b.num_box = num_box;
	b.anchor = anchor;
	b.tf_output = tf_output;
	b.size = size;
	b.scale = scale;
	b.zero_point = zero_point;
	return b;
	}

	static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch* branchs, int topN)
	{
	network net;
	net.input_w = input_w;
	net.input_h = input_h;
	net.num_classes = num_classes;
	net.num_branch = num_branch;
	net.branchs = branchs;
	net.topN = topN;
	return net;
	}

	// NMS part

	static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
	{
	float left_1 = x1_center - width1/2;
	float left_2 = x2_center - width2/2;
	float leftest;
	if (left_1 > left_2) {
	leftest = left_1;
	} else {
	leftest = left_2;
	}

	float right_1 = x1_center + width1/2;
	float right_2 = x2_center + width2/2;
	float rightest;
	if (right_1 < right_2) {
	rightest = right_1;
	} else {
	rightest = right_2;
	}

	return rightest - leftest;
	}


	static float CalcBoxIntersect(box box1, box box2)
	{
	float width = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
	if (width < 0) return 0;
	float height = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);
	if (height < 0) return 0;

	float total_area = width*height;
	return total_area;
	}


	static float CalcBoxUnion(box box1, box box2)
	{
	float boxes_intersection = CalcBoxIntersect(box1, box2);
	float boxes_union = box1.wbox1.h + box2.wbox2.h - boxes_intersection;
	return boxes_union;
	}


	static float CalcBoxIOU(box box1, box box2)
	{
	float boxes_intersection = CalcBoxIntersect(box1, box2);

	if (boxes_intersection == 0) return 0;

	float boxes_union = CalcBoxUnion(box1, box2);

	if (boxes_union == 0) return 0;

	return boxes_intersection / boxes_union;
	}


	static bool CompareProbs(detection &prob1, detection &prob2)
	{
	return prob1.prob[sort_class] > prob2.prob[sort_class];
	}


	static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
	{
	int k;

	for (k = 0; k < classes; ++k) {
	sort_class = k;
	detections.sort(CompareProbs);

	for (std::forward_list<detection>::iterator it=detections.begin(); it != detections.end(); ++it){
	if (it->prob[k] == 0) continue;
	for (std::forward_list<detection>::iterator itc=std::next(it, 1); itc != detections.end(); ++itc){
	if (itc->prob[k] == 0) continue;
	if (CalcBoxIOU(it->bbox, itc->bbox) > iou_threshold) {
	itc->prob[k] = 0;
	}
	}
	}
	}
	}


	static void inline check_and_fix_offset(int im_w,int im_h,int *offset)
	{

	if (!offset) return;

	if ( (offset) >= im_wim_h*FORMAT_MULTIPLY_FACTOR)
	(offset) = im_wim_h*FORMAT_MULTIPLY_FACTOR -1;
	else if ( (*offset) < 0)
	*offset =0;

	}


	static void DrawBoxOnImage(uint8_t *img_in,int im_w,int im_h,int bx,int by,int bw,int bh)
	{

	if (!img_in) {
	return;
	}

	int offset=0;
	for (int i=0; i < bw; i++) {
	/draw two lines /
	for (int line=0; line < 2; line++) {
	/top/
	offset =(i + (by + line)im_w + bx)FORMAT_MULTIPLY_FACTOR;
	check_and_fix_offset(im_w,im_h,&offset);
	img_in[offset] = 0xFF; /* FORMAT_MULTIPLY_FACTOR for rgb or grayscale*/
	/bottom/
	offset = (i + (by + bh - line)im_w + bx)FORMAT_MULTIPLY_FACTOR;
	check_and_fix_offset(im_w,im_h,&offset);
	img_in[offset] = 0xFF;
	}
	}

	for (int i=0; i < bh; i++) {
	/draw two lines /
	for (int line=0; line < 2; line++) {
	/left/
	offset = ((i + by)im_w + bx + line)FORMAT_MULTIPLY_FACTOR;
	check_and_fix_offset(im_w,im_h,&offset);
	img_in[offset] = 0xFF;
	/right/
	offset = ((i + by)im_w + bx + bw - line)FORMAT_MULTIPLY_FACTOR;
	check_and_fix_offset(im_w,im_h,&offset);
	img_in[offset] = 0xFF;
	}
	}

	}


	void arm::app::RunPostProcessing(uint8_t img_in,TfLiteTensor model_output[2],std::vector<arm::app::DetectionResult> & results_out)
	{

	TfLiteTensor* output[2] = {nullptr,nullptr};
	int input_w = INPUT_IMAGE_WIDTH;
	int input_h = INPUT_IMAGE_HEIGHT;

	for(int anchor=0;anchor<2;anchor++)
	{
	output[anchor] = model_output[anchor];
	}

	/* init postprocessing */
	int num_classes = 1;
	int num_branch = 2;
	int topN = 0;

	branch* branchs = (branch*)calloc(num_branch, sizeof(branch));

	/NOTE: anchors are different for any given input model size, estimated during training phase /
	float anchor1[] = {38, 77, 47, 97, 61, 126};
	float anchor2[] = {14, 26, 19, 37, 28, 55 };


	branchs[0] = create_brach(INPUT_IMAGE_WIDTH/32, 3, anchor1, output[0]->data.int8, output[0]->bytes, ((TfLiteAffineQuantization)(output[0]->quantization.params))->scale->data[0], ((TfLiteAffineQuantization)(output[0]->quantization.params))->zero_point->data[0]);

	branchs[1] = create_brach(INPUT_IMAGE_WIDTH/16, 3, anchor2, output[1]->data.int8, output[1]->bytes, ((TfLiteAffineQuantization)(output[1]->quantization.params))->scale->data[0],((TfLiteAffineQuantization)(output[1]->quantization.params))->zero_point->data[0]);

	network net = creat_network(input_w, input_h, num_classes, num_branch, branchs,topN);
	/* end init */

	/* start postprocessing */
	int nboxes=0;
	float thresh = .5;//50%
	float nms = .45;
	int orig_image_width = ORIGINAL_IMAGE_WIDTH;
	int orig_image_height = ORIGINAL_IMAGE_HEIGHT;
	std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);
	/* do nms */
	CalcNMS(dets, net.num_classes, nms);
	uint8_t temp_unsuppressed_counter = 0;
	int j;
	for (std::forward_list<detection>::iterator it=dets.begin(); it != dets.end(); ++it){
	float xmin = it->bbox.x - it->bbox.w / 2.0f;
	float xmax = it->bbox.x + it->bbox.w / 2.0f;
	float ymin = it->bbox.y - it->bbox.h / 2.0f;
	float ymax = it->bbox.y + it->bbox.h / 2.0f;

	if (xmin < 0) xmin = 0;
	if (ymin < 0) ymin = 0;
	if (xmax > orig_image_width) xmax = orig_image_width;
	if (ymax > orig_image_height) ymax = orig_image_height;

	float bx = xmin;
	float by = ymin;
	float bw = xmax - xmin;
	float bh = ymax - ymin;

	for (j = 0; j < net.num_classes; ++j) {
	if (it->prob[j] > 0) {

	arm::app::DetectionResult tmp_result = {};

	tmp_result.m_normalisedVal = it->prob[j];
	tmp_result.m_x0=bx;
	tmp_result.m_y0=by;
	tmp_result.m_w=bw;
	tmp_result.m_h=bh;

	results_out.push_back(tmp_result);

	DrawBoxOnImage(img_in,orig_image_width,orig_image_height,bx,by,bw,bh);

	temp_unsuppressed_counter++;
	}
	}
	}

	free_dets(dets);
	free(branchs);

	}

	void arm::app::RgbToGrayscale(const uint8_t rgb,uint8_t gray, int im_w,int im_h)
	{
	float R=0.299;
	float G=0.587;
	float B=0.114;
	for (int i=0; i< im_w*im_h; i++ ) {

	uint32_t int_gray = rgb[i3 + 0]R + rgb[i3 + 1]G+ rgb[i3 + 2]B;
	/clip if need /
	if (int_gray <= UINT8_MAX) {
	gray[i] = int_gray;
	} else {
	gray[i] = UINT8_MAX;
	}

	}

	}