COMPMID-1451: (3RDPARTY_UPDATE) Fixes for GenerateProposals graph node and BoxWithNMSLimitKernel

COMPMID-1792: Accuracy issue in CLGenerateProposals

This patch does the following:
- Some fixes for GenerateProposals function and tests
- Adapting BoxWithNMSLimitKernel to only accept U32 tensors as keeps_size
- Update 3rdparty
- Adds a small tolerance for a GenerateProposals test

Change-Id: Ia8ec1cdfe941fe05003645e86deb9ea6a6044d74
diff --git a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
index 2d2ac07..325eeb2 100644
--- a/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIAlignLayerKernel.cpp
@@ -47,11 +47,13 @@
     ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW);
     ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
 
     if(output->total_size() != 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
         ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || (output->dimension(1) != pool_info.pooled_height()));
         ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(2));
         ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(1) != output->dimension(3));
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
index 2b9934c..06a0551 100644
--- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
+++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
@@ -328,7 +328,7 @@
                 {
                     *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i));
                 }
-                *reinterpret_cast<T *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = static_cast<T>(keeps[j].size());
+                *reinterpret_cast<uint32_t *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = keeps[j].size();
                 cur_out_idx += keeps[j].size();
             }
         }
@@ -356,13 +356,14 @@
     ARM_COMPUTE_UNUSED(num_classes);
     ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes");
     ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows");
+
     ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1));
     ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4);
     if(keeps != nullptr)
     {
         ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output");
         ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps_size);
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keeps_size, 1, DataType::U32);
         ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0));
         ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0));
     }
diff --git a/src/graph/nodes/GenerateProposalsLayerNode.cpp b/src/graph/nodes/GenerateProposalsLayerNode.cpp
index f5a3c02..7367e80 100644
--- a/src/graph/nodes/GenerateProposalsLayerNode.cpp
+++ b/src/graph/nodes/GenerateProposalsLayerNode.cpp
@@ -80,7 +80,8 @@
             break;
         case 2:
             // Configure num_valid_proposals
-            output_desc.shape = TensorShape(1);
+            output_desc.shape     = TensorShape(1);
+            output_desc.data_type = DataType::U32;
             break;
         default:
             ARM_COMPUTE_ERROR("Unsupported output index");
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 80ed0e5..5dd1202 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -57,6 +57,9 @@
 void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
                                          const GenerateProposalsInfo &info)
 {
+    ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
+    ARM_COMPUTE_ERROR_THROW_ON(CLGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
+
     const DataType data_type         = deltas->info()->data_type();
     const int      num_anchors       = scores->info()->dimension(2);
     const int      feat_width        = scores->info()->dimension(0);
@@ -109,7 +112,7 @@
     // Note that NMS needs outputs preinitialized.
     auto_init_if_empty(*scores_out->info(), TensorShape(scores_nms_size), 1, data_type);
     auto_init_if_empty(*_proposals_4_roi_values.info(), TensorShape(values_per_roi, scores_nms_size), 1, data_type);
-    auto_init_if_empty(*num_valid_proposals->info(), TensorShape(values_per_roi, scores_nms_size), 1, data_type);
+    auto_init_if_empty(*num_valid_proposals->info(), TensorShape(1), 1, DataType::U32);
 
     // Initialize temporaries (unused) outputs
     _classes_nms_unused.allocator()->init(TensorInfo(TensorShape(1, 1), 1, data_type));
@@ -137,7 +140,8 @@
 Status CLGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
                                           const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(proposals, scores_out, num_valid_proposals);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(scores, DataLayout::NCHW);
 
     const int num_anchors       = scores->dimension(2);
     const int feat_width        = scores->dimension(0);
@@ -161,7 +165,7 @@
     ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteKernel::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 }));
 
     TensorInfo scores_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true));
-    TensorInfo proposals_4_roi_values(proposals->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
+    TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true));
 
     ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(&scores_permuted_info, &scores_flattened_info));
     ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, BoundingBoxTransformInfo(info.im_width(), info.im_height(),
@@ -174,7 +178,7 @@
     {
         ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->num_dimensions() > 1);
         ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->dimension(0) > 1);
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(num_valid_proposals, DataType::U32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(num_valid_proposals, 1, DataType::U32);
     }
 
     if(proposals->total_size() > 0)