COMPMID-2257: Implement NEGenerateProposals.

Change-Id: I8d751f8b09f842a214c305a4530a71d82f16db8f
Signed-off-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1943
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index 80bc74e..5eaf8ad 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -94,6 +94,7 @@
 #include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h"
 #include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h"
 #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h
index c6df9ba..ddf1bb4 100644
--- a/arm_compute/core/NEON/kernels/NECopyKernel.h
+++ b/arm_compute/core/NEON/kernels/NECopyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,18 +51,20 @@
     NECopyKernel &operator=(NECopyKernel &&) = default;
     /** Initialize the kernel's input, output.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
-     * @param[out] output Destination tensor. Data types supported: same as @p input.
+     * @param[in]  input   Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output  Destination tensor. Data types supported: same as @p input.
+     * @param[in]  padding (Optional) Padding to be applied to the input tensor
      */
-    void configure(const ITensor *input, ITensor *output);
+    void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList());
     /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel
      *
-     * @param[in] input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
-     * @param[in] output Destination tensor. Data types supported: same as @p input.
+     * @param[in] input   Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] output  Destination tensor. Data types supported: same as @p input.
+     * @param[in] padding (Optional) Padding to be applied to the input tensor
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList());
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -70,6 +72,7 @@
 private:
     const ITensor *_input;
     ITensor       *_output;
+    PaddingList    _padding;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NECOPYKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
new file mode 100644
index 0000000..a7b2603
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H__
+#define __ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for Compute All Anchors kernel */
+class NEComputeAllAnchorsKernel : public INEKernel
+{
+public:
+    const char *name() const override
+    {
+        return "NEComputeAllAnchorsKernel";
+    }
+
+    /** Default constructor */
+    NEComputeAllAnchorsKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default;
+    /** Default destructor */
+    ~NEComputeAllAnchorsKernel() = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  anchors     Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32
+     * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+     * @param[in]  info        Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     */
+    void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
+     *
+     * @param[in] anchors     Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: F16/F32
+     * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+     * @param[in] info        Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     * @return a Status
+     */
+    static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    const ITensor     *_anchors;
+    ITensor           *_all_anchors;
+    ComputeAnchorsInfo _anchors_info;
+};
+} // namespace arm_compute
+#endif // __ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H__
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 09d3c65..28fd7f3 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -46,6 +46,7 @@
 #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
 #include "arm_compute/runtime/NEON/functions/NECol2Im.h"
 #include "arm_compute/runtime/NEON/functions/NEColorConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h"
 #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
 #include "arm_compute/runtime/NEON/functions/NEConvolution.h"
@@ -85,6 +86,7 @@
 #include "arm_compute/runtime/NEON/functions/NEGaussian3x3.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
+#include "arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEHOGDescriptor.h"
 #include "arm_compute/runtime/NEON/functions/NEHOGDetector.h"
 #include "arm_compute/runtime/NEON/functions/NEHOGGradient.h"
diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
new file mode 100644
index 0000000..5f24b3e
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECOMPUTEALLANCHORS_H__
+#define __ARM_COMPUTE_NECOMPUTEALLANCHORS_H__
+
+#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEComputeAllAnchorsKernel.
+ *
+ * This function calls the following NEON kernels:
+ * -# @ref NEComputeAllAnchorsKernel
+ */
+class NEComputeAllAnchors : public INESimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in]  anchors     Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
+     * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+     * @param[in]  info        Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     */
+    void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel
+     *
+     * @param[in] anchors     Source tensor info. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
+     * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+     * @param[in] info        Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     * @return a Status
+     */
+    static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NECOMPUTEALLANCHORS_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
new file mode 100644
index 0000000..0e6601e
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__
+#define __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__
+#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
+#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h"
+#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
+#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
+#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h"
+#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to generate proposals for a RPN (Region Proposal Network)
+ *
+ * This function calls the following Neon kernels:
+ * -# @ref NEComputeAllAnchors
+ * -# @ref NEPermute x 2
+ * -# @ref NEReshapeLayer x 2
+ * -# @ref NEStridedSlice x 3
+ * -# @ref NEBoundingBoxTransform
+ * -# @ref NECopyKernel
+ * -# @ref NEMemsetKernel
+ * And the following CPP kernels:
+ * -# @ref CPPBoxWithNonMaximaSuppressionLimit
+ */
+class NEGenerateProposalsLayer : public IFunction
+{
+public:
+    /** Default constructor
+     *
+     * @param[in] memory_manager (Optional) Memory manager.
+     */
+    NEGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete;
+    /** Default move constructor */
+    NEGenerateProposalsLayer(NEGenerateProposalsLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete;
+    /** Default move assignment operator */
+    NEGenerateProposalsLayer &operator=(NEGenerateProposalsLayer &&) = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  scores              Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32
+     * @param[in]  deltas              Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
+     * @param[in]  anchors             Anchors tensor of size (4, A). Data types supported: Same as @p scores
+     * @param[out] proposals           Box proposals output tensor of size (5, W*H*A). Data types supported: Same as @p scores
+     * @param[out] scores_out          Box scores output tensor of size (W*H*A). Data types supported: Same as @p scores
+     * @param[out] num_valid_proposals Scalar output tensor holding the number of valid proposals, i.e. how many of the first entries in @p proposals are valid. Data types supported: U32
+     * @param[in]  info                Contains GenerateProposals operation information described in @ref GenerateProposalsInfo
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
+     * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
+     */
+    void configure(const ITensor *scores, const ITensor *deltas, const ITensor *anchors, ITensor *proposals, ITensor *scores_out, ITensor *num_valid_proposals,
+                   const GenerateProposalsInfo &info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref NEGenerateProposalsLayer
+     *
+     * @param[in] scores              Scores info from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors. Data types supported: F16/F32
+     * @param[in] deltas              Bounding box deltas info from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
+     * @param[in] anchors             Anchors tensor info of size (4, A). Data types supported: Same as @p scores
+     * @param[in] proposals           Box proposals output tensor info of size (5, W*H*A). Data types supported: Same as @p scores
+     * @param[in] scores_out          Box scores output tensor info of size (W*H*A). Data types supported: Same as @p scores
+     * @param[in] num_valid_proposals Scalar output tensor info holding the number of valid proposals, i.e. how many of the first entries in @p proposals are valid. Data types supported: U32
+     * @param[in] info                Contains GenerateProposals operation information described in @ref GenerateProposalsInfo
+     *
+     * @return a Status
+     */
+    static Status validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out,
+                           const ITensorInfo           *num_valid_proposals,
+                           const GenerateProposalsInfo &info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    // Memory group manager
+    MemoryGroup _memory_group;
+
+    // Neon kernels
+    NEPermuteKernel              _permute_deltas_kernel;
+    NEReshapeLayerKernel         _flatten_deltas_kernel;
+    NEPermuteKernel              _permute_scores_kernel;
+    NEReshapeLayerKernel         _flatten_scores_kernel;
+    NEComputeAllAnchorsKernel    _compute_anchors_kernel;
+    NEBoundingBoxTransformKernel _bounding_box_kernel;
+    NEMemsetKernel               _memset_kernel;
+    NECopyKernel                 _padded_copy_kernel;
+
+    // CPP kernels
+    CPPBoxWithNonMaximaSuppressionLimitKernel _cpp_nms_kernel;
+
+    bool _is_nhwc;
+
+    // Temporary tensors
+    Tensor _deltas_permuted;
+    Tensor _deltas_flattened;
+    Tensor _scores_permuted;
+    Tensor _scores_flattened;
+    Tensor _all_anchors;
+    Tensor _all_proposals;
+    Tensor _keeps_nms_unused;
+    Tensor _classes_nms_unused;
+    Tensor _proposals_4_roi_values;
+
+    // Output tensor pointers
+    ITensor *_num_valid_proposals;
+    ITensor *_scores_out;
+
+    /** Internal function to run the CPP BoxWithNMS kernel */
+    void run_cpp_nms_kernel();
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H__ */