COMPMID-3374: Remove memory state from NEConcatenateLayer kernels

* Allow the following kernels to accept backing memory at run-time:
  * NEBatchConcatenateLayerKernel
  * NEDepthConcatenateLayerKernel
  * NEHeightConcatenateLayerKernel
  * NEWidthConcatenateLayerKernel
* Allow the following functions to accept backing memory at run-time:
  * NEConcatenateLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ib0b6714cff7f06a52dc74d294bc3e0d72a1c2419
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3569
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index b5c200b..02c27e2 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/Requires.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
 
 #include <memory>
 #include <vector>
@@ -52,6 +53,16 @@
 public:
     /** Default constructor */
     NEConcatenateLayer();
+    /** Destructor */
+    ~NEConcatenateLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConcatenateLayer(const NEConcatenateLayer &) = delete;
+    /** Default move constructor */
+    NEConcatenateLayer(NEConcatenateLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConcatenateLayer &operator=(const NEConcatenateLayer &) = delete;
+    /** Default move assignment operator */
+    NEConcatenateLayer &operator=(NEConcatenateLayer &&);
     /** Initialise the kernel's inputs vector and output.
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
@@ -61,7 +72,6 @@
      * @param[out]    output        Output tensor. Data types supported: Same as @p input.
      * @param[in]     axis          Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
      */
-    void configure(std::vector<ITensor *> inputs_vector, ITensor *output, size_t axis);
     void configure(std::vector<const ITensor *> inputs_vector, ITensor *output, size_t axis);
     /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
      *
@@ -74,23 +84,62 @@
      *
      * @return a status
      */
-    static Status validate(const std::vector<ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
     static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
-    template <typename TensorType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorType>::type, ITensor>::value)>
-    void configure_internal(std::vector<TensorType *> &&inputs_vector, ITensor *output, size_t axis);
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
 
-    template <typename TensorInfoType, REQUIRES_TA(std::is_same<typename std::remove_cv<TensorInfoType>::type, ITensorInfo>::value)>
-    static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);
+namespace experimental
+{
+/** Basic function to concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref NEWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
+ * -# @ref NEHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
+ * -# @ref NEDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
+ * -# @ref NEBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
+ */
+class NEConcatenateLayer : public INEOperator
+{
+public:
+    /** Default constructor */
+    NEConcatenateLayer();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel, @ref NEDepthConcatenateLayerKernel and @ref NEBatchConcatenateLayerKernel.
+     *
+     * @param[in]     inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out]    output        Output tensor info. Data types supported: Same as @p inputs_vector.
+     * @param[in]     axis          Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+     */
+    void configure(const std::vector<const ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel, @ref NEDepthConcatenateLayerKernel and @ref NEBatchConcatenateLayerKernel.
+     *
+     * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] output        Output tensor info. Data types supported: Same as @p inputs_vector.
+     * @param[in] axis          Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+     *
+     * @return a status
+     */
+    static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+    void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override;
 
 private:
     std::vector<std::unique_ptr<INEKernel>> _concat_kernels;
     unsigned int                            _num_inputs;
     unsigned int                            _axis;
 };
+} // namespace experimental
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NECONCATENATELAYER_H */