COMPMID-439 - Refactored NEQuantizationLayer and NEDequantizationLayer to support 3D input tensors

Change-Id: I03eac2108a30bed56d40dfd52e75577a35d492e0
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85783
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
index 095a833..8f66b8a 100644
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
@@ -30,7 +30,11 @@
 {
 class ITensor;
 
-/** Interface for the dequantization layer kernel. */
+/** Interface for the dequantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors
+ *
+ */
 class NEDequantizationLayerKernel : public INEKernel
 {
 public:
@@ -48,12 +52,12 @@
     ~NEDequantizationLayerKernel() = default;
     /** Set input, output, min and max.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8.
-     * @param[out] output Destination tensor. Data types supported: F32.
-     * @param[in]  min    Minimum value of the input tensor.
-     * @param[in]  max    Maximum value of the input tensor.
+     * @param[in]  input   Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: U8.
+     * @param[out] output  Destination tensor with the same dimensions as the input. Data type supported: F32.
+     * @param[in]  min_max Pointer to the tensor with shape [2, batches] which stores the minimum and maximum value for each 3D input tensor.
+     *                     The dimensions over the second must match the batched dimensions of the input tensor. Data type supported: F32
      */
-    void configure(const ITensor *input, ITensor *output, const float *min, const float *max);
+    void configure(const ITensor *input, ITensor *output, const ITensor *min_max);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -61,8 +65,7 @@
 private:
     const ITensor *_input;
     ITensor       *_output;
-    const float   *_min;
-    const float   *_max;
+    const ITensor *_min_max;
 };
 }
 #endif /*__ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H__ */
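
With this change the per-batch minimum and maximum travel through a `[2, batches]` tensor instead of two raw `float` pointers. A minimal sketch of how the new `configure()` contract might be exercised (the tensor shapes are illustrative assumptions, not taken from the patch):

```cpp
#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void configure_dequant_sketch() // hypothetical helper, for illustration only
{
    Tensor input, output, min_max;

    const TensorShape shape(16U, 16U, 4U, 2U); // W, H, D, plus 2 batches
    input.allocator()->init(TensorInfo(shape, 1, DataType::U8));
    output.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    // One (min, max) pair per batch: shape [2, batches].
    min_max.allocator()->init(TensorInfo(TensorShape(2U, 2U), 1, DataType::F32));

    NEDequantizationLayerKernel kernel;
    kernel.configure(&input, &output, &min_max);
}
```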
diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h
new file mode 100644
index 0000000..5e01acf
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEMINMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_NEMINMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+#include <cstdint>
+#include <mutex>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to perform min max search on a 3D tensor. */
+class NEMinMaxLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEMinMaxLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEMinMaxLayerKernel() = default;
+
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note output[0] = minimum
+     * @note output[1] = maximum
+     *
+     * @param[in]  input  Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
+     * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor.
+     *                    The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32
+     */
+    void configure(const ITensor *input, ITensor *output);
+    /** Resets global minimum and maximum. */
+    void reset();
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    void update_min_max(float *out_ptr, float min, float max);
+    const ITensor *_input;
+    ITensor       *_output;
+    std::mutex     _mtx;
+};
+}
+#endif /* __ARM_COMPUTE_NEMINMAXLAYERKERNEL_H__ */
\ No newline at end of file
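
The `std::mutex` member and the private `update_min_max()` helper suggest a two-phase reduction: each worker thread scans its window slice for a local minimum and maximum, then folds that partial result into the shared per-batch `[min, max]` pair under the lock. A hedged sketch of the merge step (an illustration of the pattern, not the kernel's actual body):

```cpp
#include <algorithm>
#include <mutex>

std::mutex mtx; // stands in for the kernel's _mtx member

// Fold one thread's local (min, max) into the shared per-batch pair.
// out_ptr points at the two floats for the current batch in the output tensor.
void update_min_max(float *out_ptr, float min, float max)
{
    std::lock_guard<std::mutex> lock(mtx);
    out_ptr[0] = std::min(out_ptr[0], min); // running minimum
    out_ptr[1] = std::max(out_ptr[1], max); // running maximum
}
```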
diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
index 92cd142..617a2da 100644
--- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h
@@ -30,7 +30,11 @@
 {
 class ITensor;
 
-/** Interface for the quantization layer kernel. */
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors
+ *
+ */
 class NEQuantizationLayerKernel : public INEKernel
 {
 public:
@@ -48,12 +52,12 @@
     ~NEQuantizationLayerKernel() = default;
     /** Set the input, output, min and max.
      *
-     * @param[in]  input  Source tensor. Data types supported: F32.
-     * @param[out] output Destination tensor. Data types supported: U8.
-     * @param[in]  min    Pointer to the minimum value of the input tensor.
-     * @param[in]  max    Pointer to the maximum value of the input tensor.
+     * @param[in]  input   Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
+     * @param[out] output  Destination tensor with the same dimensions as the input. Data type supported: U8.
+     * @param[in]  min_max Pointer to the tensor with shape [2, batches] which stores the minimum and maximum value for each 3D input tensor.
+     *                     The dimensions over the second must match the batched dimensions of the input tensor. Data type supported: F32
      */
-    void configure(const ITensor *input, ITensor *output, const float *min, const float *max);
+    void configure(const ITensor *input, ITensor *output, const ITensor *min_max);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
@@ -61,8 +65,7 @@
 private:
     const ITensor *_input;
     ITensor       *_output;
-    const float   *_min;
-    const float   *_max;
+    const ITensor *_min_max;
 };
 }
 #endif /*__ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H__ */
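
The patch shows only the interface, but with a per-batch `(min, max)` pair the F32-to-U8 mapping is the usual affine one. A scalar reference form, under the assumption that the kernel uses this arithmetic (the kernel itself is NEON-vectorised):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Map one F32 value into [0, 255] using the batch's (min, max) pair.
uint8_t quantize(float x, float min, float max)
{
    const float range = max - min;
    // Guard the degenerate case where the whole batch is constant.
    const float q = (range != 0.f) ? (x - min) / range * 255.f : 0.f;
    return static_cast<uint8_t>(std::lround(std::min(std::max(q, 0.f), 255.f)));
}
```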
diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
index 7cd8360..8985861 100644
--- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h
@@ -27,7 +27,6 @@
 #include "arm_compute/runtime/IFunction.h"
 
 #include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
-#include "arm_compute/runtime/Tensor.h"
 
 #include "arm_compute/core/Types.h"
 
@@ -37,6 +36,8 @@
 
 /** Basic function to simulate a dequantization layer. This function calls the following NEON kernels:
  *
+ * @note The implementation supports only 3D input tensors
+ *
  * -# @ref NEDequantizationLayerKernel
  *
  */
@@ -47,12 +48,12 @@
     NEDequantizationLayer();
     /** Configure the kernel.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8.
-     * @param[out] output Destination tensor. Data types supported: F32.
-     * @param[in]  min    Minimum value of the input tensor.
-     * @param[in]  max    Maximum value of the input tensor.
+     * @param[in]  input   Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: U8.
+     * @param[out] output  Destination tensor with the same dimensions as the input. Data type supported: F32.
+     * @param[in]  min_max Pointer to the tensor with shape [2, batches] which stores the minimum and maximum value for each 3D input tensor.
+     *                     The dimensions over the second must match the batched dimensions of the input tensor. Data type supported: F32
      */
-    void configure(const ITensor *input, ITensor *output, const float *min, const float *max);
+    void configure(const ITensor *input, ITensor *output, const ITensor *min_max);
 
     // Inherited methods overridden:
     void run() override;
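
For the inverse direction, the interface implies scaling each U8 value back into the stored `[min, max]` range. A scalar reference form under the same assumption:

```cpp
#include <cstdint>

// Map one U8 value back into the batch's [min, max] range.
float dequantize(uint8_t q, float min, float max)
{
    return min + (max - min) * (static_cast<float>(q) / 255.f);
}
```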
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index ab189fe..d91b4ad 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -26,7 +26,7 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
+#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -38,7 +38,9 @@
 
 /** Basic function to simulate a quantization layer. This function calls the following NEON kernels:
  *
- * -# @ref NEMinMaxKernel
+ * @note The implementation supports only 3D input tensors
+ *
+ * -# @ref NEMinMaxLayerKernel
  * -# @ref NEQuantizationLayerKernel
  *
  */
@@ -49,8 +51,8 @@
     NEQuantizationLayer();
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Source tensor. Data types supported: F32
-     * @param[out] output Destination tensor. Data types supported: U8
+     * @param[in]  input  Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32
+     * @param[out] output Destination tensor with the same dimensions as the input. Data type supported: U8
      */
     void configure(const ITensor *input, ITensor *output);
 
@@ -59,9 +61,8 @@
 
 private:
     NEQuantizationLayerKernel _quantize_kernel;
-    NEMinMaxKernel            _min_max_kernel;
-    float                     _min;
-    float                     _max;
+    NEMinMaxLayerKernel       _min_max_kernel;
+    Tensor                    _min_max;
 };
 }
 #endif /* __ARM_COMPUTE_NEQUANTIZATIONLAYER_H__ */
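
Taken together, the refactor means a caller no longer sees min/max at all at the function level: `NEQuantizationLayer` now owns the `[2, batches]` tensor as its `_min_max` member and runs the min/max search itself. An end-to-end sketch built only from the signatures in this patch (shapes illustrative):

```cpp
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, dst;
    const TensorShape shape(32U, 32U, 3U, 2U); // two batches of 32x32x3 volumes
    src.allocator()->init(TensorInfo(shape, 1, DataType::F32));
    dst.allocator()->init(TensorInfo(shape, 1, DataType::U8));

    NEQuantizationLayer quant;
    quant.configure(&src, &dst); // the [2, batches] min/max tensor is owned internally

    src.allocator()->allocate();
    dst.allocator()->allocate();
    // ... fill src with F32 data ...

    quant.run(); // min/max search (NEMinMaxLayerKernel), then NEQuantizationLayerKernel
    return 0;
}
```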