Port OpenCL Quantization to new API

Partially resolves: COMPMID-4193

Change-Id: Ie8367769c690442a0e30383c67851b50ab7c6742
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5231
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index a2df76e..abaf824 100644
--- a/Android.bp
+++ b/Android.bp
@@ -136,7 +136,6 @@
         "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp",
         "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp",
         "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp",
-        "src/core/CL/kernels/CLQuantizationLayerKernel.cpp",
         "src/core/CL/kernels/CLROIAlignLayerKernel.cpp",
         "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp",
         "src/core/CL/kernels/CLRangeKernel.cpp",
@@ -385,6 +384,7 @@
         "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
         "src/core/gpu/cl/kernels/ClPermuteKernel.cpp",
         "src/core/gpu/cl/kernels/ClPoolingKernel.cpp",
+        "src/core/gpu/cl/kernels/ClQuantizationKernel.cpp",
         "src/core/gpu/cl/kernels/ClReshapeKernel.cpp",
         "src/core/gpu/cl/kernels/ClSoftmaxKernel.cpp",
         "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
@@ -681,6 +681,7 @@
         "src/runtime/gpu/cl/operators/ClPRelu.cpp",
         "src/runtime/gpu/cl/operators/ClPermute.cpp",
         "src/runtime/gpu/cl/operators/ClPooling.cpp",
+        "src/runtime/gpu/cl/operators/ClQuantization.cpp",
         "src/runtime/gpu/cl/operators/ClReshape.cpp",
         "src/runtime/gpu/cl/operators/ClSoftmax.cpp",
         "src/runtime/gpu/cl/operators/ClSub.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index 4d6bc66..e6b0eed 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
@@ -43,7 +43,7 @@
 class CLDequantizationLayerKernel;
 class CLComputeAllAnchorsKernel;
 class CLPadLayerKernel;
-class CLQuantizationLayerKernel;
+class CLQuantizationLayer;
 class ICLTensor;
 class ITensorInfo;
 
@@ -56,7 +56,7 @@
  * -# @ref CLBoundingBoxTransform
  * -# @ref CLPadLayerKernel
  * -# @ref CLDequantizationLayerKernel x 2
- * -# @ref CLQuantizationLayerKernel
+ * -# @ref CLQuantizationLayer
  * And the following CPP functions:
  * -# @ref CPPBoxWithNonMaximaSuppressionLimit
  */
@@ -146,7 +146,7 @@
     std::unique_ptr<CLPadLayerKernel>             _pad_kernel;
     std::unique_ptr<CLDequantizationLayerKernel>  _dequantize_anchors;
     std::unique_ptr<CLDequantizationLayerKernel>  _dequantize_deltas;
-    std::unique_ptr<CLQuantizationLayerKernel>    _quantize_all_proposals;
+    std::unique_ptr<CLQuantizationLayer>          _quantize_all_proposals;
 
     // CPP functions
     CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index a0a27c5..c5dad53 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,8 +24,10 @@
 #ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H
 #define ARM_COMPUTE_CLQUANTIZATIONLAYER_H
 
-#include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
 
 namespace arm_compute
 {
@@ -35,14 +37,26 @@
 
 /** Basic function to simulate a quantization layer. This function calls the following CL kernels:
  *
+ * -# @ref opencl::ClQuantization
+ *
  * @note The implementation supports only 3D input tensors.
  *
- * -# @ref CLQuantizationLayerKernel
- *
  */
-class CLQuantizationLayer : public ICLSimpleFunction
+class CLQuantizationLayer : public IFunction
 {
 public:
+    /** Default Constructor */
+    CLQuantizationLayer();
+    /** Default Destructor */
+    ~CLQuantizationLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLQuantizationLayer(const CLQuantizationLayer &) = delete;
+    /** Default move constructor */
+    CLQuantizationLayer(CLQuantizationLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLQuantizationLayer &operator=(const CLQuantizationLayer &) = delete;
+    /** Default move assignment operator */
+    CLQuantizationLayer &operator=(CLQuantizationLayer &&) = default;
     /** Set the input and output tensors.
      *
      * @param[in]  input  Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
@@ -68,6 +82,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } //namespace arm_compute
 #endif /* ARM_COMPUTE_CLQUANTIZATIONLAYER_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 1fdc688..4b37b5b 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -1312,7 +1312,7 @@
     - CLGEMMTranspose1xW
     - CLGEMMMatrixVectorMultiplyKernel
     - @ref CLL2NormalizeLayerKernel / @ref CLL2NormalizeLayer
-    - @ref CLQuantizationLayerKernel @ref CLMinMaxLayerKernel / @ref CLQuantizationLayer
+    - CLQuantizationLayerKernel @ref CLMinMaxLayerKernel / @ref CLQuantizationLayer
     - @ref CLROIPoolingLayerKernel / @ref CLROIPoolingLayer
     - @ref CLReductionOperationKernel / @ref CLReductionOperation
     - CLReshapeLayerKernel / @ref CLReshapeLayer
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index 45e27f2..b93f270 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -79,7 +79,6 @@
 #include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
 #include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
 #include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
 #include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
 #include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
 #include "src/core/CL/kernels/CLRangeKernel.h"
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.h b/src/core/CL/kernels/CLQuantizationLayerKernel.h
deleted file mode 100644
index e9d03de..0000000
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors.
- */
-class CLQuantizationLayerKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLQuantizationLayerKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
-    /** Default Move Constructor. */
-    CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
-    /** Default move assignment operator */
-    CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
-    /** Default destructor */
-    ~CLQuantizationLayerKernel() = default;
-    /** Set the input, output.
-     *
-     * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
-     * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
-     *
-     * @note Output auto initialization is not supported by this kernel
-     */
-    void configure(const ICLTensor *input, ICLTensor *output);
-    /** Set the input, output.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
-     * @param[out] output          Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
-     *
-     * @note Output auto initialization is not supported by this kernel
-     */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
-     *
-     * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
-     * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */
diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
similarity index 67%
rename from src/core/CL/kernels/CLQuantizationLayerKernel.cpp
rename to src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
index 76e703f..ea56289 100644
--- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,11 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/gpu/cl/kernels/ClQuantizationKernel.h"
 
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
@@ -33,58 +34,54 @@
 #include "src/core/AccessWindowStatic.h"
 #include "src/core/CL/CLValidate.h"
 #include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
 #include "support/StringSupport.h"
 
 namespace arm_compute
 {
+namespace opencl
+{
+namespace kernels
+{
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16);
-    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F32, DataType::F16);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
 
     // Output must always be initialized
-    ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape().total_size() == 0);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
-    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QASYMM16);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
 
     return Status{};
 }
 } // namespace
 
-CLQuantizationLayerKernel::CLQuantizationLayerKernel()
-    : _input(nullptr), _output(nullptr)
+ClQuantizationKernel::ClQuantizationKernel()
 {
 }
 
-void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+void ClQuantizationKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
 {
-    configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
 
-void CLQuantizationLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    auto padding_info = get_padding_info({ src, dst });
 
-    auto padding_info = get_padding_info({ input, output });
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst));
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
-    _input  = input;
-    _output = output;
-
-    const int  vec_size_x     = 16 / input->info()->element_size();
-    const int  input_width_x  = input->info()->tensor_shape().x();
+    const int  vec_size_x     = 16 / src->element_size();
+    const int  input_width_x  = src->tensor_shape().x();
     const bool multi_access_x = (input_width_x / vec_size_x > 0);
 
-    const UniformQuantizationInfo qinfo            = output->info()->quantization_info().uniform();
-    const DataType                output_data_type = output->info()->data_type();
+    const UniformQuantizationInfo qinfo            = dst->quantization_info().uniform();
+    const DataType                output_data_type = dst->data_type();
 
     float   scale_to_apply  = qinfo.scale;
     int32_t offset_to_apply = qinfo.offset;
-    if(is_data_type_quantized_asymmetric(_input->info()->data_type()))
+    if(is_data_type_quantized_asymmetric(src->data_type()))
     {
         /*
          * In case of requantization of a quantized input tensor to an output tensor with another quantization
@@ -116,7 +113,7 @@
          * z_n = - z_i * s_i / s_o + z_o
          *
          */
-        const UniformQuantizationInfo qinfo_in = _input->info()->quantization_info().uniform();
+        const UniformQuantizationInfo qinfo_in = src->quantization_info().uniform();
         scale_to_apply /= qinfo_in.scale;
         // In order to minimize flooring we convert the offset to a float,
         // then compute the new offset in the float domain,
@@ -126,11 +123,11 @@
 
     // Create kernel
     CLBuildOptions build_opts;
-    build_opts.add_option_if(is_data_type_float(_input->info()->data_type()), "-DIS_FLOAT");
+    build_opts.add_option_if(is_data_type_float(src->data_type()), "-DIS_FLOAT");
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_to_apply));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_to_apply));
     build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
-    build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.add_option("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(src->data_type()));
     build_opts.add_option("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output_data_type));
     build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0)));
     std::pair<int, int> min_max_quant_values = quantization::get_min_max_values_from_quantized_data_type(output_data_type);
@@ -140,39 +137,44 @@
     _kernel = create_kernel(compile_context, "quantization_layer", build_opts.options());
 
     // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps());
+    Window win = calculate_max_window(*src, Steps());
     if(multi_access_x)
     {
         win.set(Window::DimX, Window::Dimension(win.x().start(), ceil_to_multiple(win.x().end(), vec_size_x), vec_size_x));
     }
     ICLKernel::configure_internal(win);
 
-    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
+    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
 
     ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
 }
 
-Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
+Status ClQuantizationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst));
     return Status{};
 }
 
-void CLQuantizationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
+void ClQuantizationKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
 {
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
 
+    auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+    auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
     Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), 3);
     Window slice            = window_collapsed.first_slice_window_3D();
 
     do
     {
         unsigned int idx = 0;
-        add_3D_tensor_argument(idx, _input, slice);
-        add_3D_tensor_argument(idx, _output, slice);
+        add_3D_tensor_argument(idx, src, slice);
+        add_3D_tensor_argument(idx, dst, slice);
         enqueue(queue, *this, slice, lws_hint());
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
+} // namespace kernels
+} // namespace opencl
 } // namespace arm_compute
diff --git a/src/core/gpu/cl/kernels/ClQuantizationKernel.h b/src/core/gpu/cl/kernels/ClQuantizationKernel.h
new file mode 100644
index 0000000..20822cf
--- /dev/null
+++ b/src/core/gpu/cl/kernels/ClQuantizationKernel.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H
+#define ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors.
+ */
+class ClQuantizationKernel : public IClKernel
+{
+public:
+    /** Default constructor */
+    ClQuantizationKernel();
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClQuantizationKernel);
+    /** Set the input, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[out] dst             Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this kernel
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClQuantizationKernel
+     *
+     * @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+     * @param[in] dst Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+    // Inherited methods overridden:
+    void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_QUANTIZATION_KERNEL_H */
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 365f952..fb698d5 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -25,11 +25,11 @@
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
 #include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
 #include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
 #include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
 #include "src/core/CL/kernels/CLPadLayerKernel.h"
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
 namespace arm_compute
@@ -45,7 +45,7 @@
       _pad_kernel(std::make_unique<CLPadLayerKernel>()),
       _dequantize_anchors(std::make_unique<CLDequantizationLayerKernel>()),
       _dequantize_deltas(std::make_unique<CLDequantizationLayerKernel>()),
-      _quantize_all_proposals(std::make_unique<CLQuantizationLayerKernel>()),
+      _quantize_all_proposals(std::make_unique<CLQuantizationLayer>()),
       _cpp_nms(memory_manager),
       _is_nhwc(false),
       _is_qasymm8(false),
@@ -270,7 +270,7 @@
         ARM_COMPUTE_RETURN_ON_ERROR(CLBoundingBoxTransformKernel::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info,
                                                                            BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f)));
 
-        ARM_COMPUTE_RETURN_ON_ERROR(CLQuantizationLayerKernel::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized));
         proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized;
     }
     else
@@ -372,7 +372,7 @@
 
     if(_is_qasymm8)
     {
-        CLScheduler::get().enqueue(*_quantize_all_proposals, false);
+        _quantize_all_proposals->run();
     }
 
     // Non maxima suppression
diff --git a/src/runtime/CL/functions/CLQuantizationLayer.cpp b/src/runtime/CL/functions/CLQuantizationLayer.cpp
index cb8cabe..1f6ddb6 100644
--- a/src/runtime/CL/functions/CLQuantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLQuantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,10 +23,26 @@
  */
 #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
 
-#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClQuantization.h"
 
 namespace arm_compute
 {
+struct CLQuantizationLayer::Impl
+{
+    const ICLTensor                        *src{ nullptr };
+    ICLTensor                              *dst{ nullptr };
+    std::unique_ptr<opencl::ClQuantization> op{ nullptr };
+};
+
+CLQuantizationLayer::CLQuantizationLayer()
+    : _impl(std::make_unique<Impl>())
+{
+}
+CLQuantizationLayer::~CLQuantizationLayer() = default;
+
 void CLQuantizationLayer::configure(const ICLTensor *input, ICLTensor *output)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, output);
@@ -34,13 +50,23 @@
 
 void CLQuantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
-    auto k = std::make_unique<CLQuantizationLayerKernel>();
-    k->configure(compile_context, input, output);
-    _kernel = std::move(k);
+    _impl->src = input;
+    _impl->dst = output;
+
+    _impl->op = std::make_unique<opencl::ClQuantization>();
+    _impl->op->configure(compile_context, input->info(), output->info());
 }
 
 Status CLQuantizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
-    return CLQuantizationLayerKernel::validate(input, output);
+    return opencl::ClQuantization::validate(input, output);
+}
+
+void CLQuantizationLayer::run()
+{
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+    _impl->op->run(pack);
 }
 } // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantization.cpp b/src/runtime/gpu/cl/operators/ClQuantization.cpp
new file mode 100644
index 0000000..2e753b5
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClQuantization.cpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClQuantization.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/kernels/ClQuantizationKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+void ClQuantization::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
+{
+    auto k = std::make_unique<kernels::ClQuantizationKernel>();
+    k->configure(compile_context, src, dst);
+    _kernel = std::move(k);
+}
+
+Status ClQuantization::validate(const ITensorInfo *src, const ITensorInfo *dst)
+{
+    return kernels::ClQuantizationKernel::validate(src, dst);
+}
+
+void ClQuantization::run(ITensorPack &tensors)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
+    CLScheduler::get().enqueue_op(*_kernel.get(), tensors);
+}
+} // namespace opencl
+} // namespace arm_compute
diff --git a/src/runtime/gpu/cl/operators/ClQuantization.h b/src/runtime/gpu/cl/operators/ClQuantization.h
new file mode 100644
index 0000000..d938ff9
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClQuantization.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_QUANTIZATION_H
+#define ARM_COMPUTE_CL_QUANTIZATION_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to quantize a tensor. This function calls the following OpenCL kernel:
+ *
+ * -# @ref kernels::ClQuantizationKernel
+ */
+class ClQuantization : public IClOperator
+{
+public:
+    /** Constructor */
+    ClQuantization() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
+     * @param[out] dst             Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this function
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClQuantization
+     *
+     * @param[in] src Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.
+     * @param[in] dst Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
+
+    // Inherited method overridden
+    void run(ITensorPack &tensors) override;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_QUANTIZATION_H */