COMPMID-3280: Make all ML primitives for CL use the new interface - Part 2 - CLFunctions have been updated Change-Id: Ie3256a6c775bc12f3126482bd8e8a46da54b267c Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3053 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: 2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce [log] [tgz]
author: Manuel Bottini <manuel.bottini@arm.com> Wed Apr 08 10:15:51 2020 +0100
committer: Manuel Bottini <manuel.bottini@arm.com> Thu Apr 23 17:53:59 2020 +0000
tree: 078051a911f9b8883a3f11955cfd3b7ba0d7d9f3
parent: 0de45d0a8009e19331c4e29d617fa183167c513a [diff]
diff --git a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
index 28d3acc..26aded6 100644
--- a/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h
+++ b/arm_compute/runtime/CL/functions/CLAbsoluteDifference.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@
      * @param[out] output Output tensor. Data types supported: U8, S16
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First input tensor. Data types supported: U8, S16
+     * @param[in]  input2          Second input tensor. Data types supported: U8, S16
+     * @param[out] output          Output tensor. Data types supported: U8, S16
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCE_H */

diff --git a/arm_compute/runtime/CL/functions/CLAccumulate.h b/arm_compute/runtime/CL/functions/CLAccumulate.h
index f465ab3..b47f0c0 100644
--- a/arm_compute/runtime/CL/functions/CLAccumulate.h
+++ b/arm_compute/runtime/CL/functions/CLAccumulate.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,13 @@
      * @param[out] accum Destination tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, ICLTensor *accum);
+    /** Set the input and accumulation tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] accum           Destination tensor. Data types supported: S16.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
 };
 
 /** Basic function to run @ref CLAccumulateWeightedKernel */
@@ -55,6 +62,14 @@
      * @param[in,out] accum Accumulated tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+    /** Set the input and accumulation tensors, and the scale value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     alpha           The input scalar value with a value in the range of [0, 1.0]. Data types supported: F32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
 };
 
 /** Basic function to run @ref CLAccumulateSquaredKernel */
@@ -68,6 +83,14 @@
      * @param[in,out] accum Accumulated tensor. Data types supported: S16.
      */
     void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+    /** Set the input and accumulation tensors and the shift value.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           Source tensor. Data types supported: U8.
+     * @param[in]     shift           The input with a value in the range of [0, 15]. Data types supported: U32.
+     * @param[in,out] accum           Accumulated tensor. Data types supported: S16.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
 };
 }
 #endif /*ARM_COMPUTE_CLACCUMULATE_H */

diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index 09f5d2b..fbb34e5 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h

@@ -62,6 +62,17 @@
      * @param[in]      act_info Activation layer parameters.
      */
     void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer parameters.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result

diff --git a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
index a26fcfd..b0d29bc 100644
--- a/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h

@@ -61,6 +61,15 @@
      * @param[in]  op     Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
      */
     void configure(const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input source tensor. Data types supported: QASYMM8/F16/F32.
+     * @param[in]  axis            Axis to find max/min index.
+     * @param[out] output          Output tensor. Data types supported: U32/S32.
+     * @param[in]  op              Reduction operation to perform. Operations supported: ARG_IDX_MAX, ARG_IDX_MIN
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op);
     /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayer
      *
      * @param[in] input  Input source tensor info. Data types supported: QASYMM8/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 7cd4d16..a211ea6 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,25 @@
      */
     void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
                    ActivationLayerInfo act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+     *                                 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                                 The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out]     output          Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]      mean            Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      var             Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      beta            (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+     * @param[in]      gamma           (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+     * @param[in]      epsilon         (Optional) Small value to avoid division with zero. Default value is 0.001f.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
+                   const ICLTensor *gamma = nullptr,
+                   float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayer
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.

diff --git a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
index b987028..6edb464 100644
--- a/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchToSpaceLayer.h

@@ -46,6 +46,14 @@
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
     /** Set the input and output tensors. (Static block shape).
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -54,6 +62,15 @@
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output);
+    /** Set the input and output tensors. (Static block shape).
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer
      *
      * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
index 77907cc..1faded0 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseAnd.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseAnd.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEAND_H */

diff --git a/arm_compute/runtime/CL/functions/CLBitwiseNot.h b/arm_compute/runtime/CL/functions/CLBitwiseNot.h
index b5c7cfe..c946055 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseNot.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseNot.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,13 @@
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISENOT_H */

diff --git a/arm_compute/runtime/CL/functions/CLBitwiseOr.h b/arm_compute/runtime/CL/functions/CLBitwiseOr.h
index 5957c3f..4fb93cc 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseOr.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseOr.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEOR_H */

diff --git a/arm_compute/runtime/CL/functions/CLBitwiseXor.h b/arm_compute/runtime/CL/functions/CLBitwiseXor.h
index a4e864c..6caa013 100644
--- a/arm_compute/runtime/CL/functions/CLBitwiseXor.h
+++ b/arm_compute/runtime/CL/functions/CLBitwiseXor.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,6 +45,14 @@
      * @param[out] output Output tensor. Data types supported: U8.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Input tensor. Data types supported: U8.
+     * @param[in]  input2          Input tensor. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
 };
 }
 #endif /* ARM_COMPUTE_CLBITWISEXOR_H */

diff --git a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
index 3e11781..b09359d 100644
--- a/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h
+++ b/arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,18 @@
      * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
      */
     void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  boxes           Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+     * @param[out] pred_boxes      Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+     * @param[in]  deltas          Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K  is the number of classes.
+     *                             Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
+     * @param[in]  info            Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
      *

diff --git a/arm_compute/runtime/CL/functions/CLBox3x3.h b/arm_compute/runtime/CL/functions/CLBox3x3.h
index 3fb18e3..a4cf4d2 100644
--- a/arm_compute/runtime/CL/functions/CLBox3x3.h
+++ b/arm_compute/runtime/CL/functions/CLBox3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLBOX3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLCannyEdge.h b/arm_compute/runtime/CL/functions/CLCannyEdge.h
index 1a56767..2729d24 100644
--- a/arm_compute/runtime/CL/functions/CLCannyEdge.h
+++ b/arm_compute/runtime/CL/functions/CLCannyEdge.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -69,6 +69,20 @@
      */
     void configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
                    uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     upper_thr             Upper threshold used for the hysteresis.
+     * @param[in]     lower_thr             Lower threshold used for the hysteresis.
+     * @param[in]     gradient_size         Gradient size (3, 5 or 7).
+     * @param[in]     norm_type             Normalization type. If 1, L1-Norm; otherwise L2-Norm.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     virtual void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
index 4cb1fe0..6a1835c 100644
--- a/arm_compute/runtime/CL/functions/CLCast.h
+++ b/arm_compute/runtime/CL/functions/CLCast.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,26 @@
      * @param[in]  policy Conversion policy.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCast
      *
      * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLChannelCombine.h b/arm_compute/runtime/CL/functions/CLChannelCombine.h
index 25f31d8..474830d 100644
--- a/arm_compute/runtime/CL/functions/CLChannelCombine.h
+++ b/arm_compute/runtime/CL/functions/CLChannelCombine.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,12 +47,31 @@
     void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
     /** Initialize function's inputs and outputs.
      *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[in]  plane3          The 2D plane that forms channel 3. Must be of U8 format.
+     * @param[out] output          The single planar output tensor.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+    /** Initialize function's inputs and outputs.
+     *
      * @param[in]  plane0 The 2D plane that forms channel 0. Must be of U8 format.
      * @param[in]  plane1 The 2D plane that forms channel 1. Must be of U8 format.
      * @param[in]  plane2 The 2D plane that forms channel 2. Must be of U8 format.
      * @param[out] output The multi planar output image.
      */
     void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+    /** Initialize function's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  plane0          The 2D plane that forms channel 0. Must be of U8 format.
+     * @param[in]  plane1          The 2D plane that forms channel 1. Must be of U8 format.
+     * @param[in]  plane2          The 2D plane that forms channel 2. Must be of U8 format.
+     * @param[out] output          The multi planar output image.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
 };
 }
 #endif /*ARM_COMPUTE_CLCHANNELCOMBINE_H*/

diff --git a/arm_compute/runtime/CL/functions/CLChannelExtract.h b/arm_compute/runtime/CL/functions/CLChannelExtract.h
index 77d84b9..aa25516 100644
--- a/arm_compute/runtime/CL/functions/CLChannelExtract.h
+++ b/arm_compute/runtime/CL/functions/CLChannelExtract.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,11 +46,27 @@
     void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
     /** Initialize the function's source, destination
      *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to extract the channel from. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+     * @param[in]  channel         The channel to extract.
+     * @param[out] output          The extracted channel. Must be of U8 format.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
+    /** Initialize the function's source, destination
+     *
      * @param[in]  input   The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
      * @param[in]  channel The channel to extract.
      * @param[out] output  The extracted 2D channel. Must be of U8 format.
      */
     void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The multi-planar input image to extract channel from. Formats supported: NV12/NV21/IYUV/YUV444
+     * @param[in]  channel         The channel to extract.
+     * @param[out] output          The extracted 2D channel. Must be of U8 format.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
 };
 }
 #endif /*ARM_COMPUTE_CLCHANNELEXTRACT_H*/

diff --git a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
index 6e30bd3..183a2f1 100644
--- a/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h

@@ -46,6 +46,14 @@
      * @param[in]  num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
      */
     void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  num_groups      Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
     /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
      *
      * @param[in] input      Input tensor info. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
index 1a3bea9..8721e8a 100644
--- a/arm_compute/runtime/CL/functions/CLColorConvert.h
+++ b/arm_compute/runtime/CL/functions/CLColorConvert.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,22 +49,52 @@
     void configure(const ICLTensor *input, ICLTensor *output);
     /** Initialize the function's source, destination
      *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+     * @param[out] output          Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the format of @p input is RGB888)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function's source, destination
+     *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
      */
     void configure(const ICLMultiImage *input, ICLImage *output);
     /** Initialize the function's source, destination
      *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Single-planar destination image. Formats supported: RGB888/RGBA8888
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
+    /** Initialize the function's source, destination
+     *
      * @param[in]  input  Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
      * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
      */
     void configure(const ICLImage *input, ICLMultiImage *output);
     /** Initialize the function's source, destination
      *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+     * @param[out] output          Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
+    /** Initialize the function's source, destination
+     *
      * @param[in]  input  Multi-planar source image. Formats supported: NV12/NV21/IYUV
      * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
      */
     void configure(const ICLMultiImage *input, ICLMultiImage *output);
+    /** Initialize the function's source, destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Multi-planar source image. Formats supported: NV12/NV21/IYUV
+     * @param[out] output          Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of  @p input is IYUV)
+     */
+    void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
 };
 }
 #endif /* ARM_COMPUTE_CLCOLORCONVERT_H */

diff --git a/arm_compute/runtime/CL/functions/CLComparison.h b/arm_compute/runtime/CL/functions/CLComparison.h
index 85dbe71..4e681e7 100644
--- a/arm_compute/runtime/CL/functions/CLComparison.h
+++ b/arm_compute/runtime/CL/functions/CLComparison.h

@@ -46,6 +46,17 @@
      * @param[out] operation Comparison operation to be used.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: All.
+     *                             The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
+     *                             The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     * @param[in]  operation       Comparison operation to be used.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparison
      *
      * @param[in]  input1    Source tensor. Data types supported: All.
@@ -75,6 +86,19 @@
      * @param[out] output Destination tensor. Data types supported: U8.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
+    /** Comparison operations used by the class */
+
+public:
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     *                             The input1 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in]  input2          Source tensor. Data types supported: Same as @p input1.
+     *                             The input2 tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out] output          Destination tensor. Data types supported: U8.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLComparison
      *
      * @param[in] input1 Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
index a039320..15c5bfe 100644
--- a/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h
+++ b/arm_compute/runtime/CL/functions/CLComputeAllAnchors.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,15 @@
      *
      */
     void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  anchors         Source tensor. Original set of anchors of size (4, A) where A is the number of anchors. Data types supported: F16/F32
+     * @param[out] all_anchors     Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+     * @param[in]  info            Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
      *

diff --git a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
index c3d065a..b8e3361 100644
--- a/arm_compute/runtime/CL/functions/CLConcatenateLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConcatenateLayer.h

@@ -62,6 +62,18 @@
      */
     void configure(std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
     void configure(std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
+     * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] inputs_vector   The vectors containing all the tensors to concatenate. Data types supported: All.
+     * @param[out]    output          Output tensor. Data types supported: Same as @p input.
+     * @param[in]     axis            Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+     */
+    void configure(const CLCompileContext &compile_context, std::vector<ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
+    void configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
@@ -81,7 +93,7 @@
 
 private:
     template <typename TensorType>
-    void configure_internal(std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
+    void configure_internal(const CLCompileContext &compile_context, std::vector<TensorType *> &&inputs_vector, ICLTensor *output, size_t axis);
 
     template <typename TensorInfoType>
     static Status validate_internal(const std::vector<TensorInfoType *> &inputs_vector, const ITensorInfo *output, size_t axis);

diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 76a28ed..123f638 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h

@@ -47,6 +47,17 @@
      * @return A status
      */
     void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+    /** Initialize the function.
+     *
+     * @param[in]  compile_context      The compile context to be used.
+     * @param[in]  input                Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+     * @param[out] output               The converted weights tensor. Shape and Data Type: Same as @p input.
+     * @param[in]  original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+     * @param[in]  data_layout          The data layout the weights have been trained in.
+     *
+     * @return A status
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeights
      *
      * @param[in] input                Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
@@ -96,7 +107,18 @@
      */
     void configure(const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
     {
-        _func.configure(input, &_output, original_input_shape, data_layout);
+        configure(CLKernelLibrary::get().get_compile_context(), input, original_input_shape, data_layout);
+    }
+    /** Configures the @ref CLConvertFullyConnectedWeights function
+     *
+     * @param[in] compile_context      The compile context to be used.
+     * @param[in] input                Source weights tensor to convert. Data type supported: All.
+     * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+     * @param[in] data_layout          The data layout the weights have been trained in.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
+    {
+        _func.configure(compile_context, input, &_output, original_input_shape, data_layout);
     }
 
 private:

diff --git a/arm_compute/runtime/CL/functions/CLConvolution.h b/arm_compute/runtime/CL/functions/CLConvolution.h
index 43507d7..72ef8ce 100644
--- a/arm_compute/runtime/CL/functions/CLConvolution.h
+++ b/arm_compute/runtime/CL/functions/CLConvolution.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,17 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 
 /** Basic function to execute square convolution.Currently it supports 5x5, 7x7, 9x9. This function calls the following OpenCL kernels:
@@ -84,6 +95,17 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8 or S16.
+     * @param[in]     conv                  matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overriden:
     void run() override;
@@ -127,6 +149,20 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8 or S16.
+     * @param[in]     conv                  Matrix_size x matrix_size S16 coefficients structured as a row-major 2D array in a linear buffer.
+     * @param[in]     rows                  Rows of convolution kernel.
+     * @param[in]     cols                  Columns of convolution kernel.
+     * @param[in]     scale                 Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLCONVOLUTION_H */

diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index b526954..fff9173 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h

@@ -94,6 +94,29 @@
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                    const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                              while every optional dimension from 4 and above represent a batch of inputs.
+     *                              Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights          Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                              Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases           Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                              Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     * @param[out] output           Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                              Data types supported: Same as @p input.
+     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info     Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+     *                              available which may introduce a drop of accuracy as well. Default is false
+     * @param[in]  num_groups       (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
+                   unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
      *
      * @param[in] input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],

diff --git a/arm_compute/runtime/CL/functions/CLCopy.h b/arm_compute/runtime/CL/functions/CLCopy.h
index 9252ac3..31b73c3 100644
--- a/arm_compute/runtime/CL/functions/CLCopy.h
+++ b/arm_compute/runtime/CL/functions/CLCopy.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,14 @@
      *
      */
     void configure(ICLTensor *input, ICLTensor *output);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLCopy
      *
      * @param[in] input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.

diff --git a/arm_compute/runtime/CL/functions/CLCropResize.h b/arm_compute/runtime/CL/functions/CLCropResize.h
index 244e345..86df0d4 100644
--- a/arm_compute/runtime/CL/functions/CLCropResize.h
+++ b/arm_compute/runtime/CL/functions/CLCropResize.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,10 +62,10 @@
      * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
      * @note Start and end indices of boxes are inclusive.
      *
-     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: F32
-     * @param[in]  boxes               Tensor containing the boxes used to crop the images. Data type supported: F32
+     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+     * @param[in]  boxes               Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32
      * @param[in]  box_ind             One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
-     *                                 box in @p boxes will be applied to. Data type supported: F32
+     *                                 box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32
      * @param[out] output              Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32
      * @param[in]  crop_size           The dimensions that each cropped image will be resized to.
      * @param[in]  method              The policy to be used when resizing image. Default is bilinear.
@@ -73,6 +73,24 @@
      */
     void configure(const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
                    InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Box indices may be outside of the bounds, in which case @p extrapolation_value is used.
+     * @note Start and end indices of boxes are inclusive.
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  input               Source tensor containing N batches of 3D images to be cropped. Data type supported: U16/S16/U32/S32/F16/F32
+     * @param[in]  boxes               Tensor containing the boxes used to crop the images. It has to be known before configuration. Data type supported: F32
+     * @param[in]  box_ind             One dimensional tensor containing the batch index of the 3D image in @p input that the corresponding
+     *                                 box in @p boxes will be applied to. It has to be known before configuration. Data type supported: F32
+     * @param[out] output              Destination tensor containing a cropped and resized image for each box in @p boxes. Data type supported: F32
+     * @param[in]  crop_size           The dimensions that each cropped image will be resized to.
+     * @param[in]  method              The policy to be used when resizing image. Default is bilinear.
+     * @param[in]  extrapolation_value Value to be used for values outside of the image for cropping and resizing. Default is 0.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *boxes, ICLTensor *box_ind, ICLTensor *output, Coordinates2D crop_size,
+                   InterpolationPolicy method = InterpolationPolicy::BILINEAR, float extrapolation_value = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NESlice
      *
@@ -109,6 +127,8 @@
     std::vector<std::unique_ptr<CLCopyKernel>> _copy;
     std::vector<std::unique_ptr<CLTensor>>     _crop_results;
     std::vector<std::unique_ptr<CLTensor>>     _scaled_results;
+
+    std::vector<std::unique_ptr<ICLKernel>> _internal_kernels;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CL_CROP_RESIZE_H */

diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 78c149d..c75b586 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h

@@ -55,6 +55,19 @@
      *
      */
     void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension. Data type supported: Same as @p input.
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     deconv_info     Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
+     * @param[in]     weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info,
+                   const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
      *
      * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
index 5a1009c..2d3dde1 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayerUpsample.h

@@ -64,6 +64,14 @@
      * @param[in]      info   Contains padding and policies to be used in the deconvolution.
      */
     void configure(ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+    /** Initialize the function's source, destination and deconvolution information.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. Data type supported: All.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input.
+     * @param[in]      info            Contains padding and policies to be used in the deconvolution.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
      *
      * @param[in] input  Source tensor info. Data type supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index 1b9476c..910b9ea 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,27 @@
      * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
      */
     void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - U8  -> S8, U16, S16, U32, S32, F16, F32
+     *   - U16 -> U8, S8, S16, U32, S32, F16, F32
+     *   - S16 -> U8, S8, U16, U32, S32, F16, F32
+     *   - U32 -> U8, S8, U16, S16, S32, F16, F32
+     *   - S32 -> U8, S8, U16, S16, U32, F16, F32
+     *   - F16 -> U8, S8, U16, S16, U32, F32
+     *   - F32 -> U8, S8, U16, S16, U32, F16
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to convert. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[out] output          The output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy          Conversion policy.
+     * @param[in]  shift           Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayer
      *
      * @param[in] input  Source tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
index 0c33ed3..dbf5898 100644
--- a/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h

@@ -42,6 +42,14 @@
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayer.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index 4668e82..63c359e 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h

@@ -70,6 +70,22 @@
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                    ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+    /** Initialize the function's source, destination, weights and convolution information.
+     *
+     * @param[in]      compile_context  The compile context to be used.
+     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/FP16/FP32. Data layout supported: NHWC, NCHW
+     * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+     *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                                  Data type supported: Same as @p input, S32 when input is QASYMM8.
+     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info        Padding and stride information to use for the convolution.
+     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
      *
@@ -150,6 +166,22 @@
          */
         void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                        ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+        /** Initialize the function's source, destination, conv and border_size.
+         *
+         * @param[in]      compile_context  The compile context to be used.
+         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+         * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
+         *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+         *                                  Data type supported: Same as @p input.
+         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+         * @param[in]      conv_info        Padding and stride information to use for the convolution.
+         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+         */
+        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                       unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
         /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
          *
@@ -234,6 +266,22 @@
          */
         void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                        unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+        /** Initialize the function's source, destination, weights and convolution information.
+         *
+         * @param[in]      compile_context  The compile context to be used.
+         * @param[in, out] input            Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F32. (Written to only for border filling).
+         * @param[in]      weights          Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
+         *                                  Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+         * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+         *                                  Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+         * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+         * @param[in]      conv_info        Padding and stride information to use for the convolution.
+         * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+         * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation.
+         * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+         */
+        void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                       unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
         /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerGeneric
          *
@@ -328,6 +376,23 @@
     ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                    ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]      compile_context  The compile context to be used.
+     * @param[in, out] input            Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
+     * @param[in]      weights          Weights tensor. A 3D tensor with shape [3, 3, IFM].
+     *                                  Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]      biases           Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                                  Data type supported: Same as @p input.
+     * @param[out]     output           Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info        Padding and stride information to use for the convolution.
+     * @param[in]      depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+     * @param[in]      act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
+     * @param[in]      dilation         (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     */
+    ARM_COMPUTE_DEPRECATED_REL_REPLACE(20.02, CLDepthwiseConvolutionLayer)
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
      *

diff --git a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
index 48d6ba8..c0a0fcd 100644
--- a/arm_compute/runtime/CL/functions/CLDequantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDequantizationLayer.h

@@ -44,6 +44,14 @@
      * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+     * @param[out] output          Destination tensor with the same dimensions of input. Data type supported: F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayer
      *
      * @param[in] input  Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.

diff --git a/arm_compute/runtime/CL/functions/CLDerivative.h b/arm_compute/runtime/CL/functions/CLDerivative.h
index 1155d40..5875ceb 100644
--- a/arm_compute/runtime/CL/functions/CLDerivative.h
+++ b/arm_compute/runtime/CL/functions/CLDerivative.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,19 @@
      *
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must be not NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination tensor. Derivative along the X direction. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination tensor. Derivative along the Y direction. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /* ARM_COMPUTE_CLDERIVATIVE_H */

diff --git a/arm_compute/runtime/CL/functions/CLDilate.h b/arm_compute/runtime/CL/functions/CLDilate.h
index ceea456..cc84820 100644
--- a/arm_compute/runtime/CL/functions/CLDilate.h
+++ b/arm_compute/runtime/CL/functions/CLDilate.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the kernel's inputs, output and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLDILATE_H */

diff --git a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
index 045b1c0..0c81ffa 100644
--- a/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h

@@ -57,6 +57,22 @@
      * @param[in]  act_info  (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayer
      *
      * @param[in] input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],

diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
index 6632bfc..1fed460 100644
--- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h

@@ -98,6 +98,21 @@
      *
      */
     void configure(ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+     *                                Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension.
+     *                                Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input.
+     * @param[in]     info            Contains padding and policies to be used in the deconvolution. This is described in @ref PadStrideInfo.
+     * @param[in]     weights_info    (Optional) Weights information needed for @ref CLConvolutionLayer, specifies if the weights tensor has been reshaped with @ref CLWeightsReshapeKernel.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &info,
+                   const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLDirectDeconvolutionLayer
      *
      * @param[in] input        Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.

diff --git a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
index e2503f7..19729b6 100644
--- a/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h
+++ b/arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRsqrtLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -60,6 +67,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLExpLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -80,6 +94,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNegLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -100,6 +121,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSinLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -120,6 +148,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLogLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -140,6 +175,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLAbsLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.
@@ -160,6 +202,13 @@
      * @param[out] output Output tensor. Data types supported: same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: F16/F32.
+     * @param[out] output          Output tensor. Data types supported: same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRoundLayer
      *
      * @param[in] input  First tensor input info. Data types supported: F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
index 6d9f3a0..8c656ed 100644
--- a/arm_compute/runtime/CL/functions/CLElementwiseOperations.h
+++ b/arm_compute/runtime/CL/functions/CLElementwiseOperations.h

@@ -50,6 +50,18 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QSYMM16 (only if @p input1 is QSYMM16), S16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QSYMM16 (only if both inputs are QSYMM16), S16/F16/F32.
+     * @param[in]      policy          Policy to use to handle overflow.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for addition
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -82,6 +94,18 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16/F16/F32.
+     * @param[in]      policy          Policy to use to handle overflow.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel for subtraction
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/S32/U32/F16/F32.
@@ -113,6 +137,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: Same as @p input1.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticDivision
      *
      * @param[in] input1   First tensor input info. Data types supported: F16/F32.
@@ -143,6 +178,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for max
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -173,6 +219,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and optional fused activation.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for min
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/S32/U32/F16/F32.
@@ -203,6 +260,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and optional fused activation.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for squared difference
      *
      * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
@@ -233,6 +301,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and optional fused activation.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          First tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          Second tensor input. Data types supported: F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: F16/F32.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel for power
      *
      * @param[in] input1   First tensor input info. Data types supported: F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
index 79c18fa..d907cfb 100644
--- a/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLEqualizeHistogram.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,13 @@
      * @param[out] output Output of same data type with equalized brightness and contrast.
      */
     void configure(const ICLImage *input, ICLImage *output);
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8.
+     * @param[out] output          Output of same data type with equalized brightness and contrast.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLErode.h b/arm_compute/runtime/CL/functions/CLErode.h
index a438f4e..57f701c 100644
--- a/arm_compute/runtime/CL/functions/CLErode.h
+++ b/arm_compute/runtime/CL/functions/CLErode.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the kernel's inputs, output and border mode
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 First tensor input. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLERODE_H */

diff --git a/arm_compute/runtime/CL/functions/CLFFT1D.h b/arm_compute/runtime/CL/functions/CLFFT1D.h
index 31e57e1..da15322 100644
--- a/arm_compute/runtime/CL/functions/CLFFT1D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT1D.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,14 @@
      * @param[in]  config FFT related configuration
      */
     void configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config);
+    /** Initialise the function's source, destination and FFT configuration.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  config          FFT related configuration
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFT1D.
      *
      * @param[in] input  Source tensor info. Data types supported: F32.

diff --git a/arm_compute/runtime/CL/functions/CLFFT2D.h b/arm_compute/runtime/CL/functions/CLFFT2D.h
index d34528b..a113f20 100644
--- a/arm_compute/runtime/CL/functions/CLFFT2D.h
+++ b/arm_compute/runtime/CL/functions/CLFFT2D.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,14 @@
      * @param[in]  config FFT related configuration
      */
     void configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
+    /** Initialise the function's source, destination and FFT configuration.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  config          FFT related configuration
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFT2D.
      *
      * @param[in] input  Source tensor info. Data types supported: F32.

diff --git a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
index 34bb93a..7407319 100644
--- a/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -85,6 +85,23 @@
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @note: This function only supports square kernels (of any size) and unit strides, for both NCHW and NHWC data layouts
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: F32.
+     * @param[in]  weights         Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLFFTConvolutionLayer
      *
      * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout

diff --git a/arm_compute/runtime/CL/functions/CLFastCorners.h b/arm_compute/runtime/CL/functions/CLFastCorners.h
index 2a0e010..1dc87d6 100644
--- a/arm_compute/runtime/CL/functions/CLFastCorners.h
+++ b/arm_compute/runtime/CL/functions/CLFastCorners.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -71,6 +71,19 @@
      */
     void configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, detection parameters, destination array and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in]     input                 Source image. Data types supported: U8.
+     * @param[in]     threshold             Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+     * @param[in]     nonmax_suppression    If true, non-maximum suppression is applied to detected corners before being placed in the array.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in,out] num_corners           Record number of corners in the array
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners, unsigned int *num_corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
     // Inherited methods overridden:
     void run() override;
 

diff --git a/arm_compute/runtime/CL/functions/CLFill.h b/arm_compute/runtime/CL/functions/CLFill.h
index c4ba257..bb12160 100644
--- a/arm_compute/runtime/CL/functions/CLFill.h
+++ b/arm_compute/runtime/CL/functions/CLFill.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,13 @@
      * @param[in]     constant_value Constant value to use to fill tensor.
      */
     void configure(ICLTensor *tensor, PixelValue constant_value);
+    /** Initialize the function
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] tensor          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]     constant_value  Constant value to use to fill tensor.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *tensor, PixelValue constant_value);
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLFILL_H */

diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index ded79e5..250806b 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+    /** Initialize the function
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] tensor                Source tensor. Data types supported: U8/S16
+     * @param[in]     border_width          The border width
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
 };
 }
 #endif /*ARM_COMPUTE_FILLBORDER_H */

diff --git a/arm_compute/runtime/CL/functions/CLFlattenLayer.h b/arm_compute/runtime/CL/functions/CLFlattenLayer.h
index b9ce236..98cf49a 100644
--- a/arm_compute/runtime/CL/functions/CLFlattenLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFlattenLayer.h

@@ -47,6 +47,15 @@
      *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First input tensor to flatten with at least 3 dimensions.
+     *                             The dimensions above the third will be interpreted as batches. Data types supported: All.
+     * @param[out] output          Output tensor with shape [w*h*d, input_batches] where:
+     *                             w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayer
      *
      * @param[in]  input  First input tensor to flatten with at least 3 dimensions.

diff --git a/arm_compute/runtime/CL/functions/CLFloor.h b/arm_compute/runtime/CL/functions/CLFloor.h
index c4a893f..2844a56 100644
--- a/arm_compute/runtime/CL/functions/CLFloor.h
+++ b/arm_compute/runtime/CL/functions/CLFloor.h

@@ -48,7 +48,7 @@
      * @param[in]  input           Source tensor. Data type supported: F16/F32.
      * @param[out] output          Destination tensor. Same as @p input
      */
-    void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFloor
      *
      * @param[in] input  Source tensor info. Data type supported: F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index cbd2860..188117f 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h

@@ -52,6 +52,13 @@
      * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights
      *
      * @param[in] input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -100,7 +107,16 @@
      */
     void configure(const ICLTensor *input)
     {
-        _func.configure(input, &_output);
+        configure(CLKernelLibrary::get().get_compile_context(), input);
+    }
+    /** Configures the @ref CLFullyConnectedLayerReshapeWeights function
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input)
+    {
+        _func.configure(compile_context, input, &_output);
     }
 
 private:
@@ -147,6 +163,23 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                    FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  weights         Weights tensor. The weights must be 2 dimensional.
+     *                             If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 input's dimensions.
+     *                             If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+     *                             Data type supported: Same as @p input.
+     * @param[in]  biases          Bias tensor. Can be nullptr. Data type supported: Same as @p input.
+     * @param[out] output          Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+     *                             - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+     *                             - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+     *                             Data type supported: Same as @p input.
+     * @param[in]  fc_info         (Optional) Fully connected layer additional info
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                   FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
      *
      * @param[in]  input   Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -171,9 +204,9 @@
     void prepare() override;
 
 private:
-    void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
-    void configure_conv_fc(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_fc_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_conv_fc(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
+    void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const FullyConnectedLayerInfo &fc_info);
 
     MemoryGroup                                                         _memory_group;
     IWeightsManager                                                    *_weights_manager;

diff --git a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
index 650d2e5..9057440 100644
--- a/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h
+++ b/arm_compute/runtime/CL/functions/CLFuseBatchNormalization.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,25 @@
     void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
                    const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
                    float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input_weights   Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+     * @param[in]  bn_mean         Batch normalization layer mean tensor. Same as @p input_weights
+     * @param[in]  bn_var          Batch normalization layer variance tensor. Same as @p input_weights
+     * @param[out] fused_weights   Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+     * @param[out] fused_bias      Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+     * @param[in]  input_bias      (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+     * @param[in]  bn_beta         (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_beta is set to 0.0
+     * @param[in]  bn_gamma        (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+     *                             @note if nullptr, bn_gamma is set to 1.0
+     * @param[in]  epsilon         (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+     * @param[in]  fbn_type        (Optional) Fused batch normalization type. Defaults to Convolution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+                   const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+                   float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
     /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalization
      *
      * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC

diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 7a4f120..f558811 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h

@@ -79,7 +79,18 @@
      */
     void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
     {
-        _kernel.configure(input, &_output, info);
+        configure(CLKernelLibrary::get().get_compile_context(), input, info);
+    }
+
+    /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] info            RHS matrix information to be used for reshaping.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
+    {
+        _kernel.configure(compile_context, input, &_output, info);
     }
 
 private:
@@ -134,6 +145,26 @@
      *                       in case matrix A and matrix B have been already transformed.
      */
     void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+    /** Initialise the kernel's inputs and output
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     *
+     * @note All tensors must have the same data type.
+     *
+     * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  a               First input tensor  (Matrix or Vector A). Data types supported: F16/F32
+     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a.
+     * @param[in]  c               Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+     * @param[out] output          Output tensor. Data type supported: same as @p a
+     * @param[in]  alpha           Weight of the matrix product
+     * @param[in]  beta            Weight of matrix C
+     * @param[in]  gemm_info       (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                             if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
+     *                             in case matrix A and matrix B have been already transformed.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMM.
      *
      * @param[in] a         First input tensor info  (Matrix or Vector A). Data types supported: F16/F32
@@ -156,10 +187,11 @@
 private:
     static CLGEMMKernelType select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run);
 
-    void configure_native_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
-    void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
+                                     const GEMMInfo &gemm_info);
 
     static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
     static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);

diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 4952029..6d1181e 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h

@@ -62,6 +62,16 @@
      * @param[in]  num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
      */
     void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/F16/F32.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[out] output          Destination tensor. Data types supported: Same as @p weights.
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
      *
      * @param[in] weights    Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
@@ -94,9 +104,20 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
     {
+        configure(CLKernelLibrary::get().get_compile_context(), input, biases, num_groups);
+    }
+    /** Configures the @ref CLConvolutionLayerReshapeWeights function
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] biases          Biases tensor. Data type supported: Same as @p input.
+     * @param[in] num_groups      Number of groups when performing a grouped convolution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, unsigned int num_groups)
+    {
         _bias_bit   = (biases != nullptr) ? 1 : 0;
         _num_groups = num_groups;
-        _func.configure(input, biases, &_output, num_groups);
+        _func.configure(compile_context, input, biases, &_output, num_groups);
     }
 
     //Inherited method override
@@ -178,6 +199,28 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
                    const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: QASYMM8/F16/F32.
+     * @param[in]  weights         Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                             Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                             Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info    Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+     *                             tensor has also been transposed with CLGEMMReshapeRHSMatrixKernel. Data type supported: Same as @p input.
+     * @param[in]  dilation        (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+     * @param[in]  act_info        (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  num_groups      (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const WeightsInfo &weights_info = WeightsInfo(),
+                   const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
      *
      * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -208,6 +251,7 @@
 private:
     /** Configures the appropriate matrix multiply routine
      *
+     * @param[in]      compile_context       The compile context to be used.
      * @param[in]      input                 Input tensor. Data types supported: QASYMM8/F16/F32.
      * @param[in]      weights               Weights tensor. Data type supported: Same as @p input or QASYMM8/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
      * @param[in]      biases                Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
@@ -218,8 +262,9 @@
      * @param[in]      gemm_3d_depth         Depth of GEMM 3D
      * @param[in]      act_info              Activation to apply after the matrix multiplication
      */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth,
-                      const ActivationLayerInfo &act_info);
+    void configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+                      const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
+                      int gemm_3d_depth, const ActivationLayerInfo &act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
      *
      * @param[in] input                 Input tensor info. Data types supported: QASYMM8/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
index 01687b6..d8710a4 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMDeconvolutionLayer.h

@@ -99,6 +99,17 @@
      * @param[in]     deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info);
+    /** Set the input, weights, biases and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
+     *                                Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. Data layout supported: NHWC
+     * @param[in]     weights         The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. Data layout supported: same as @p input.
+     * @param[in]     bias            (Optional) The biases have one dimension. Data type supported: Same as @p input. Data layout supported: same as @p input.
+     * @param[out]    output          Output tensor. The output has the same number of dimensions as the @p input. Data layout supported: same as @p input.
+     * @param[in]     deconv_info     Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This function supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *bias, ICLTensor *output, const PadStrideInfo &deconv_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayer
      *
      * @param[in] input       Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.

diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 1d7013d..6ac3cef 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h

@@ -72,6 +72,25 @@
      *                       if the reshape of matrix B should be executed only for the first run
      */
     void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
+    /** Initialise the kernel's inputs, output
+     *
+     * @note GEMMLowp:  low precision GEMM kernel. [A * B + C]
+     *  This kernel performs the following computations:
+     *
+     *  -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.
+     *  -# Convert b values from QASYMM8 to int32 and add b_offset to each of them.
+     *  -# Compute the matrix product of the resulting a * b in int32.
+     *  -# Quantize to uint8 if gemm_info.gemmlowp_output_stage != NONE
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  a               First input tensor  (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.
+     * @param[in]  b               Second input tensor (Matrix B). Data type supported: same as @p a
+     * @param[in]  c               Third input tensor  (Matrix C). It can be a nullptr. Data type supported: S32
+     * @param[out] output          Output tensor. Data type supported: S32 or QASYMM8/QASYMM8_SIGNED if gemm_info.gemmlowp_output_stage != NONE
+     * @param[in]  gemm_info       (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                             if the reshape of matrix B should be executed only for the first run
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info = GEMMInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyCore
      *
      * @param[in] a         First input tensor info (Matrix A). Data type supported: QASYMM8.

diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
index 4c11e51..06cb759 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h

@@ -75,6 +75,23 @@
     ARM_COMPUTE_DEPRECATED_REL(20.05)
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8
+     * @param[in]  result_offset   Offset to be added to each element of the input matrix
+     * @param[in]  result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    ARM_COMPUTE_DEPRECATED_REL(20.05)
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -137,6 +154,23 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -198,6 +232,23 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                    int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int result_offset_after_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -240,6 +291,23 @@
     ARM_COMPUTE_DEPRECATED_REL(20.05)
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8
+     * @param[in]  multiplier      Float multiplier to be multiplied to each element of the input matrix
+     * @param[in]  offset          Offset to be applied to result before converting it back to QASYMM8
+     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
+     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    ARM_COMPUTE_DEPRECATED_REL(20.05)
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset,
+                   int min = std::numeric_limits<int32_t>::lowest(),
+                   int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -300,6 +368,21 @@
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                    int max = std::numeric_limits<int32_t>::max());
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context              The compile context to be used.
+     * @param[in]  input                        Input tensor. Data type supported: S32
+     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output                       Output tensor. Data type supported: QSYMM16
+     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
+     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
+     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
+                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
      *
      * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
@@ -336,6 +419,16 @@
      * @param[in]  info   GEMMLowp output stage metadata.
      */
     void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
+    /** Initialise the kernel's inputs, output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data type supported: S32
+     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+     * @param[in]  info            GEMMLowp output stage metadata.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
      *
      * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32

diff --git a/arm_compute/runtime/CL/functions/CLGather.h b/arm_compute/runtime/CL/functions/CLGather.h
index 71843aa..dcd9efc 100644
--- a/arm_compute/runtime/CL/functions/CLGather.h
+++ b/arm_compute/runtime/CL/functions/CLGather.h

@@ -43,6 +43,15 @@
      * @param[in]  axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
      */
     void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+    /** Initialise the kernel's inputs and outputs
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+     * @param[in]  indices         Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     * @param[in]  axis            (Optional) The axis in @p input to gather @p indices from. Defaults to 0
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
      *

diff --git a/arm_compute/runtime/CL/functions/CLGaussian3x3.h b/arm_compute/runtime/CL/functions/CLGaussian3x3.h
index 2caf6c9..f1906cd 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLGAUSSIAN3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLGaussian5x5.h b/arm_compute/runtime/CL/functions/CLGaussian5x5.h
index 5d121a4..d4ed772 100644
--- a/arm_compute/runtime/CL/functions/CLGaussian5x5.h
+++ b/arm_compute/runtime/CL/functions/CLGaussian5x5.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,6 +62,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
index aa90a5d..a75a4d1 100644
--- a/arm_compute/runtime/CL/functions/CLGaussianPyramid.h
+++ b/arm_compute/runtime/CL/functions/CLGaussianPyramid.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,16 @@
      *
      */
     virtual void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     pyramid               Destination pyramid tensors, Data types supported at each level: U8.
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    virtual void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value = 0) = 0;
 
 protected:
     ICLTensor *_input;
@@ -86,6 +96,7 @@
 
     // Inherited methods overridden:
     void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
     void run() override;
 
 private:
@@ -109,6 +120,7 @@
 
     // Inherited methods overridden:
     void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
     void run() override;
 
 private:

diff --git a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
index fb6967f..91b30fa 100644
--- a/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h

@@ -85,6 +85,24 @@
      */
     void configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
                    const GenerateProposalsInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context     The compile context to be used.
+     * @param[in]  scores              Scores from convolution layer of size (W, H, A), where H and W are the height and width of the feature map, and A is the number of anchors.
+     *                                 Data types supported: QASYMM8/F16/F32
+     * @param[in]  deltas              Bounding box deltas from convolution layer of size (W, H, 4*A). Data types supported: Same as @p scores
+     * @param[in]  anchors             Anchors tensor of size (4, A). Data types supported: QSYMM16 with scale of 0.125 if @p scores is QASYMM8, otherwise same as @p scores
+     * @param[out] proposals           Box proposals output tensor of size (5, W*H*A).
+     *                                 Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p scores is QASYMM8, otherwise same as @p scores
+     * @param[out] scores_out          Box scores output tensor of size (W*H*A). Data types supported: Same as @p scores
+     * @param[out] num_valid_proposals Scalar output tensor which says which of the first proposals are valid. Data types supported: U32
+     * @param[in]  info                Contains GenerateProposals operation information described in @ref GenerateProposalsInfo
+     *
+     * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the @ref GenerateProposalsInfo struct.
+     * @note Proposals contains all the proposals. Of those, only the first num_valid_proposals are valid.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out,
+                   ICLTensor *num_valid_proposals, const GenerateProposalsInfo &info);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLGenerateProposalsLayer
      *

diff --git a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
index 3214e8c..71280c8 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDescriptor.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDescriptor.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,17 @@
      * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destination, HOG data-object and border mode
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Input tensor. Data type supported: U8
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output                Output tensor which stores the HOG descriptor. Data type supported: F32. The number of channels is equal to the number of histogram bins per block
+     * @param[in]      hog                   HOG data object which describes the HOG descriptor
+     * @param[in]      border_mode           Border mode to use.
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited method overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLHOGDetector.h b/arm_compute/runtime/CL/functions/CLHOGDetector.h
index 6703de9..c2bdc15 100644
--- a/arm_compute/runtime/CL/functions/CLHOGDetector.h
+++ b/arm_compute/runtime/CL/functions/CLHOGDetector.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -64,6 +64,22 @@
      * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
      */
     void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, size_t idx_class = 0);
+    /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
+     *
+     * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
+     *
+     * @param[in]  compile_context         The compile context to be used.
+     * @param[in]  input                   Input tensor. It is the output of @ref CLHOGDescriptor. Data type supported: F32
+     * @param[in]  hog                     HOG data-object that describes the HOG descriptor
+     * @param[out] detection_windows       Array of @ref DetectionWindow used to store the detected objects
+     * @param[in]  detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+     *                                     It must be a multiple of the block stride stored in hog
+     * @param[in]  threshold               (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]  idx_class               (Optional) Index of the class used for evaluating which class the detection window belongs to
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
+                   float threshold = 0.0f,
+                   size_t idx_class = 0);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLHOGGradient.h b/arm_compute/runtime/CL/functions/CLHOGGradient.h
index ec4a187..450a4a6 100644
--- a/arm_compute/runtime/CL/functions/CLHOGGradient.h
+++ b/arm_compute/runtime/CL/functions/CLHOGGradient.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,6 +61,19 @@
      * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations, phase type and border mode
+     *
+     * @param[in]      compile_context       The compile context to be used.
+     * @param[in, out] input                 Input tensor. Data type supported: U8.
+     *                                       (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]     output_magnitude      Output tensor (magnitude). Data type supported: U16.
+     * @param[out]     output_phase          Output tensor (phase). Format supported: U8
+     * @param[in]      phase_type            Type of @ref PhaseType
+     * @param[in]      border_mode           Border mode to use
+     * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_magnitude, ICLTensor *output_phase, PhaseType phase_type, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 
     // Inherited method overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
index 424c69d..3d22ff6 100644
--- a/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h
+++ b/arm_compute/runtime/CL/functions/CLHOGMultiDetection.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,8 +82,29 @@
      *
      */
     void configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
-                   uint8_t constant_border_value = 0,
-                   float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+                   uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
+    /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
+     *
+     * @param[in]      compile_context          The compile context to be used.
+     * @param[in, out] input                    Input tensor. Data type supported: U8
+     *                                          (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]      multi_hog                Container of multiple HOG data object. Each HOG data object describes one HOG model to detect.
+     *                                          This container should store the HOG data-objects in descending or ascending cell_size width order.
+     *                                          This will help to understand if the HOG descriptor computation can be skipped for some HOG data-objects
+     * @param[out]     detection_windows        Array of @ref DetectionWindow used for locating the detected objects
+     * @param[in]      detection_window_strides Array of @ref Size2D used to specify the distance in pixels between 2 consecutive detection windows in x and y directions for each HOG data-object
+     *                                          The dimension of this array must be the same as multi_hog->num_models()
+     *                                          The i-th detection_window_stride of this array must be a multiple of the block_stride stored in the i-th multi_hog array
+     * @param[in]      border_mode              Border mode to use.
+     * @param[in]      constant_border_value    (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]      threshold                (Optional) Threshold for the distance between features and SVM classifying plane
+     * @param[in]      non_maxima_suppression   (Optional) Flag to specify whether the non-maxima suppression is required or not.
+     *                                          True if the non-maxima suppression stage has to be computed
+     * @param[in]      min_distance             (Optional) Radial Euclidean distance to use for the non-maxima suppression stage
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides,
+                   BorderMode border_mode, uint8_t constant_border_value = 0, float threshold = 0.0f, bool non_maxima_suppression = false, float min_distance = 1.0f);
 
     // Inherited method overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
index 6c89d6d..2d0e78b 100644
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,6 +82,23 @@
     void configure(ICLImage *input, float threshold, float min_dist, float sensitivity,
                    int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
                    BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]     min_dist              Radial Euclidean distance for the euclidean distance stage.
+     * @param[in]     sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]     gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+     * @param[in]     block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out]    corners               Array of keypoints to store the results.
+     * @param[in]     border_mode           Border mode to use
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]     use_fp16              (Optional) If true the FP16 kernels will be used. If false F32 kernels are used.
+     */
+    void configure(const CLCompileContext &compile_context, ICLImage *input, float threshold, float min_dist, float sensitivity,
+                   int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
+                   BorderMode border_mode, uint8_t constant_border_value = 0, bool use_fp16 = false);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLHistogram.h b/arm_compute/runtime/CL/functions/CLHistogram.h
index ad38924..6d34dd7 100644
--- a/arm_compute/runtime/CL/functions/CLHistogram.h
+++ b/arm_compute/runtime/CL/functions/CLHistogram.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,13 @@
      * @param[out] output Output distribution.
      */
     void configure(const ICLImage *input, ICLDistribution1D *output);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source image. Data types supported: U8
+     * @param[out] output          Output distribution.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
index ddd4b12..4614b90 100644
--- a/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h

@@ -51,6 +51,18 @@
      * @param[in]      use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
      */
     void configure(ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
+    /** Set the input and output tensors.
+     *
+     * @param[in]      compile_context     The compile context to be used.
+     * @param[in, out] input               Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+     *                                     Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+     * @param[out]     output              Destination tensor. Data types and data layouts supported: same as @p input.
+     * @param[in]      gamma               (Optional) The scale scalar value applied to the normalized tensor. Defaults to 1.0
+     * @param[in]      beta                (Optional) The offset scalar value applied to the normalized tensor. Defaults to 0.0
+     * @param[in]      epsilon             (Optional) Lower bound value for the normalization. Defaults to 1e-12
+     * @param[in]      use_mixed_precision (Optional) Use mixed precision in case of FP16 execution
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float gamma = 1.0f, float beta = 0.0f, float epsilon = 1e-12f, bool use_mixed_precision = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
      *

diff --git a/arm_compute/runtime/CL/functions/CLIntegralImage.h b/arm_compute/runtime/CL/functions/CLIntegralImage.h
index 2a452a9..1ea189b 100644
--- a/arm_compute/runtime/CL/functions/CLIntegralImage.h
+++ b/arm_compute/runtime/CL/functions/CLIntegralImage.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,6 +48,13 @@
      * @param[out] output Destination tensor, Data types supported: U32.
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: U8.
+     * @param[out] output          Destination tensor, Data types supported: U32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
index e200dc7..91c547b 100644
--- a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,6 +59,15 @@
      * @param[in]  epsilon (Optional) Lower bound value for the normalization.
      */
     void configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  axis            Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis: 2
+     * @param[in]  epsilon         (Optional) Lower bound value for the normalization.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int axis, float epsilon = 1e-12f);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayer.
      *

diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index a94f239..a29513a 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h

@@ -102,6 +102,52 @@
                    const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
                    ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                    const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
+     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
+     *                                         Data types supported: Same as @p input.
+     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
+     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
+     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
+     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input
+     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
+     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
+     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
+     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
+     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
+     *                                         If set to 0.0f then clipping is disabled.
+     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
+     *                                         If set to 0.0f then clipping is disabled.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   const ICLTensor *output_state_in, const ICLTensor *cell_state_in,
+                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
+                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayer
      *

diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
index 1d39060..082fdb4 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -97,6 +97,33 @@
                    const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                    ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                    ICLTensor *cell_state_out, ICLTensor *output_state_out);
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
+     * @param[in]  input_to_input_weights      2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_input_weights  2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
+     * @param[in]  input_gate_bias             1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
+     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_out, ICLTensor *output_state_out);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized
      *

diff --git a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
index a407e98..49a87ba 100644
--- a/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h
+++ b/arm_compute/runtime/CL/functions/CLLaplacianPyramid.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -68,6 +68,19 @@
      *
      */
     void configure(ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  input                 Source tensor. Data types supported: U8.
+     * @param[out] pyramid               Destination pyramid tensors, Data types supported at each level: S16.
+     * @param[out] output                The lowest resolution tensor necessary to reconstruct the input tensor from the pyramid. Data types supported: S16.
+     *                                   The first two dimensions of this tensor must match the first two dimensions of the tensor in the last level of the pyramid, that is:
+     *                                   output.width() = input.width() / pow(2, pyramid_levels - 1) and output.height() = input.height() / pow(2, pyramid_levels - 1)
+     * @param[in]  border_mode           Border mode to use.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, CLPyramid *pyramid, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
index 3407f46..2c7afde 100644
--- a/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h
+++ b/arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -77,6 +77,22 @@
      *
      */
     void configure(const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * The Output image must have the same size as the first level of the pyramid.
+     * The Input image must have the same size as the last level of the pyramid.
+     *
+     * The idea is to reconstruct the original hi-res image from a low-res representation of it and the Laplacian pyramid.
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  pyramid               Laplacian pyramid tensors, Data types supported at each level: S16.
+     * @param[in]  input                 Source tensor. Data types supported: S16.
+     * @param[out] output                Output tensor. Data types supported: U8.
+     * @param[in]  border_mode           Border mode to use for the convolution.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const CLPyramid *pyramid, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index 1186a44..7a43eab 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,6 +73,19 @@
      * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                             while every optional dimension from 4 and above represent a batch of inputs.
+     *                             Data types supported: F32.
+     * @param[in]  weights         Weights tensor. Weights are a 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[in]  biases          Biases tensor. Shared biases supported. Biases are a 2D tensor with dimensions [OFM, num_patches]. Data type supported: Same as @p input.
+     * @param[out] output          Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                             Data types supported: Same as @p input.
+     * @param[in]  conv_info       Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedLayer
      *
      * @param[in] input     Input tensor info. 3 lower dimensions represent a single input [width, height, IFM],

diff --git a/arm_compute/runtime/CL/functions/CLMagnitude.h b/arm_compute/runtime/CL/functions/CLMagnitude.h
index 2f5932b..e52ab24 100644
--- a/arm_compute/runtime/CL/functions/CLMagnitude.h
+++ b/arm_compute/runtime/CL/functions/CLMagnitude.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,15 @@
      * @param[in]  mag_type (Optional) Magnitude calculation type. Default: L2NORM.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
+    /** Initialise the kernel's inputs.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First tensor input. Data types supported: S16.
+     * @param[in]  input2          Second tensor input. Data types supported: S16.
+     * @param[out] output          Output tensor. Data types supported: S16.
+     * @param[in]  mag_type        (Optional) Magnitude calculation type. Default: L2NORM.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, MagnitudeType mag_type = MagnitudeType::L2NORM);
 };
 }
 #endif /*ARM_COMPUTE_CLMAGNITUDE_H */

diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDev.h b/arm_compute/runtime/CL/functions/CLMeanStdDev.h
index fea1ed1..561ac04 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDev.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDev.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,14 @@
      * @param[out]     stddev (Optional) Output standard deviation of pixel values.
      */
     void configure(ICLImage *input, float *mean, float *stddev = nullptr);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
+     * @param[out]     mean            Output average pixel value.
+     * @param[out]     stddev          (Optional) Output standard deviation of pixel values.
+     */
+    void configure(const CLCompileContext &compile_context, ICLImage *input, float *mean, float *stddev = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDev
      *
      * @param[in] input  Input image. Data types supported: U8/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
index 565f8f3..e39a590 100644
--- a/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,16 @@
      * @param[in]      epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
      */
     void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+    /** Initialise the function's input and outputs.
+     *
+     * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Input tensor with 2 dimensions. Data types supported: F16/F32.
+     * @param[out]     output          (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in]      epsilon         (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
      *
      * @param[in] input   Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,

diff --git a/arm_compute/runtime/CL/functions/CLMedian3x3.h b/arm_compute/runtime/CL/functions/CLMedian3x3.h
index 3a9a95a..f3bb283 100644
--- a/arm_compute/runtime/CL/functions/CLMedian3x3.h
+++ b/arm_compute/runtime/CL/functions/CLMedian3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,15 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor, Data types supported: U8.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLMEDIAN3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
index 30a29f2..e9e3bd9 100644
--- a/arm_compute/runtime/CL/functions/CLMinMaxLocation.h
+++ b/arm_compute/runtime/CL/functions/CLMinMaxLocation.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -66,6 +66,22 @@
     void configure(const ICLImage *input, void *min, void *max,
                    CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
                    uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
+    /** Initialise the kernel's inputs and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8/S16/F32.
+     * @param[out] min             Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] max             Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_loc         (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc         (Optional) Array of Coordinates2D used to store maximum value locations.
+     * @param[out] min_count       (Optional) Number of minimum value encounters.
+     * @param[out] max_count       (Optional) Number of maximum value encounters.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, void *min, void *max,
+                   CLCoordinates2DArray *min_loc = nullptr, CLCoordinates2DArray *max_loc = nullptr,
+                   uint32_t *min_count = nullptr, uint32_t *max_count = nullptr);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
index a7c87d3..79f73ea 100644
--- a/arm_compute/runtime/CL/functions/CLNonLinearFilter.h
+++ b/arm_compute/runtime/CL/functions/CLNonLinearFilter.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,20 @@
      */
     void configure(ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, conv and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8
+     * @param[in]     function              Non linear function to perform
+     * @param[in]     mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in]     pattern               Mask pattern
+     * @param[in]     mask                  The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLNONLINEARFILTER_H */

diff --git a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
index 0859a09..e2c0c4f 100644
--- a/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h
+++ b/arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,18 @@
      *                                   The implementation supports just 2 border modes: UNDEFINED and CONSTANT
      */
     void configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     *       The constant value used with CONSTANT border mode is 0
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. Data types supported: U8, F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output          Destination for the Non-Maxima suppressions 3x3. Data types supported: same as @p input.
+     * @param[in]     border_mode     Border mode to use for non-maxima suppression.
+     *                                The implementation supports just 2 border modes: UNDEFINED and CONSTANT
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, BorderMode border_mode);
 };
 }
 #endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index d06bf56..07bb62c 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,6 +58,17 @@
      * @param[in]      norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
     void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                                 and an optional 4th dimension for batch of inputs. Data types supported: F16/F32 (Written to by the border handler).
+     *                                 Data layouts supported: NCHW/NHWC.
+     * @param[out]     output          Destination tensor. Dimensions, data type and number of channels must match the input ones.
+     *                                 Data types supported: same as @p input. Data layouts supported: same as @p input.
+     * @param[in]      norm_info       Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayer
      *
      * @param[in] input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],

diff --git a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
index 5fbfdd1..5dd3760 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h

@@ -50,6 +50,17 @@
      *                    Data types supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+     *                             Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination feature tensor. Data type supported: same as @p input
+     * @param[in]  mean            Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: Same as @p input
+     * @param[in]  std             Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+     *                             Data types supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
     /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayer
      *
      * @param[in]  input  Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].

diff --git a/arm_compute/runtime/CL/functions/CLOpticalFlow.h b/arm_compute/runtime/CL/functions/CLOpticalFlow.h
index 33df175..12d0583 100644
--- a/arm_compute/runtime/CL/functions/CLOpticalFlow.h
+++ b/arm_compute/runtime/CL/functions/CLOpticalFlow.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -91,6 +91,27 @@
                    const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
                    Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
                    BorderMode border_mode, uint8_t constant_border_value = 0);
+    /**  Initialise the function input and output
+     *
+     * @param[in]  compile_context       The compile context to be used.
+     * @param[in]  old_pyramid           Pointer to the pyramid for the old tensor. Data types supported U8
+     * @param[in]  new_pyramid           Pointer to the pyramid for the new tensor. Data types supported U8
+     * @param[in]  old_points            Pointer to the IKeyPointArray storing old key points
+     * @param[in]  new_points_estimates  Pointer to the IKeyPointArray storing new estimates key points
+     * @param[out] new_points            Pointer to the IKeyPointArray storing new key points
+     * @param[in]  termination           The criteria to terminate the search of each keypoint.
+     * @param[in]  epsilon               The error for terminating the algorithm
+     * @param[in]  num_iterations        The maximum number of iterations before terminating the algorithm
+     * @param[in]  window_dimension      The size of the window on which to perform the algorithm
+     * @param[in]  use_initial_estimate  The flag to indicate whether the initial estimated position should be used
+     * @param[in]  border_mode           The border mode applied at scharr kernel stage
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
+                   const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
+                   Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
+                   BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 7f8a412..74fa86a 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h

@@ -47,6 +47,16 @@
      * @param[out] output Destination tensor. Data type supported: same as @p input
      */
     void configure(ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  alpha           PRelu layer parameters. Data types supported: same as @p input.
+     * @param[out] output          Destination tensor. Data type supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *alpha, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer
      *
      * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLPadLayer.h b/arm_compute/runtime/CL/functions/CLPadLayer.h
index f020d68..82d7205 100644
--- a/arm_compute/runtime/CL/functions/CLPadLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPadLayer.h

@@ -63,6 +63,19 @@
      *                            or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
      */
     void configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: same as @p input
+     * @param[in]  padding         The padding for each spatial dimension of the input tensor. The pair padding[i]
+     *                             specifies the front and the end padding in the i-th dimension.
+     * @param[in]  constant_value  (Optional) Constant value to be used for the padding.
+     * @param[in]  mode            (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+     *                             or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
+                   PaddingMode mode = PaddingMode::CONSTANT);
 
     /**  Static function to check if given info will lead to a valid configuration of @ref CLPadLayer.
      *

diff --git a/arm_compute/runtime/CL/functions/CLPermute.h b/arm_compute/runtime/CL/functions/CLPermute.h
index b1705cf..37e651c 100644
--- a/arm_compute/runtime/CL/functions/CLPermute.h
+++ b/arm_compute/runtime/CL/functions/CLPermute.h

@@ -46,6 +46,16 @@
      * @param[in] perm   Permutation vector
      */
     void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+    /** Set the input and output tensors.
+     *
+     * @note Arbitrary permutation vectors are supported with rank not greater than 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to permute. Data types supported: All.
+     * @param[out] output          The output tensor. Data types supported: Same as @p input
+     * @param[in]  perm            Permutation vector
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLPermute.
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4

diff --git a/arm_compute/runtime/CL/functions/CLPhase.h b/arm_compute/runtime/CL/functions/CLPhase.h
index ac8a867..f993906 100644
--- a/arm_compute/runtime/CL/functions/CLPhase.h
+++ b/arm_compute/runtime/CL/functions/CLPhase.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,15 @@
      * @param[in]  phase_type (Optional) Phase calculation type. Default: SIGNED.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First tensor input. Data types supported: S16.
+     * @param[in]  input2          Second tensor input. Data types supported: S16.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     * @param[in]  phase_type      (Optional) Phase calculation type. Default: SIGNED.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, PhaseType phase_type = PhaseType::SIGNED);
 };
 }
 #endif /*ARM_COMPUTE_CLPHASE_H */

diff --git a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
index 47bb2bf..8b0ee70 100644
--- a/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h
+++ b/arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h

@@ -59,6 +59,22 @@
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
                    ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output and conversion policy.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+     * @param[in]      scale           Scale to apply after multiplication.
+     *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in]      overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+     * @param[in]      rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, float scale,
+                   ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplication
      *
      * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
@@ -98,6 +114,17 @@
      * @param[in]      act_info (Optional) Activation layer information in case of a fused activation.
      */
     void configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Initialise the kernel's inputs, output.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input1          An input tensor. Data types supported: F32. Number of channels supported: 2.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+     * @param[in]      act_info        (Optional) Activation layer information in case of a fused activation.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplication
      *
      * @param[in] input1   An input tensor info. Data types supported: F32. Number of channels supported: 2.

diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index 05b35dc..7d646ab 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h

@@ -49,6 +49,15 @@
      * @param[out]    indices   (optional) The indices of the maximal values. Data type supported: U32.
      */
     void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+    /** Set the input and output tensors.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] input           Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out]    output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]     pool_info       Contains pooling operation information described in @ref PoolingLayerInfo.
+     * @param[out]    indices         (optional) The indices of the maximal values. Data type supported: U32.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
index eea1399..d39e411 100644
--- a/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPriorBoxLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,15 @@
      * @param[in]  info   Prior box layer info.
      */
     void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input1          First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+     * @param[in]  input2          Second source tensor. Data types and layouts supported: same as @p input1
+     * @param[out] output          Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+     * @param[in]  info            Prior box layer info.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayer
      *
      * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.

diff --git a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
index ab34135..72a61f8 100644
--- a/arm_compute/runtime/CL/functions/CLQLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQLSTMLayer.h

@@ -113,6 +113,55 @@
                    ICLTensor *cell_state_out, ICLTensor *output_state_out,
                    const LSTMParams<ICLTensor> &lstm_params);
 
+    /** Initialize function's tensors.
+     *
+     * @param[in]  compile_context             The compile context to be used.
+     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
+     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[in]  output_state_in             2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
+     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported:  QSYMM16.
+     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data types supported: Same as @p input.
+     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
+     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
+     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
+     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
+     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
+     *                                         hidden_state_zero          The zero point of the hidden state.
+     *                                         hidden_state_scale         The scale of the hidden state.
+     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
+     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
+     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
+     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
+     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
+     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
+     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
+     *                                                                               If set to 0.0 then clipping is disabled.
+     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
+     *                                                                               [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
+                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
+                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
+                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
+                   const ICLTensor *cell_state_in, const ICLTensor *output_state_in,
+                   ICLTensor *cell_state_out, ICLTensor *output_state_out,
+                   const LSTMParams<ICLTensor> &lstm_params);
+
     /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
      *
      * @param[in]  input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
@@ -169,19 +218,20 @@
 private:
     /** Internal method to configure matrix multiplication plus output stage of each gate.
      *
-     * @param[in] mm             Matrix multiplication function to use.
-     * @param[in] outstage       Output stage function to use.
-     * @param[in] gemmlowp_info  GEMMLowp metadata to be used by the output stage.
-     * @param[in] mm_input       Input tensor to matrix multiplication function.
-     * @param[in] mm_weights     Weights tensor to matrix multiplication function.
-     * @param[in] bias           Bias tensor to matrix multiplication function.
-     * @param[in] outstage_res   Tensor to be used for storing the result of the output stage.
-     * @param[in] gemmlowp_scale Real multiplier to be used computing multiplier and shift for requantization.
-     * @param[in] mm_res_info    Tensor info to be used to initialize matrix multiplication result tensor.
-     * @param[in] mm_res_info    Tensor info to be used to initialize output stage result tensor.
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] mm              Matrix multiplication function to use.
+     * @param[in] outstage        Output stage function to use.
+     * @param[in] gemmlowp_info   GEMMLowp metadata to be used by the output stage.
+     * @param[in] mm_input        Input tensor to matrix multiplication function.
+     * @param[in] mm_weights      Weights tensor to matrix multiplication function.
+     * @param[in] bias            Bias tensor to matrix multiplication function.
+     * @param[in] outstage_res    Tensor to be used for storing the result of the output stage.
+     * @param[in] gemmlowp_scale  Real multiplier to be used computing multiplier and shift for requantization.
+     * @param[in] mm_res_info     Tensor info to be used to initialize matrix multiplication result tensor.
+     * @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.
      *
      */
-    void configure_mm(CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
+    void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                       const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
                       CLTensor *outstage_res, float gemmlowp_scale,
                       const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);

diff --git a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
index fbdef53..f59e3b7 100644
--- a/arm_compute/runtime/CL/functions/CLQuantizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLQuantizationLayer.h

@@ -48,6 +48,15 @@
      * @note Output auto initialization is not supported by this function
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+     *
+     * @note Output auto initialization is not supported by this function
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayer
      *
      * @param[in] input  Input tensor info. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/32.

diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 569e3da..0291eb1 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,19 @@
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state, ICLTensor *output, ActivationLayerInfo &info);
     /** Initialize the function
      *
+     * @param[in]     compile_context   The compile context to be used.
+     * @param[in]     input             Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
+     * @param[in]     weights           Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
+     * @param[in]     recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input
+     * @param[in]     bias              Bias vector of shape [num_units]. Data types supported: Same as @p input
+     * @param[out]    output            Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
+     * @param[in,out] hidden_state      Output tensor of shape [num_units, batch_size]. Data types supported: Same as @p input
+     * @param[in]     info              Activation layer parameter.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *recurrent_weights, const ICLTensor *bias, ICLTensor *hidden_state,
+                   ICLTensor *output, ActivationLayerInfo &info);
+    /** Initialize the function
+     *
      * @param[in] input             Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
      * @param[in] weights           Weights tensor of shape [input_size, num_units] that multiplies the input. Data types supported: Same as @p input
      * @param[in] recurrent_weights Weights tensor of shape [num_units, num_units] that multiplies the current 'state'. Data types supported: Same as @p input

diff --git a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
index 7c2c6eb..b6defe6 100644
--- a/arm_compute/runtime/CL/functions/CLROIAlignLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIAlignLayer.h

@@ -56,6 +56,22 @@
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+     *                             Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayer
      *
      * @param[in] input     Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
index 7d0e1da..0376e78 100644
--- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,21 @@
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
     void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32.
+     * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+     * @param[out] output          Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info       Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
 };
 }
 #endif /* ARM_COMPUTE_CLROIPOOLINGLAYER_H */

diff --git a/arm_compute/runtime/CL/functions/CLRange.h b/arm_compute/runtime/CL/functions/CLRange.h
index 2cc8376..19e11ba 100644
--- a/arm_compute/runtime/CL/functions/CLRange.h
+++ b/arm_compute/runtime/CL/functions/CLRange.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,15 @@
      * @param[in]  step   The gap between each pair of values in the sequence. Default is 1.
      */
     void configure(ICLTensor *output, float start, float end, float step = 1.f);
+    /** Initialize the kernel's start, end, step and output tensor.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[out] output          Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  start           The starting value of the sequence.
+     * @param[in]  end             The ending (not including) value of the sequence.
+     * @param[in]  step            The gap between each pair of values in the sequence. Default is 1.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step = 1.f);
     /** Static function to check if given info will lead to a valid configuration of @ref CLRange
      *
      * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLReduceMean.h b/arm_compute/runtime/CL/functions/CLReduceMean.h
index 30000ed..57ec48d 100644
--- a/arm_compute/runtime/CL/functions/CLReduceMean.h
+++ b/arm_compute/runtime/CL/functions/CLReduceMean.h

@@ -51,6 +51,17 @@
      * @param[out] output         Destination tensor. Data type supported: Same as @p input
      */
     void configure(ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[in]  reduction_axis  Reduction axis vector.
+     * @param[in]  keep_dims       If true, retains reduced dimensions with length 1.
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const Coordinates &reduction_axis, bool keep_dims, ICLTensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReduceMean
      *

diff --git a/arm_compute/runtime/CL/functions/CLReductionOperation.h b/arm_compute/runtime/CL/functions/CLReductionOperation.h
index 254c730..25cf655 100644
--- a/arm_compute/runtime/CL/functions/CLReductionOperation.h
+++ b/arm_compute/runtime/CL/functions/CLReductionOperation.h

@@ -61,6 +61,16 @@
      * @param[in]  keep_dims (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
      */
     void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output          Destination tensor. Data types and data layouts supported: Same as @p input.
+     * @param[in]  axis            Axis along which to reduce. Supported reduction axis : 0, 1, 2, 3
+     * @param[in]  op              Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+     * @param[in]  keep_dims       (Optional) Whether to keep the reduced dimension after the operation. Defaults to true.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims = true);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperation.
      *

diff --git a/arm_compute/runtime/CL/functions/CLRemap.h b/arm_compute/runtime/CL/functions/CLRemap.h
index f035ac9..dc8a2c4 100644
--- a/arm_compute/runtime/CL/functions/CLRemap.h
+++ b/arm_compute/runtime/CL/functions/CLRemap.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -54,6 +54,20 @@
      */
     void configure(ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
                    InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's sources, destination, interpolation policy and border mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[in]     map_x                 Map for X coords. Data types supported: F32.
+     * @param[in]     map_y                 Map for Y coords. Data types supported: F32.
+     * @param[out]    output                Output tensor. Data types supported: U8.
+     * @param[in]     policy                Interpolation policy to use. Only NEAREST and BILINEAR are supported.
+     * @param[in]     border_mode           Border mode to use on the input tensor.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output,
+                   InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLREMAP_H */

diff --git a/arm_compute/runtime/CL/functions/CLReorgLayer.h b/arm_compute/runtime/CL/functions/CLReorgLayer.h
index dd08c0f..8b245ab 100644
--- a/arm_compute/runtime/CL/functions/CLReorgLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReorgLayer.h

@@ -45,6 +45,18 @@
      *
      */
     void configure(ICLTensor *input, ICLTensor *output, int32_t stride);
+    /** Initialise the function's source and destination.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: All.
+     * @param[out] output          Destination tensor with tensor shape:
+     *                             [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+     *                             the same number of input elements. Data types supported: same as @p input.
+     * @param[in]  stride          Stride value to use for reorganizing the values in the output tensor.
+     *                             It defines the spatial distance between 2 consecutive pixels in the x and y direction.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t stride);
     /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayer
      *
      * @param[in] input  Source tensor. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLReshapeLayer.h b/arm_compute/runtime/CL/functions/CLReshapeLayer.h
index 63fe545..e91c2c7 100644
--- a/arm_compute/runtime/CL/functions/CLReshapeLayer.h
+++ b/arm_compute/runtime/CL/functions/CLReshapeLayer.h

@@ -40,6 +40,13 @@
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's inputs and outputs
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data type supported: All
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayer
      *

diff --git a/arm_compute/runtime/CL/functions/CLReverse.h b/arm_compute/runtime/CL/functions/CLReverse.h
index f87bd19..87ae34c 100644
--- a/arm_compute/runtime/CL/functions/CLReverse.h
+++ b/arm_compute/runtime/CL/functions/CLReverse.h

@@ -41,6 +41,14 @@
      * @param[in]  axis   Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
      */
     void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+    /** Initialize the function
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     * @param[in]  axis            Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
      *
      * @param[in] input  Input tensor info. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLScale.h b/arm_compute/runtime/CL/functions/CLScale.h
index c06c9b6..f345995 100644
--- a/arm_compute/runtime/CL/functions/CLScale.h
+++ b/arm_compute/runtime/CL/functions/CLScale.h

@@ -51,6 +51,21 @@
      */
     void configure(ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
                    SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
+    /** Initialize the function's source, destination, interpolation type and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: Same as @p input
+     *                                      All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     * @param[in]     sampling_policy       (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+     * @param[in]     use_padding           (Optional) Is padding in use or not. Defaults to true.
+     * @param[in]     align_corners         (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value = PixelValue(),
+                   SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool use_padding = true, bool align_corners = false);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLScale
      *

diff --git a/arm_compute/runtime/CL/functions/CLScharr3x3.h b/arm_compute/runtime/CL/functions/CLScharr3x3.h
index 7082432..b25b548 100644
--- a/arm_compute/runtime/CL/functions/CLScharr3x3.h
+++ b/arm_compute/runtime/CL/functions/CLScharr3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Scharr 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Scharr 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLSCHARR3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLSelect.h b/arm_compute/runtime/CL/functions/CLSelect.h
index a7e06e4..84d0997 100644
--- a/arm_compute/runtime/CL/functions/CLSelect.h
+++ b/arm_compute/runtime/CL/functions/CLSelect.h

@@ -45,6 +45,15 @@
      * @param[out] output Output tensor. Data types supported: Same as @p x.
      */
     void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  c               Condition input tensor. Data types supported: U8.
+     * @param[in]  x               First input tensor. Data types supported: All.
+     * @param[in]  y               Second input tensor. Data types supported: Same as @p x
+     * @param[out] output          Output tensor. Data types supported: Same as @p x.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSelect
      *
      * @param[in] c      Condition input tensor. Data types supported: U8.

diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index f5fca43..a8c6e1f 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h

@@ -48,6 +48,20 @@
      * @param[in]  ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     * @note Start indices must be non-negative. 0 <= starts[i]
+     * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+     * @note End indices are not inclusive unless negative.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[out] output          Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts          The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends            The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Coordinates &starts, const Coordinates &ends);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLSlice
      *

diff --git a/arm_compute/runtime/CL/functions/CLSobel3x3.h b/arm_compute/runtime/CL/functions/CLSobel3x3.h
index 2f4cf50..24bc0cd 100644
--- a/arm_compute/runtime/CL/functions/CLSobel3x3.h
+++ b/arm_compute/runtime/CL/functions/CLSobel3x3.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 3x3 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 3x3 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLSOBEL3X3_H */

diff --git a/arm_compute/runtime/CL/functions/CLSobel5x5.h b/arm_compute/runtime/CL/functions/CLSobel5x5.h
index 2a9136b..bf26627 100644
--- a/arm_compute/runtime/CL/functions/CLSobel5x5.h
+++ b/arm_compute/runtime/CL/functions/CLSobel5x5.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,18 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 5x5 convolution along the X axis. Data types supported: S16.
+     * @param[out]    output_y              (optional) Destination for the Sobel 5x5 convolution along the Y axis. Data types supported: S16.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLSobel7x7.h b/arm_compute/runtime/CL/functions/CLSobel7x7.h
index e3188b8..13932c7 100644
--- a/arm_compute/runtime/CL/functions/CLSobel7x7.h
+++ b/arm_compute/runtime/CL/functions/CLSobel7x7.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -65,6 +65,18 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialise the function's source, destinations and border mode.
+     *
+     * @note At least one of output_x or output_y must not be NULL.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output_x              (optional) Destination for the Sobel 7x7 convolution along the X axis. Data types supported: S32.
+     * @param[out]    output_y              (optional) Destination for the Sobel 7x7 convolution along the Y axis. Data types supported: S32.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value = 0);
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
index 751b68d..fadbc43 100644
--- a/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSoftmaxLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -67,6 +67,17 @@
      *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
      */
     void configure(const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/F16/F32
+     * @param[out] output          Destination tensor. Data types supported: same as @p input
+     * @param[in]  beta            (Optional) A scaling factor for the exponent. Defaults to 1.f
+     * @param[in]  axis            (Optional) Reduction axis. It has the purpose of squashing the first @p axis
+     *                             dimensions together. For instance, given a [4x4x4x4] image,
+     *                             when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, float beta = 1.0f, size_t axis = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSoftmaxLayer
      *
      * @param[in] input  Source tensor. Data types supported: QASYMM8/F16/F32
@@ -97,6 +108,21 @@
      *                    when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
      */
     void configure_reshape_input_kernel(const ICLTensor *input, const ICLTensor *output, size_t axis);
+    /** Utility method to configure the kernels needed to flatten the input
+     * tensor.
+     *
+     * @note This function changes the internal state of this class. In particular,
+     * it initializes the kernel @p _flatten_kernel and the tensors @p _input_flat and
+     * @p _output_flat
+     *
+     * @param[in] compile_context The compile context to be used.
+     * @param[in] input           Original source tensor.
+     * @param[in] output          Original destination tensor.
+     * @param[in] axis            Reduction axis. It has the purpose of squashing the first @p axis
+     *                            dimensions together. For instance, given a [4x4x4x4] image,
+     *                            when @p axis is 2, the Softmax reduction will be applied on each of the [4x4] planes of the input image.
+     */
+    void configure_reshape_input_kernel(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *output, size_t axis);
 
     MemoryGroup                    _memory_group;
     CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;

diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
index ef9f164..b8e2bdc 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h

@@ -63,6 +63,15 @@
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape     1-D tensor with shape [M]. Data types supported: S32
+     * @param[in]  paddings        2-D tensor with shape [2, M]. Data types supported: S32
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
     /** Set the input and output tensors. (Static block shape and paddings)
      *
      * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -73,6 +82,18 @@
      * @param[out] output        Tensor output. Data types supported: same as @p input
      */
     void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+    /** Set the input and output tensors. (Static block shape and paddings)
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[in]  block_shape_x   Block shape x value.
+     * @param[in]  block_shape_y   Block shape y value.
+     * @param[in]  padding_left    The left padding of the output tensor.
+     * @param[in]  padding_right   The right padding of the output tensor.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+                   ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayer
      *
      * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
index be7937d..ac011dd 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToDepthLayer.h

@@ -46,6 +46,14 @@
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+    /** Set the input and output tensors.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Tensor input. Supported tensor rank: 4. Data types supported: All.
+     * @param[out] output          Tensor output. Data types supported: same as @p input
+     * @param[in]  block_shape     Block shape value.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayer.
      *
      * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLStackLayer.h b/arm_compute/runtime/CL/functions/CLStackLayer.h
index ebce4f3..9b20445 100644
--- a/arm_compute/runtime/CL/functions/CLStackLayer.h
+++ b/arm_compute/runtime/CL/functions/CLStackLayer.h

@@ -56,6 +56,17 @@
      * @param[out] output Output tensor. Data types supported: Same as @p input.
      */
     void configure(const std::vector<ICLTensor *> &input, int axis, ICLTensor *output);
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @note Supported input tensor rank: up to 4
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The vectors containing all the tensors with the same shape to stack. Data types supported: All.
+     * @param[in]  axis            The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+     *                             Negative values wrap around
+     * @param[out] output          Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
      *
      * @note Supported input tensor rank: up to 4

diff --git a/arm_compute/runtime/CL/functions/CLStridedSlice.h b/arm_compute/runtime/CL/functions/CLStridedSlice.h
index 6bde2c0..bb2bc96 100644
--- a/arm_compute/runtime/CL/functions/CLStridedSlice.h
+++ b/arm_compute/runtime/CL/functions/CLStridedSlice.h

@@ -52,6 +52,24 @@
     void configure(const ICLTensor *input, ICLTensor *output,
                    const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
                    int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+    /** Configure kernel
+     *
+     * @note Supported tensor rank: up to 4
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data type supported: All.
+     * @param[out] output           Destination tensor. Data type supported: Same as @p input
+     * @param[in]  starts           The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  ends             The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  strides          The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+     * @param[in]  begin_mask       (Optional) If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  end_mask         (Optional) If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+     * @param[in]  shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+     *                              A slice of size 1 starting from starts[i] in the dimension must be preserved.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+                   const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+                   int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
 
     /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSlice
      *

diff --git a/arm_compute/runtime/CL/functions/CLTableLookup.h b/arm_compute/runtime/CL/functions/CLTableLookup.h
index c1b7b94..1c11f07 100644
--- a/arm_compute/runtime/CL/functions/CLTableLookup.h
+++ b/arm_compute/runtime/CL/functions/CLTableLookup.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,14 @@
      * @param[out] output Output tensor. Data types supported: U8 and S16
      */
     void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data types supported: U8 and S16
+     * @param[in]  lut             Input lookup table. Data types supported: U8 and S16
+     * @param[out] output          Output tensor. Data types supported: U8 and S16
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
 };
 }
 #endif /*ARM_COMPUTE_CLTABLELOOKUP_H */

diff --git a/arm_compute/runtime/CL/functions/CLThreshold.h b/arm_compute/runtime/CL/functions/CLThreshold.h
index a19b320..d8ae6fb 100644
--- a/arm_compute/runtime/CL/functions/CLThreshold.h
+++ b/arm_compute/runtime/CL/functions/CLThreshold.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,20 @@
     void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
                    uint8_t false_value = 0, uint8_t true_value = 0,
                    ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
+    /** Initialise the function's source, destination, thresholds and threshold type
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           First tensor input. Data types supported: U8.
+     * @param[out] output          Output tensor. Data types supported: U8.
+     * @param[in]  threshold       Threshold. If upper threshold is specified, this will be used as the lower threshold.
+     * @param[in]  false_value     Value to assign when the condition is false.
+     * @param[in]  true_value      Value to assign when the condition is true.
+     * @param[in]  type            Thresholding type. Can either be BINARY or RANGE.
+     * @param[in]  upper           Upper threshold. Only used with RANGE thresholding
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+                   uint8_t false_value = 0, uint8_t true_value = 0,
+                   ThresholdType type = ThresholdType::BINARY, uint8_t upper = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLTHRESHOLD_H */

diff --git a/arm_compute/runtime/CL/functions/CLTile.h b/arm_compute/runtime/CL/functions/CLTile.h
index 9c83b0c..0dad9ad 100644
--- a/arm_compute/runtime/CL/functions/CLTile.h
+++ b/arm_compute/runtime/CL/functions/CLTile.h

@@ -43,6 +43,14 @@
      * @param[out] output    Destination tensor. Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+    /** Set the source and destination of the kernel
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data type supported: All.
+     * @param[in]  multiples       Contains the number of times the input tensor should be replicated on the given dimension.
+     * @param[out] output          Destination tensor. Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTile
      *
      * @param[in] input     Source tensor info. Data type supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLTranspose.h b/arm_compute/runtime/CL/functions/CLTranspose.h
index 61092a1..b2fdcda 100644
--- a/arm_compute/runtime/CL/functions/CLTranspose.h
+++ b/arm_compute/runtime/CL/functions/CLTranspose.h

@@ -44,6 +44,13 @@
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor. Data types supported: All.
+     * @param[out] output          Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLTranspose
      *
      * @param[in] input  The input tensor. Data types supported: All.

diff --git a/arm_compute/runtime/CL/functions/CLUnstack.h b/arm_compute/runtime/CL/functions/CLUnstack.h
index 814d073..777da69 100644
--- a/arm_compute/runtime/CL/functions/CLUnstack.h
+++ b/arm_compute/runtime/CL/functions/CLUnstack.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,16 @@
      *
      */
     void configure(const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
+    /** Set the input, output and unstacking axis.
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     input           A tensor to be unstacked. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in,out] output_vector   A vector of tensors. Data types supported: Same as @p input.
+     *                                Note: The number of elements of the vector will be used as the number of slices to be taken from the axis.
+     * @param[in]     axis            The axis to unstack along. Valid values are [-R,R) where R is the input's rank. Negative values wrap around.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis);
     /** Static function to check if given info will lead to a valid configuration of @ref CLUnstack
      *
      * @param[in] input         Input tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32

diff --git a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
index 1695fd7..5f4f57f 100644
--- a/arm_compute/runtime/CL/functions/CLUpsampleLayer.h
+++ b/arm_compute/runtime/CL/functions/CLUpsampleLayer.h

@@ -60,6 +60,16 @@
      */
     void configure(ICLTensor *input, ICLTensor *output,
                    const Size2D &info, const InterpolationPolicy upsampling_policy);
+    /** Initialize the function's source, destination, stride information and upsampling policy.
+     *
+     * @param[in]  compile_context   The compile context to be used.
+     * @param[in]  input             Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[out] output            Destination tensor. Data types supported: same as @p input.
+     * @param[in]  info              Contains stride information described in @ref Size2D.
+     * @param[in]  upsampling_policy Defines the policy to fill the intermediate pixels.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output,
+                   const Size2D &info, const InterpolationPolicy upsampling_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
      *
      * @param[in] input             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.

diff --git a/arm_compute/runtime/CL/functions/CLWarpAffine.h b/arm_compute/runtime/CL/functions/CLWarpAffine.h
index 2de7107..1a2fe9d 100644
--- a/arm_compute/runtime/CL/functions/CLWarpAffine.h
+++ b/arm_compute/runtime/CL/functions/CLWarpAffine.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,6 +48,19 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The affine matrix. Must be 2x3 of type float.
+     *                                      The matrix argument requires 9 values, the last 3 values are ignored.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLWARPAFFINE_H */

diff --git a/arm_compute/runtime/CL/functions/CLWarpPerspective.h b/arm_compute/runtime/CL/functions/CLWarpPerspective.h
index 93fcc85..5db9ec4 100644
--- a/arm_compute/runtime/CL/functions/CLWarpPerspective.h
+++ b/arm_compute/runtime/CL/functions/CLWarpPerspective.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,6 +47,18 @@
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
      */
     void configure(ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value = 0);
+    /** Initialize the function's source, destination, interpolation policy and border_mode.
+     *
+     * @param[in]     compile_context       The compile context to be used.
+     * @param[in,out] input                 Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
+     * @param[out]    output                Destination tensor. Data types supported: U8.
+     * @param[in]     matrix                The perspective matrix. Must be 3x3 of type float.
+     * @param[in]     policy                The interpolation type.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy, BorderMode border_mode,
+                   uint8_t constant_border_value = 0);
 };
 }
 #endif /*ARM_COMPUTE_CLWARPPERSPECTIVE_H */

diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index 7ac59c9..c1de5f1 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -75,6 +75,26 @@
      */
     void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
                    const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+    /** Set the input and output tensors.
+     *
+     * @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
+     * @note  Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                              while every optional dimension from 4 and above represent a batch of inputs.
+     *                              Data types supported: F16/F32.
+     * @param[in]  weights          Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases           Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output           Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                              Data types supported: Same as @p input.
+     * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
+     *                              available which may introduce a drop of accuracy as well. Default is false
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
     /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradConvolutionLayer
      *
      * @note: This function only works with 3x3,3x1,1x3,5x5,5x1 and 1x5 kernels along with unit strides for both NCHW and NHWC data layout

diff --git a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
index e1ab928..11a402e 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradInputTransform.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,6 +55,25 @@
      * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
      */
     void configure(ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+    /** Set the input and output tensors.
+     *
+     * @note Winograd input transform supports the following configurations for NCHW data layout
+     *       F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+     *                                   F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     * @note Winograd input transform supports the following configurations for NHWC data layout
+     *       F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+     *                                   F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+     *
+     *       Strides: only unit strides
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           The input tensor to transform. Data types supported: F16,F32
+     * @param[out] output          The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+     * @param[in]  winograd_info   Contains Winograd's information described in @ref WinogradInfo.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
     /**  Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransform.
      *
      * @note Winograd input transform supports the following configurations for NCHW data layout

diff --git a/arm_compute/runtime/CL/functions/CLYOLOLayer.h b/arm_compute/runtime/CL/functions/CLYOLOLayer.h
index 95c684b..e70d84b 100644
--- a/arm_compute/runtime/CL/functions/CLYOLOLayer.h
+++ b/arm_compute/runtime/CL/functions/CLYOLOLayer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,18 @@
      * @param[in]      num_classes Number of classes to activate (must be submultiple of @p input channels)
      */
     void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: F16/F32.
+     * @param[out]     output          Destination tensor. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer parameters.
+     * @param[in]      num_classes     Number of classes to activate (must be submultiple of @p input channels)
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
     /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayer
      *
      * @param[in] input       Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
commit	2b84be544e4a27f7e8e80827e9c85c8f0d58b4ce	[log] [tgz]
author	Manuel Bottini <manuel.bottini@arm.com>	Wed Apr 08 10:15:51 2020 +0100
committer	Manuel Bottini <manuel.bottini@arm.com>	Thu Apr 23 17:53:59 2020 +0000
tree	078051a911f9b8883a3f11955cfd3b7ba0d7d9f3
parent	0de45d0a8009e19331c4e29d617fa183167c513a [diff]