Provide logging in the configure methods of all CL functions

Partially Resolves: COMPMID-4718
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: I59943081de0f3105bdbfc78af53971a0de8cd53e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6332
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
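
The change follows one pattern in every touched file: include
"src/common/utils/Log.h" and call ARM_COMPUTE_LOG_PARAMS(...) with the
configure arguments near the top of each configure() overload that takes a
CLCompileContext, typically right after any existing null-pointer or validate
checks. A minimal sketch of the pattern, shown on the CLGather overload from
this patch (illustrative excerpt, not the full file; it assumes the macro
compiles out to a no-op when logging support is disabled):

    #include "arm_compute/core/CL/ICLTensor.h"
    #include "src/core/CL/kernels/CLGatherKernel.h"

    #include "src/common/utils/Log.h" // provides ARM_COMPUTE_LOG_PARAMS

    #include <memory>

    namespace arm_compute
    {
    void CLGather::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
    {
        // Log the configure arguments before setting up the kernel
        // (assumption: the macro is a no-op when logging is disabled).
        ARM_COMPUTE_LOG_PARAMS(input, indices, output, axis);
        auto k = std::make_unique<CLGatherKernel>();
        k->configure(compile_context, input, indices, output, axis);
        _kernel = std::move(k);
    }
    } // namespace arm_compute
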
diff --git a/src/gpu/cl/operators/ClElementwiseOperations.cpp b/src/gpu/cl/operators/ClElementwiseOperations.cpp
index 2525041..32d2b88 100644
--- a/src/gpu/cl/operators/ClElementwiseOperations.cpp
+++ b/src/gpu/cl/operators/ClElementwiseOperations.cpp
@@ -46,6 +46,7 @@
 
 void ClElementwiseMax::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, act_info);
     auto k = std::make_unique<kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::MAX, src1, src2, dst, act_info);
     _kernel = std::move(k);
@@ -58,6 +59,7 @@
 
 void ClElementwiseMin::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, act_info);
     auto k = std::make_unique<kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::MIN, src1, src2, dst, act_info);
     _kernel = std::move(k);
@@ -70,6 +72,7 @@
 
 void ClElementwiseSquaredDiff::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, act_info);
     auto k = std::make_unique<kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, src1, src2, dst, act_info);
     _kernel = std::move(k);
@@ -82,6 +85,7 @@
 
 void ClElementwisePower::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, act_info);
     auto k = std::make_unique<kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::POWER, src1, src2, dst, act_info);
     _kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClElementwiseUnary.cpp b/src/gpu/cl/operators/ClElementwiseUnary.cpp
index 270769b..f94d402 100644
--- a/src/gpu/cl/operators/ClElementwiseUnary.cpp
+++ b/src/gpu/cl/operators/ClElementwiseUnary.cpp
@@ -46,6 +46,7 @@
 
 void ClExp::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::EXP);
     _kernel = std::move(k);
@@ -58,6 +59,7 @@
 
 void ClNeg::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::NEG);
     _kernel = std::move(k);
@@ -70,6 +72,7 @@
 
 void ClSin::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::SIN);
     _kernel = std::move(k);
@@ -82,6 +85,7 @@
 
 void ClAbs::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::ABS);
     _kernel = std::move(k);
@@ -94,6 +98,7 @@
 
 void ClLog::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::LOG);
     _kernel = std::move(k);
@@ -106,6 +111,7 @@
 
 void ClRound::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
 {
+    ARM_COMPUTE_LOG_PARAMS(src, dst);
     auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
     k->configure(compile_context, src, dst, ElementWiseUnary::ROUND);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
index 8c32563..1b0a86a 100644
--- a/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
+++ b/src/runtime/CL/functions/CLArgMinMaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,6 +34,8 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/runtime/Utils.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLArgMinMaxLayer::CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
@@ -119,6 +121,8 @@
 void CLArgMinMaxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int axis, ICLTensor *output, const ReductionOperation &op)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, axis, output, op);
+
     _num_of_stages  = utils::calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
     _reduction_axis = axis;
 
diff --git a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
index 6b76da8..234a0df 100644
--- a/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLBatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/common/utils/Log.h"
 
 #include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
 
@@ -51,6 +52,7 @@
                                           const ICLTensor *gamma, float epsilon,
                                           ActivationLayerInfo act_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, mean, var, beta, gamma, epsilon, act_info);
     _norm_kernel->configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
 }
 
diff --git a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
index c2fdb74..a7691aa 100644
--- a/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLBatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,8 @@
 
 #include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLBatchToSpaceLayer::CLBatchToSpaceLayer()
@@ -48,6 +50,7 @@
 
 void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, block_shape, output);
     _batch_to_space_kernel->configure(compile_context, input, block_shape, output);
 }
 
@@ -58,6 +61,7 @@
 
 void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, block_shape_x, block_shape_y, output);
     _batch_to_space_kernel->configure(compile_context, input, block_shape_x, block_shape_y, output);
 }
 
diff --git a/src/runtime/CL/functions/CLBitwiseAnd.cpp b/src/runtime/CL/functions/CLBitwiseAnd.cpp
index 70e27c0..a4712ed 100644
--- a/src/runtime/CL/functions/CLBitwiseAnd.cpp
+++ b/src/runtime/CL/functions/CLBitwiseAnd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLBitwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -36,6 +38,7 @@
 
 void CLBitwiseAnd::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
     auto k = std::make_unique<CLBitwiseKernel>();
     k->configure(compile_context, input1, input2, output, BitwiseOperation::AND);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLBitwiseNot.cpp b/src/runtime/CL/functions/CLBitwiseNot.cpp
index 7970a16..5964b92 100644
--- a/src/runtime/CL/functions/CLBitwiseNot.cpp
+++ b/src/runtime/CL/functions/CLBitwiseNot.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLBitwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -36,6 +38,7 @@
 
 void CLBitwiseNot::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output);
     auto k = std::make_unique<CLBitwiseKernel>();
     k->configure(compile_context, input, nullptr, output, BitwiseOperation::NOT);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLBitwiseOr.cpp b/src/runtime/CL/functions/CLBitwiseOr.cpp
index fbda9ad..a07bf17 100644
--- a/src/runtime/CL/functions/CLBitwiseOr.cpp
+++ b/src/runtime/CL/functions/CLBitwiseOr.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLBitwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -36,6 +38,7 @@
 
 void CLBitwiseOr::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
     auto k = std::make_unique<CLBitwiseKernel>();
     k->configure(compile_context, input1, input2, output, BitwiseOperation::OR);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLBitwiseXor.cpp b/src/runtime/CL/functions/CLBitwiseXor.cpp
index 4f4b74c..f65e2e4 100644
--- a/src/runtime/CL/functions/CLBitwiseXor.cpp
+++ b/src/runtime/CL/functions/CLBitwiseXor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLBitwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -36,8 +38,9 @@
 
 void CLBitwiseXor::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
     auto k = std::make_unique<CLBitwiseKernel>();
     k->configure(compile_context, input1, input2, output, BitwiseOperation::XOR);
     _kernel = std::move(k);
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
index 0dade0a..48583bf 100644
--- a/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
+++ b/src/runtime/CL/functions/CLBoundingBoxTransform.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLBoundingBoxTransform::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
@@ -34,6 +36,8 @@
 
 void CLBoundingBoxTransform::configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
 {
+    ARM_COMPUTE_LOG_PARAMS(boxes, pred_boxes, deltas, info);
+
     // Configure Bounding Box kernel
     auto k = std::make_unique<CLBoundingBoxTransformKernel>();
     k->configure(compile_context, boxes, pred_boxes, deltas, info);
diff --git a/src/runtime/CL/functions/CLCast.cpp b/src/runtime/CL/functions/CLCast.cpp
index f9403af..10f7cc2 100644
--- a/src/runtime/CL/functions/CLCast.cpp
+++ b/src/runtime/CL/functions/CLCast.cpp
@@ -29,6 +29,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClCast.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -56,6 +58,7 @@
 void CLCast::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, output, policy);
 
     _impl->src = input;
     _impl->dst = output;
diff --git a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
index c6af5a0..021f28f 100644
--- a/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
+++ b/src/runtime/CL/functions/CLChannelShuffleLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 #include "arm_compute/core/Types.h"
 #include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLChannelShuffleLayer::configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
@@ -35,6 +37,7 @@
 
 void CLChannelShuffleLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, num_groups);
     auto k = std::make_unique<CLChannelShuffleLayerKernel>();
     k->configure(compile_context, input, output, num_groups);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLComparison.cpp b/src/runtime/CL/functions/CLComparison.cpp
index 4122928..192a266 100644
--- a/src/runtime/CL/functions/CLComparison.cpp
+++ b/src/runtime/CL/functions/CLComparison.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,8 @@
 #include "src/core/CL/kernels/CLComparisonKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLComparison::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
@@ -37,6 +39,7 @@
 
 void CLComparison::configure(const CLCompileContext &compile_context, ICLTensor *input1, ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output, operation);
     auto k = std::make_unique<CLComparisonKernel>();
     k->configure(compile_context, input1, input2, output, operation);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLConcatenateLayer.cpp b/src/runtime/CL/functions/CLConcatenateLayer.cpp
index 8ab50be..0a8884f 100644
--- a/src/runtime/CL/functions/CLConcatenateLayer.cpp
+++ b/src/runtime/CL/functions/CLConcatenateLayer.cpp
@@ -27,6 +27,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClConcatenate.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 struct CLConcatenateLayer::Impl
@@ -57,6 +59,7 @@
 void CLConcatenateLayer::configure(const CLCompileContext &compile_context, std::vector<const ICLTensor *> &inputs_vector, ICLTensor *output, size_t axis)
 {
     ARM_COMPUTE_ERROR_ON(output == nullptr);
+    ARM_COMPUTE_LOG_PARAMS(inputs_vector, output, axis);
 
     _impl->srcs       = inputs_vector;
     _impl->dst        = output;
diff --git a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
index 7780c0a..b3efe5c 100644
--- a/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
+++ b/src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClConvertFullyConnectedWeights.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 struct CLConvertFullyConnectedWeights::Impl
@@ -54,6 +56,7 @@
                                                DataLayout data_layout)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, output, original_input_shape, data_layout);
     _impl->src = input;
     _impl->dst = output;
     _impl->op  = std::make_unique<opencl::ClConvertFullyConnectedWeights>();
@@ -74,4 +77,4 @@
     _impl->op->run(pack);
 }
 
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp
index 1f715d2..eaca6ee 100644
--- a/src/runtime/CL/functions/CLConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp
@@ -31,6 +31,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/core/helpers/MemoryHelpers.h"
 #include "src/gpu/cl/operators/ClConv2d.h"
+
+#include "src/common/utils/Log.h"
 #include "support/Cast.h"
 
 namespace arm_compute
@@ -70,6 +72,7 @@
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info,
                                                             enable_fast_math, num_groups));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, weights_info, dilation, act_info, enable_fast_math, num_groups);
 
     const Conv2dInfo conv2d_info = Conv2dInfo(conv_info, dilation, act_info, enable_fast_math, num_groups);
 
@@ -176,4 +179,4 @@
         release_temporaries(_impl->aux_mem_req, _impl->workspace);
     }
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLCopy.cpp b/src/runtime/CL/functions/CLCopy.cpp
index e8aaf85..56400b6 100644
--- a/src/runtime/CL/functions/CLCopy.cpp
+++ b/src/runtime/CL/functions/CLCopy.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClCopy.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -57,6 +59,7 @@
 void CLCopy::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, Window *dst_window)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+    ARM_COMPUTE_LOG_PARAMS(input, output, dst_window);
 
     _impl->src = input;
     _impl->dst = output;
diff --git a/src/runtime/CL/functions/CLCrop.cpp b/src/runtime/CL/functions/CLCrop.cpp
index ff30837..35ea17c 100644
--- a/src/runtime/CL/functions/CLCrop.cpp
+++ b/src/runtime/CL/functions/CLCrop.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClCrop.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -59,6 +61,7 @@
                        Window *dst_window)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_LOG_PARAMS(src, dst, start, end, batch_index, extrapolation_value, dst_window);
 
     _impl->src = src;
     _impl->dst = dst;
diff --git a/src/runtime/CL/functions/CLCropResize.cpp b/src/runtime/CL/functions/CLCropResize.cpp
index 77c44d5..d8fc38d 100644
--- a/src/runtime/CL/functions/CLCropResize.cpp
+++ b/src/runtime/CL/functions/CLCropResize.cpp
@@ -29,6 +29,8 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
 
+#include "src/common/utils/Log.h"
+
 #include <cstddef>
 
 namespace arm_compute
@@ -92,6 +94,7 @@
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, boxes, box_ind);
     ARM_COMPUTE_ERROR_THROW_ON(CLCropResize::validate(input->info(), boxes->info(), box_ind->info(), output->info(), crop_size, method, extrapolation_value));
+    ARM_COMPUTE_LOG_PARAMS(input, boxes, box_ind, output, crop_size, method, extrapolation_value);
 
     TensorShape output_shape = TensorShape(input->info()->tensor_shape()[0], crop_size.x, crop_size.y, boxes->info()->tensor_shape()[1]);
     auto_init_if_empty(*output->info(), output_shape, 1, DataType::F32);
@@ -291,4 +294,4 @@
     }
     CLScheduler::get().sync();
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 8038a75..c348bfc 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -30,6 +30,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/ICLKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <cmath>
 #include <memory>
 #include <tuple>
@@ -52,6 +54,7 @@
                                      const WeightsInfo &weights_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, deconv_info, weights_info);
 
     switch(CLDeconvolutionLayer::get_deconvolution_method(input->info(), weights->info(), nullptr, output->info(), deconv_info, weights_info))
     {
diff --git a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
index c371558..0b428f5 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp
@@ -29,6 +29,8 @@
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT
@@ -53,6 +55,7 @@
 void CLDeconvolutionLayerUpsample::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PadStrideInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, output, info);
 
     _output = output;
     _fill.configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
index 5930ff1..cac3f51 100644
--- a/src/runtime/CL/functions/CLDepthConvertLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -29,6 +29,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClCast.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -56,6 +58,7 @@
 void CLDepthConvertLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
     ARM_COMPUTE_UNUSED(shift);
+    ARM_COMPUTE_LOG_PARAMS(input, output, policy, shift);
 
     _impl->src = input;
     _impl->dst = output;
diff --git a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
index bd2303c..98531e7 100644
--- a/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthToSpaceLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -36,6 +38,7 @@
 
 void CLDepthToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, block_shape);
     auto k = std::make_unique<CLDepthToSpaceLayerKernel>();
     k->configure(compile_context, input, output, block_shape);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
index 84798fa..2ee23c4 100644
--- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -33,6 +33,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 using namespace arm_compute::misc;
@@ -171,6 +173,7 @@
                                                                      depth_multiplier,
                                                                      act_info,
                                                                      dilation));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation);
 
     _is_quantized     = is_data_type_quantized(input->info()->data_type());
     _is_prepared      = false;
diff --git a/src/runtime/CL/functions/CLDequantizationLayer.cpp b/src/runtime/CL/functions/CLDequantizationLayer.cpp
index e11802e..64c6b5d 100644
--- a/src/runtime/CL/functions/CLDequantizationLayer.cpp
+++ b/src/runtime/CL/functions/CLDequantizationLayer.cpp
@@ -29,6 +29,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClDequantize.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 struct CLDequantizationLayer::Impl
@@ -51,6 +53,7 @@
 
 void CLDequantizationLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output);
     _impl->src = input;
     _impl->dst = output;
 
diff --git a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
index 7bbb7e8..752e0e4 100644
--- a/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectConvolutionLayer.cpp
@@ -31,6 +31,8 @@
 #include "src/gpu/cl/operators/ClActivation.h"
 #include "src/gpu/cl/operators/ClDirectConv2d.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 struct CLDirectConvolutionLayer::Impl
@@ -59,6 +61,7 @@
                                          const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, act_info);
 
     _impl->src     = input;
     _impl->weights = weights;
@@ -84,4 +87,4 @@
     pack.add_tensor(TensorType::ACL_DST, _impl->dst);
     _impl->op->run(pack);
 }
-}
\ No newline at end of file
+}
diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
index a476bb6..b263d73 100644
--- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp
@@ -33,6 +33,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 #include <memory>
 #include <tuple>
 
@@ -120,6 +122,7 @@
                                            const WeightsInfo &weights_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, info, weights_info);
 
     const unsigned int pad_left   = info.pad_left();
     const unsigned int pad_right  = info.pad_right();
diff --git a/src/runtime/CL/functions/CLFFT1D.cpp b/src/runtime/CL/functions/CLFFT1D.cpp
index cf136dc..bd0966b 100644
--- a/src/runtime/CL/functions/CLFFT1D.cpp
+++ b/src/runtime/CL/functions/CLFFT1D.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,8 @@
 #include "src/core/CL/kernels/CLFFTScaleKernel.h"
 #include "src/core/utils/helpers/fft.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
@@ -56,6 +58,7 @@
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLFFT1D::validate(input->info(), output->info(), config));
+    ARM_COMPUTE_LOG_PARAMS(input, output, config);
 
     // Decompose size to radix factors
     const auto         supported_radix   = CLFFTRadixStageKernel::supported_radix();
diff --git a/src/runtime/CL/functions/CLFFT2D.cpp b/src/runtime/CL/functions/CLFFT2D.cpp
index e0497ca..94fc411 100644
--- a/src/runtime/CL/functions/CLFFT2D.cpp
+++ b/src/runtime/CL/functions/CLFFT2D.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,8 @@
 #include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
 #include "src/core/CL/kernels/CLFFTScaleKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLFFT2D::CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager)
@@ -48,6 +50,7 @@
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLFFT2D::validate(input->info(), output->info(), config));
+    ARM_COMPUTE_LOG_PARAMS(input, output, config);
 
     // Setup first pass
     FFT1DInfo first_pass_config;
diff --git a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
index 41b02d0..d12e2de 100644
--- a/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLFFTConvolutionLayer.cpp
@@ -38,6 +38,8 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/utils/helpers/fft.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace
@@ -113,6 +115,7 @@
 {
     ARM_COMPUTE_UNUSED(enable_fast_math);
     ARM_COMPUTE_ERROR_THROW_ON(CLFFTConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, output->info(), conv_info, act_info, enable_fast_math));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, biases, output, conv_info, act_info, enable_fast_math);
 
     _original_weights = weights;
     _original_bias    = biases;
diff --git a/src/runtime/CL/functions/CLFillBorder.cpp b/src/runtime/CL/functions/CLFillBorder.cpp
index 2e5a29e..de9b857 100644
--- a/src/runtime/CL/functions/CLFillBorder.cpp
+++ b/src/runtime/CL/functions/CLFillBorder.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 using namespace arm_compute;
@@ -36,6 +38,7 @@
 
 void CLFillBorder::configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
 {
+    ARM_COMPUTE_LOG_PARAMS(tensor, border_width, border_mode, constant_border_value);
     auto k = std::make_unique<CLFillBorderKernel>();
     k->configure(compile_context, tensor, BorderSize(border_width), border_mode, constant_border_value);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
index 2945508..7379e9d 100644
--- a/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
+++ b/src/runtime/CL/functions/CLFuseBatchNormalization.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLFuseBatchNormalization::CLFuseBatchNormalization()
@@ -52,6 +54,7 @@
                                          const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
                                          float epsilon, FuseBatchNormalizationType fbn_type)
 {
+    ARM_COMPUTE_LOG_PARAMS(input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
     _fuse_bn_kernel->configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
 }
 
diff --git a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
index 126a59e..8af83e2 100644
--- a/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp
@@ -31,6 +31,8 @@
 #include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <tuple>
 
 namespace arm_compute
@@ -228,6 +230,7 @@
                                                                   bias != nullptr ? bias->info() : nullptr,
                                                                   output->info(),
                                                                   deconv_info));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, deconv_info);
 
     _original_weights = weights;
     _padded_input     = deconv_info.pad_bottom() > 0 || deconv_info.pad_left() > 0 || deconv_info.pad_right() > 0 || deconv_info.pad_top() > 0;
diff --git a/src/runtime/CL/functions/CLGather.cpp b/src/runtime/CL/functions/CLGather.cpp
index bde34dc..033c117 100644
--- a/src/runtime/CL/functions/CLGather.cpp
+++ b/src/runtime/CL/functions/CLGather.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "src/core/CL/kernels/CLGatherKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLGather::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
@@ -35,6 +37,7 @@
 
 void CLGather::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, indices, output, axis);
     auto k = std::make_unique<CLGatherKernel>();
     k->configure(compile_context, input, indices, output, axis);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
index 81e24db..9cb7d61 100644
--- a/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
+++ b/src/runtime/CL/functions/CLGenerateProposalsLayer.cpp
@@ -32,6 +32,8 @@
 #include "src/core/CL/kernels/CLPadLayerKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
@@ -81,6 +83,7 @@
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals);
     ARM_COMPUTE_ERROR_THROW_ON(CLGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info));
+    ARM_COMPUTE_LOG_PARAMS(scores, deltas, anchors, proposals, scores_out, num_valid_proposals, info);
 
     _is_nhwc                        = scores->info()->data_layout() == DataLayout::NHWC;
     const DataType scores_data_type = scores->info()->data_type();
diff --git a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
index 4a0bda8..5feafe1 100644
--- a/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp
@@ -31,6 +31,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLInstanceNormalizationLayer::CLInstanceNormalizationLayer(CLRuntimeContext *ctx) // NOLINT
@@ -51,6 +53,7 @@
 
 void CLInstanceNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float gamma, float beta, float epsilon, bool use_mixed_precision)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, gamma, beta, epsilon, use_mixed_precision);
     auto w = std::make_unique<CLComputeMeanVariance>();
     w->configure(compile_context, input, &_mean_var_tensor, use_mixed_precision);
     _mean_var_kernel = std::move(w);
diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
index 8c360aa..1278385 100644
--- a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
+++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,6 +33,8 @@
 #include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "src/core/CL/kernels/CLReductionOperationKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace
@@ -57,6 +59,8 @@
 
 void CLL2NormalizeLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int axis, float epsilon)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, axis, epsilon);
+
     // Reset auxiliary tensor
     _sumsq.allocator()->init(TensorInfo());
 
diff --git a/src/runtime/CL/functions/CLLSTMLayer.cpp b/src/runtime/CL/functions/CLLSTMLayer.cpp
index 0122162..9f17a52 100644
--- a/src/runtime/CL/functions/CLLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayer.cpp
@@ -32,6 +32,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/gpu/cl/kernels/ClTransposeKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 using namespace arm_compute::misc::shape_calculator;
@@ -83,6 +85,10 @@
                                  output_state_in, cell_state_in,
                                  scratch_buffer, output_state_out, cell_state_out, output);
 
+    ARM_COMPUTE_LOG_PARAMS(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
+                           recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out,
+                           output, lstm_params, activation_info, cell_threshold, projection_threshold);
+
     _is_layer_norm_lstm = lstm_params.use_layer_norm();
 
     // Set lstm parameters
diff --git a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
index 589523a..d14c610 100644
--- a/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
+++ b/src/runtime/CL/functions/CLLSTMLayerQuantized.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 #include <memory>
 
 namespace arm_compute
@@ -78,6 +80,10 @@
                                  recurrent_to_input_weights, recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                                  input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
 
+    ARM_COMPUTE_LOG_PARAMS(input, input_to_input_weights, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_input_weights,
+                           recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out,
+                           output_state_out);
+
     ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayerQuantized::validate(input->info(), input_to_input_weights->info(), input_to_forget_weights->info(), input_to_cell_weights->info(),
                                                               input_to_output_weights->info(),
                                                               recurrent_to_input_weights->info(), recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
diff --git a/src/runtime/CL/functions/CLLogicalAnd.cpp b/src/runtime/CL/functions/CLLogicalAnd.cpp
index 306957a..696191c 100644
--- a/src/runtime/CL/functions/CLLogicalAnd.cpp
+++ b/src/runtime/CL/functions/CLLogicalAnd.cpp
@@ -25,6 +25,8 @@
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "src/gpu/cl/kernels/ClElementwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -33,6 +35,7 @@
 {
 void CLLogicalAnd::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
     auto k = std::make_unique<arm_compute::opencl::kernels::ClLogicalBinaryKernel>();
     k->configure(compile_context, LogicalOperation::And, input1, input2, output);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLLogicalOr.cpp b/src/runtime/CL/functions/CLLogicalOr.cpp
index 6352421..f9a606e 100644
--- a/src/runtime/CL/functions/CLLogicalOr.cpp
+++ b/src/runtime/CL/functions/CLLogicalOr.cpp
@@ -25,6 +25,8 @@
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "src/gpu/cl/kernels/ClElementwiseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -33,6 +35,7 @@
 {
 void CLLogicalOr::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output);
     auto k = std::make_unique<arm_compute::opencl::kernels::ClLogicalBinaryKernel>();
     k->configure(compile_context, LogicalOperation::Or, input1, input2, output);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
index 52151cd..2786d32 100644
--- a/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp
@@ -29,6 +29,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLMaxUnpoolingLayer::CLMaxUnpoolingLayer()
@@ -46,6 +48,7 @@
 
 void CLMaxUnpoolingLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, indices, output, pool_info);
     const PixelValue zero_value(0.f);
     _fill.configure(output, zero_value);
 
diff --git a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
index 0f6a0e4..a81cbca 100644
--- a/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 #include "arm_compute/core/Types.h"
 #include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLMeanStdDevNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, float epsilon)
@@ -35,6 +37,7 @@
 
 void CLMeanStdDevNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, epsilon);
     auto k = std::make_unique<CLMeanStdDevNormalizationKernel>();
     k->configure(compile_context, input, output, epsilon);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp
index 12560f1..c0cc518 100644
--- a/src/runtime/CL/functions/CLNormalizationLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp
@@ -33,6 +33,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLNormalizationLayer::CLNormalizationLayer()
@@ -51,6 +53,7 @@
 void CLNormalizationLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info)
 {
     ARM_COMPUTE_ERROR_ON(input == nullptr);
+    ARM_COMPUTE_LOG_PARAMS(input, output, norm_info);
 
     // Configure normalization kernel
     _norm_kernel->configure(compile_context, input, output, norm_info);
@@ -78,4 +81,4 @@
     // Run normalization kernel
     CLScheduler::get().enqueue(*_norm_kernel);
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
index 70189a2..63c9164 100644
--- a/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
+++ b/src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 
 #include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 namespace arm_compute
@@ -37,6 +39,7 @@
 
 void CLNormalizePlanarYUVLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, mean, std);
     auto k = std::make_unique<CLNormalizePlanarYUVLayerKernel>();
     k->configure(compile_context, input, output, mean, std);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLPadLayer.cpp b/src/runtime/CL/functions/CLPadLayer.cpp
index d105c05..0ed8f03 100644
--- a/src/runtime/CL/functions/CLPadLayer.cpp
+++ b/src/runtime/CL/functions/CLPadLayer.cpp
@@ -24,6 +24,8 @@
 #include "arm_compute/runtime/CL/functions/CLPadLayer.h"
 #include "src/core/CL/kernels/CLPadLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLPadLayer::CLPadLayer()
@@ -43,6 +45,7 @@
 void CLPadLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
 {
     ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), padding, constant_value, mode));
+    ARM_COMPUTE_LOG_PARAMS(input, output, padding, constant_value, mode);
 
     _perform_pad = std::any_of(padding.begin(), padding.end(), [](PaddingInfo info)
     {
@@ -87,4 +90,4 @@
         _copy.run();
     }
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPermute.cpp b/src/runtime/CL/functions/CLPermute.cpp
index 556e943..a56afff 100644
--- a/src/runtime/CL/functions/CLPermute.cpp
+++ b/src/runtime/CL/functions/CLPermute.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/ICLKernel.h"
 #include "src/gpu/cl/operators/ClPermute.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 struct CLPermute::Impl
@@ -54,6 +56,7 @@
 void CLPermute::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, output, perm);
 
     _impl->src = input;
     _impl->dst = output;
@@ -74,4 +77,4 @@
     pack.add_tensor(TensorType::ACL_DST, _impl->dst);
     _impl->op->run(pack);
 }
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPriorBoxLayer.cpp b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
index 5ace7c6..019f0a7 100644
--- a/src/runtime/CL/functions/CLPriorBoxLayer.cpp
+++ b/src/runtime/CL/functions/CLPriorBoxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 CLPriorBoxLayer::CLPriorBoxLayer()
@@ -46,6 +48,7 @@
 
 void CLPriorBoxLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input1, input2, output, info);
     _min           = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info.min_sizes().size() * sizeof(float));
     _aspect_ratios = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info.aspect_ratios().size() * sizeof(float));
     if(!info.max_sizes().empty())
@@ -61,4 +64,4 @@
 Status CLPriorBoxLayer::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info)
 {
     return CLPriorBoxLayerKernel::validate(input1, input2, output, info);
-}
\ No newline at end of file
+}
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 6ddf555..a8df466 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -35,6 +35,8 @@
 #include "src/core/helpers/WindowHelpers.h"
 #include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 using namespace arm_compute::utils::info_helpers;
@@ -180,6 +182,10 @@
                                  forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
                                  cell_state_out, output_state_out, output);
 
+    ARM_COMPUTE_LOG_PARAMS(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
+                           recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
+                           forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
+                           cell_state_out, output_state_out, output, lstm_params);
     // Set lstm parameters
     LSTMParams<ITensorInfo> lstm_params_info{};
     build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
diff --git a/src/runtime/CL/functions/CLRNNLayer.cpp b/src/runtime/CL/functions/CLRNNLayer.cpp
index 20deef4..6f12286 100644
--- a/src/runtime/CL/functions/CLRNNLayer.cpp
+++ b/src/runtime/CL/functions/CLRNNLayer.cpp
@@ -30,6 +30,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 using namespace arm_compute::misc::shape_calculator;
@@ -83,6 +85,7 @@
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, recurrent_weights, bias, hidden_state, output);
     ARM_COMPUTE_ERROR_THROW_ON(CLRNNLayer::validate(input->info(), weights->info(), recurrent_weights->info(), bias->info(), hidden_state->info(), output->info(), info));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, recurrent_weights, bias, hidden_state, output, info);
 
     const int   idx_height = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT);
     TensorShape shape      = compute_rnn_shape(recurrent_weights->info(), hidden_state->info()->dimension(idx_height));
diff --git a/src/runtime/CL/functions/CLROIAlignLayer.cpp b/src/runtime/CL/functions/CLROIAlignLayer.cpp
index 291ccff..867ef7c 100644
--- a/src/runtime/CL/functions/CLROIAlignLayer.cpp
+++ b/src/runtime/CL/functions/CLROIAlignLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,8 @@
 #include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
 #include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 Status CLROIAlignLayer::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
@@ -43,6 +45,8 @@
 
 void CLROIAlignLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, rois, output, pool_info);
+
     // Configure ROI pooling kernel
     auto k = std::make_unique<CLROIAlignLayerKernel>();
     k->configure(compile_context, input, rois, output, pool_info);
diff --git a/src/runtime/CL/functions/CLROIPoolingLayer.cpp b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
index cf7d4bc..239a1c6 100644
--- a/src/runtime/CL/functions/CLROIPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
@@ -25,6 +25,8 @@
 #include "arm_compute/core/CL/ICLArray.h"
 #include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 Status CLROIPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
@@ -39,6 +41,8 @@
 
 void CLROIPoolingLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, rois, output, pool_info);
+
     // Configure ROI pooling kernel
     auto k = std::make_unique<CLROIPoolingLayerKernel>();
     k->configure(compile_context, input, rois, output, pool_info);
diff --git a/src/runtime/CL/functions/CLRange.cpp b/src/runtime/CL/functions/CLRange.cpp
index d4735c8..3fbbd5f 100644
--- a/src/runtime/CL/functions/CLRange.cpp
+++ b/src/runtime/CL/functions/CLRange.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLRangeKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 void CLRange::configure(ICLTensor *output, const float start, const float end, const float step)
@@ -38,6 +40,7 @@
 
 void CLRange::configure(const CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step)
 {
+    ARM_COMPUTE_LOG_PARAMS(output, start, end, step);
     auto k = std::make_unique<CLRangeKernel>();
     k->set_target(CLScheduler::get().target());
     k->configure(compile_context, output, start, end, step);
diff --git a/src/runtime/CL/functions/CLReduceMean.cpp b/src/runtime/CL/functions/CLReduceMean.cpp
index b761dc2..6a2fcc6 100644
--- a/src/runtime/CL/functions/CLReduceMean.cpp
+++ b/src/runtime/CL/functions/CLReduceMean.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,6 +32,8 @@
 #include "src/core/CL/kernels/CLReductionOperationKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace
@@ -113,6 +115,8 @@
 {
     // Perform validate step
     ARM_COMPUTE_ERROR_THROW_ON(CLReduceMean::validate(input->info(), reduction_axis, keep_dims, output->info()));
+    ARM_COMPUTE_LOG_PARAMS(input, reduction_axis, keep_dims, output);
+
     // Output auto inizialitation if not yet initialized
     const TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_reduce_mean_shape(input->info(), reduction_axis, keep_dims);
     auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
diff --git a/src/runtime/CL/functions/CLReductionOperation.cpp b/src/runtime/CL/functions/CLReductionOperation.cpp
index 61859f8..cdc7fec 100644
--- a/src/runtime/CL/functions/CLReductionOperation.cpp
+++ b/src/runtime/CL/functions/CLReductionOperation.cpp
@@ -34,6 +34,8 @@
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/runtime/Utils.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
@@ -109,6 +111,7 @@
 void CLReductionOperation::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, output, axis, op, keep_dims);
     _reduction_axis      = axis;
     _is_reshape_required = !keep_dims;
 
diff --git a/src/runtime/CL/functions/CLRemap.cpp b/src/runtime/CL/functions/CLRemap.cpp
index de9f0a0..b9c62d8 100644
--- a/src/runtime/CL/functions/CLRemap.cpp
+++ b/src/runtime/CL/functions/CLRemap.cpp
@@ -30,6 +30,8 @@
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "src/core/CL/kernels/CLRemapKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
@@ -51,6 +53,7 @@
 void CLRemap::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
                         BorderMode border_mode, PixelValue constant_border_value)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, map_x, map_y, output, policy, border_mode, constant_border_value);
     auto k = std::make_unique<CLRemapKernel>();
     k->configure(compile_context, input, map_x, map_y, output, RemapInfo{ policy, border_mode, constant_border_value });
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLReorgLayer.cpp b/src/runtime/CL/functions/CLReorgLayer.cpp
index 69b28ab..15de959 100644
--- a/src/runtime/CL/functions/CLReorgLayer.cpp
+++ b/src/runtime/CL/functions/CLReorgLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,8 @@
 #include "arm_compute/core/Validate.h"
 #include "src/core/CL/kernels/CLReorgLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 #include <utility>
 
 using namespace arm_compute;
@@ -40,6 +42,7 @@
 
 void CLReorgLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, int32_t stride)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, stride);
     auto k = std::make_unique<CLReorgLayerKernel>();
     k->configure(compile_context, input, output, stride);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLReverse.cpp b/src/runtime/CL/functions/CLReverse.cpp
index 2a845ba..1fc9357 100644
--- a/src/runtime/CL/functions/CLReverse.cpp
+++ b/src/runtime/CL/functions/CLReverse.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,8 @@
 #include "arm_compute/core/Types.h"
 #include "src/core/CL/kernels/CLReverseKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLReverse::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
@@ -35,6 +37,7 @@
 
 void CLReverse::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, axis);
     auto k = std::make_unique<CLReverseKernel>();
     k->configure(compile_context, input, output, axis);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLSelect.cpp b/src/runtime/CL/functions/CLSelect.cpp
index 5ec18a0..c4ab3dc 100644
--- a/src/runtime/CL/functions/CLSelect.cpp
+++ b/src/runtime/CL/functions/CLSelect.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLSelectKernel.h"
 
+#include "src/common/utils/Log.h"
+
 using namespace arm_compute;
 
 namespace arm_compute
@@ -38,6 +40,7 @@
 
 void CLSelect::configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(c, x, y, output);
     auto k = std::make_unique<CLSelectKernel>();
     k->configure(compile_context, c, x, y, output);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLSlice.cpp b/src/runtime/CL/functions/CLSlice.cpp
index 7f39143..7e3ac7d 100644
--- a/src/runtime/CL/functions/CLSlice.cpp
+++ b/src/runtime/CL/functions/CLSlice.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,8 @@
 #include "arm_compute/core/utils/helpers/tensor_transform.h"
 #include "src/core/CL/kernels/CLStridedSliceKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace experimental
@@ -35,6 +37,7 @@
 void CLSlice::configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input);
+    ARM_COMPUTE_LOG_PARAMS(input, output, starts, ends);
 
     // Get absolute end coordinates
     const int32_t slice_end_mask = arm_compute::helpers::tensor_transform::construct_slice_end_mask(ends);
diff --git a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
index 6180f4d..3b70834 100644
--- a/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
+++ b/src/runtime/CL/functions/CLSpaceToBatchLayer.cpp
@@ -31,6 +31,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLSpaceToBatchLayer::CLSpaceToBatchLayer()
@@ -50,6 +52,7 @@
 void CLSpaceToBatchLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, block_shape, paddings, output);
+    ARM_COMPUTE_LOG_PARAMS(input, block_shape, paddings, output);
 
     if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
     {
@@ -68,6 +71,7 @@
                                     const Size2D &padding_right, ICLTensor *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_LOG_PARAMS(input, block_shape_x, block_shape_y, padding_left, padding_right, output);
 
     if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
     {
diff --git a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
index 842d5bc..67dafff 100644
--- a/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
+++ b/src/runtime/CL/functions/CLSpaceToDepthLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLSpaceToDepthLayer::CLSpaceToDepthLayer()
@@ -47,6 +49,7 @@
 
 void CLSpaceToDepthLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, block_shape);
     _space_to_depth_kernel->configure(compile_context, input, output, block_shape);
 }
 
diff --git a/src/runtime/CL/functions/CLStackLayer.cpp b/src/runtime/CL/functions/CLStackLayer.cpp
index 3ef6a27..6a335da 100644
--- a/src/runtime/CL/functions/CLStackLayer.cpp
+++ b/src/runtime/CL/functions/CLStackLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,6 +34,8 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/core/CL/kernels/CLStackLayerKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 CLStackLayer::CLStackLayer() // NOLINT
@@ -52,6 +54,7 @@
 
 void CLStackLayer::configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, axis, output);
     _num_inputs = input.size();
     _stack_kernels.reserve(_num_inputs);
 
diff --git a/src/runtime/CL/functions/CLStridedSlice.cpp b/src/runtime/CL/functions/CLStridedSlice.cpp
index fd3db93..261bdc1 100644
--- a/src/runtime/CL/functions/CLStridedSlice.cpp
+++ b/src/runtime/CL/functions/CLStridedSlice.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,8 @@
 #include "arm_compute/core/Types.h"
 #include "src/core/CL/kernels/CLStridedSliceKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace experimental
@@ -35,6 +37,7 @@
                                const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
                                int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
     auto k = std::make_unique<CLStridedSliceKernel>();
     k->configure(compile_context, input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLTile.cpp b/src/runtime/CL/functions/CLTile.cpp
index 818f10f..ef79099 100644
--- a/src/runtime/CL/functions/CLTile.cpp
+++ b/src/runtime/CL/functions/CLTile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,8 @@
 
 #include "src/core/CL/kernels/CLTileKernel.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 void CLTile::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
@@ -34,6 +36,7 @@
 
 void CLTile::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output, multiples);
     auto k = std::make_unique<CLTileKernel>();
     k->configure(compile_context, input, output, multiples);
     _kernel = std::move(k);
diff --git a/src/runtime/CL/functions/CLUnstack.cpp b/src/runtime/CL/functions/CLUnstack.cpp
index 28d122b..98d4781 100644
--- a/src/runtime/CL/functions/CLUnstack.cpp
+++ b/src/runtime/CL/functions/CLUnstack.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,6 +29,8 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
+#include "src/common/utils/Log.h"
+
 namespace arm_compute
 {
 namespace
@@ -66,6 +68,7 @@
 
 void CLUnstack::configure(const CLCompileContext &compile_context, const ICLTensor *input, const std::vector<ICLTensor *> &output_vector, int axis)
 {
+    ARM_COMPUTE_LOG_PARAMS(input, output_vector, axis);
     std::vector<ITensorInfo *> outputs_vector_info(output_vector.size());
     std::transform(output_vector.begin(), output_vector.end(), outputs_vector_info.begin(), [](ICLTensor * t)
     {