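Fix depthwise convolution failure in Cpu backend

- Pass a null bias info to the optimized depthwise configure() when no
  biases tensor is provided, instead of dereferencing a null pointer.
- Mark the original weights as unused in CpuDepthwiseConvolution after they
  have been permuted, rather than in NEDepthwiseConvolutionLayer.
- Retrieve the source tensor with get_const_tensor() before permuting it.

Resolves: COMPMID-4395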

Change-Id: Ib3dfdc42e95998c1e5713d6ec1bdaa83299b0360
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5488
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
index 1d7b1c6..da9610e 100644
--- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
@@ -121,7 +121,7 @@
         _impl->permuted_output.info()->set_quantization_info(output->info()->quantization_info());
 
         // Configure optimized depthwise
-        dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases->info(), _impl->permuted_output.info(), info);
+        dwc_optimized_func->configure(_impl->permuted_input.info(), _impl->permuted_weights.info(), biases == nullptr ? nullptr : biases->info(), _impl->permuted_output.info(), info);
 
         // Configure the function to transform the convoluted output to ACL's native ordering format NCHW
         _impl->permuted_output.info()->set_data_layout(DataLayout::NHWC);
@@ -132,7 +132,7 @@
     }
     else
     {
-        dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases->info(), _impl->dst->info(), info);
+        dwc_optimized_func->configure(_impl->src->info(), _impl->weights->info(), biases == nullptr ? nullptr : biases->info(), _impl->dst->info(), info);
     }
 
     // Allocate memory based on the internal memory requirements
@@ -184,7 +184,6 @@
         if(_impl->permute)
         {
             _impl->permuted_weights.allocator()->allocate();
-            _impl->weights->mark_as_unused();
         }
 
         if(!_impl->permuted_weights.is_used())
diff --git a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp b/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
index 183a2af..6d09728 100644
--- a/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
+++ b/src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp
@@ -170,7 +170,7 @@
     if(_permute)
     {
         ITensorPack pack;
-        auto        src      = tensors.get_tensor(TensorType::ACL_SRC_0);
+        auto        src      = tensors.get_const_tensor(TensorType::ACL_SRC_0);
         auto        src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
         pack.add_tensor(TensorType::ACL_SRC, src);
         pack.add_tensor(TensorType::ACL_DST, src_perm);
@@ -247,6 +247,8 @@
             pack.add_tensor(TensorType::ACL_DST, permuted_weights);
             _permute_weights->run(pack);
 
+            weights->mark_as_unused();
+
             ITensorPack pack_opt;
             pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
             pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);