DepthwiseConv reports full assembly kernel name

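* CpuDepthwiseConv2dAssemblyWrapperKernel::name() now returns the wrapper
  name followed by "/" and the name of the selected assembly kernel,
  instead of only the fixed wrapper name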
* Resolves MLCE-706

Change-Id: I01ddbe2c030e22e5ba6761ed32110a35c314ccae
Signed-off-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6787
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation.hpp
index 1d52b56..ea41529 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation.hpp
@@ -136,7 +136,14 @@
 {
   const DepthwiseImplementation<TInput, TWeight, TOutput, OutputStage> *impl = nullptr;
   const bool success = find_implementation<TInput, TWeight, TOutput, OutputStage>(args, os, impl);
-  return UniqueDepthwiseCommon<TInput, TWeight, TOutput>(success ? impl->get_instance(args, os) : nullptr);
+
+  if(success)
+  {
+        auto i =  impl->get_instance(args, os);
+        i->set_name(impl->name);
+        return UniqueDepthwiseCommon<TInput, TWeight, TOutput>(i);
+  }
+  return nullptr;
 }
 
 }  // namespace depthwise
diff --git a/src/core/NEON/kernels/assembly/depthwise.hpp b/src/core/NEON/kernels/assembly/depthwise.hpp
index eadf48d..9262ea0 100644
--- a/src/core/NEON/kernels/assembly/depthwise.hpp
+++ b/src/core/NEON/kernels/assembly/depthwise.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -78,10 +78,20 @@
 template <typename TInput, typename TWeight, typename TOutput>
 class DepthwiseCommon : public IDepthwiseCommon
 {
+private:
+    std::string _name{};
+
 protected:
     const DepthwiseArgs m_args; // Copy of arguments
-
 public:
+    std::string name() const
+    {
+        return _name;
+    }
+    void set_name(const std::string &n)
+    {
+        _name = n;
+    }
     DepthwiseCommon(const DepthwiseArgs &args)
         : m_args(args) {};
     DepthwiseCommon(DepthwiseCommon &) = delete;
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
index 17bbd16..73bf7dc 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
@@ -56,7 +56,7 @@
 template <typename TSrc, typename TWeights, typename TDst>
 void create_arm_dwc(const ITensorInfo *src, const ITensorInfo *weights, ITensorInfo *dst,
                     const ConvolutionInfo &info, const CPUInfo &cpu_info,
-                    std::unique_ptr<arm_conv::depthwise::IDepthwiseCommon> &kernel)
+                    std::unique_ptr<arm_conv::depthwise::IDepthwiseCommon> &kernel, std::string &_name)
 {
     unsigned int stride_cols{};
     unsigned int stride_rows{};
@@ -88,6 +88,7 @@
         return;
     }
 
+    _name  = dwc_kernel_asm->name();
     kernel = std::move(dwc_kernel_asm);
 }
 
@@ -95,7 +96,8 @@
 void create_arm_dwc_quant(const ITensorInfo *src, const ITensorInfo *weights, ITensorInfo *dst,
                           const ConvolutionInfo &info, const CPUInfo &cpu_info,
                           std::unique_ptr<arm_conv::depthwise::IDepthwiseCommon> &kernel,
-                          std::vector<int32_t> &multipliers, std::vector<int32_t> &right_shifts, std::vector<int32_t> &left_shifts)
+                          std::vector<int32_t> &multipliers, std::vector<int32_t> &right_shifts, std::vector<int32_t> &left_shifts,
+                          std::string &_name)
 {
     unsigned int stride_cols{};
     unsigned int stride_rows{};
@@ -189,7 +191,7 @@
         // Configuration not supported: Leave function unconfigured:
         return;
     }
-
+    _name  = dwc_kernel_asm->name();
     kernel = std::move(dwc_kernel_asm);
 }
 } // namespace
@@ -198,7 +200,8 @@
     : _kernel_asm(nullptr),
       _multipliers(),
       _left_shifts(),
-      _right_shifts()
+      _right_shifts(),
+      _name()
 {
 }
 
@@ -213,30 +216,31 @@
     // Destination initialization if not yet initialized
     const TensorShape dst_shape = compute_depthwise_convolution_shape(*src, *weights, info);
     auto_init_if_empty(*dst, src->clone()->set_tensor_shape(dst_shape));
-
+    _name = "CpuDepthwiseConv2dAssemblyWrapperKernel";
+    std::string asm_kernel_name("");
 #if defined(__aarch64__)
     switch(src->data_type())
     {
         case DataType::QASYMM8:
             if(is_data_type_quantized_per_channel(weights->data_type()))
             {
-                create_arm_dwc_quant<uint8_t, int8_t, uint8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts);
+                create_arm_dwc_quant<uint8_t, int8_t, uint8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts, asm_kernel_name);
             }
             else
             {
-                create_arm_dwc_quant<uint8_t, uint8_t, uint8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts);
+                create_arm_dwc_quant<uint8_t, uint8_t, uint8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts, asm_kernel_name);
             }
             break;
         case DataType::QASYMM8_SIGNED:
-            create_arm_dwc_quant<int8_t, int8_t, int8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts);
+            create_arm_dwc_quant<int8_t, int8_t, int8_t>(src, weights, dst, info, cpu_info, _kernel_asm, _multipliers, _right_shifts, _left_shifts, asm_kernel_name);
             break;
 #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
         case DataType::F16:
-            create_arm_dwc<float16_t, float16_t, float16_t>(src, weights, dst, info, cpu_info, _kernel_asm);
+            create_arm_dwc<float16_t, float16_t, float16_t>(src, weights, dst, info, cpu_info, _kernel_asm, asm_kernel_name);
             break;
 #endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
         case DataType::F32:
-            create_arm_dwc<float, float, float>(src, weights, dst, info, cpu_info, _kernel_asm);
+            create_arm_dwc<float, float, float>(src, weights, dst, info, cpu_info, _kernel_asm, asm_kernel_name);
             break;
         default:
             break;
@@ -245,6 +249,10 @@
 
     Window win = calculate_max_window(*dst, Steps());
     ICpuKernel::configure(win);
+    if(_kernel_asm != nullptr)
+    {
+        _name += "/" + asm_kernel_name;
+    }
 }
 
 Status CpuDepthwiseConv2dAssemblyWrapperKernel::validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *dst, const ConvolutionInfo &info)
@@ -352,7 +360,7 @@
 
 const char *CpuDepthwiseConv2dAssemblyWrapperKernel::name() const
 {
-    return "CpuDepthwiseConv2dAssemblyWrapperKernel";
+    return _name.c_str();
 }
 
 size_t CpuDepthwiseConv2dAssemblyWrapperKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
index 902e961..ea51d5d 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
@@ -123,6 +123,7 @@
     std::vector<int32_t>                                   _multipliers{};
     std::vector<int32_t>                                   _left_shifts{};
     std::vector<int32_t>                                   _right_shifts{};
+    std::string                                            _name{};
 };
 } // namespace kernels
 } // namespace cpu