Integrate SME2 kernels

* Add SME/SME2 detection.
* Integrate SME2 implementation for:
  - Normal convolution
  - Winograd
  - Depthwise convolution
  - Pooling

Resolves: COMPMID-5700
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I2f1ca1d05f8cfeee9309ed1c0a36096a4a6aad5c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8692
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
index 4ff249a..2d03183 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
@@ -28,11 +28,19 @@
 #include "depthwise_depthfirst.hpp"
 #include "depthwise_depthfirst_generic.hpp"
 #include "depthwise_depthfirst_multiplier.hpp"
+#include "depthwise_planar.hpp"
 
 #include "depthwise_implementation_constraints.hpp"
 
 #if defined(__aarch64__)
 #if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SME2)
+#include "kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za.hpp"
+#include "kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za.hpp"
+#include "kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za.hpp"
+#include "kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za.hpp"
+#endif  // defined(ARM_COMPUTE_ENABLE_SME2)
+
 #include "kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
 #include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
 #include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
@@ -73,6 +81,60 @@
 static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
 #if defined(__aarch64__)
 #if defined(ARM_COMPUTE_ENABLE_SVE)
+#if defined(ARM_COMPUTE_ENABLE_SME2)
+  {
+    DepthwiseMethod::PLANAR,
+    "sme2_s8q_planar_3x3_s1_4rows_dot_za",
+    constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
+                             is_supported<sme2_s8q_planar_3x3_s1_4rows_dot_za>,
+                             has_no_channel_multiplier,
+                             qp_has_no_left_shift, no_prime_right_pad),
+    nullptr,
+    [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
+      auto strat = new sme2_s8q_planar_3x3_s1_4rows_dot_za(args.cpu_info);
+      return new DepthwisePlanar<int8_t>(strat, args, qp);
+    },
+  },
+  {
+    DepthwiseMethod::PLANAR,
+    "sme2_s8q_planar_3x3_s2_4rows_dot_za",
+    constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
+                             is_supported<sme2_s8q_planar_3x3_s2_4rows_dot_za>,
+                             has_no_channel_multiplier,
+                             qp_has_no_left_shift, no_prime_right_pad),
+    nullptr,
+    [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
+      auto strat = new sme2_s8q_planar_3x3_s2_4rows_dot_za(args.cpu_info);
+      return new DepthwisePlanar<int8_t>(strat, args, qp);
+    },
+  },
+  {
+    DepthwiseMethod::PLANAR,
+    "sme2_s8q_planar_5x5_s1_4rows_dot_za",
+    constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
+                             is_supported<sme2_s8q_planar_5x5_s1_4rows_dot_za>,
+                             has_no_channel_multiplier,
+                             qp_has_no_left_shift, no_prime_right_pad),
+    nullptr,
+    [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
+      auto strat = new sme2_s8q_planar_5x5_s1_4rows_dot_za(args.cpu_info);
+      return new DepthwisePlanar<int8_t>(strat, args, qp);
+    },
+  },
+  {
+    DepthwiseMethod::PLANAR,
+    "sme2_s8q_planar_5x5_s2_4rows_dot_za",
+    constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
+                             is_supported<sme2_s8q_planar_5x5_s2_4rows_dot_za>,
+                             has_no_channel_multiplier,
+                             qp_has_no_left_shift, no_prime_right_pad),
+    nullptr,
+    [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
+      auto strat = new sme2_s8q_planar_5x5_s2_4rows_dot_za(args.cpu_info);
+      return new DepthwisePlanar<int8_t>(strat, args, qp);
+    },
+  },
+#endif  // defined(ARM_COMPUTE_ENABLE_SME2)
   {
     DepthwiseMethod::DEPTHFIRST,
     "sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",