Integrate SME2 kernels

* Add SME/SME2 detection.
* Integrate SME2 implementation for:
  - Normal convolution
  - Winograd
  - Depthwise convolution
  - Pooling

Resolves: COMPMID-5700
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I2f1ca1d05f8cfeee9309ed1c0a36096a4a6aad5c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8692
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/common/cpuinfo/CpuInfo.cpp b/src/common/cpuinfo/CpuInfo.cpp
index 0be21be..f76c0cc 100644
--- a/src/common/cpuinfo/CpuInfo.cpp
+++ b/src/common/cpuinfo/CpuInfo.cpp
@@ -336,17 +336,18 @@
 #elif(BARE_METAL) && defined(__aarch64__)        /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
 
     // Assume single CPU in bare metal mode.  Just read the ID register and feature bits directly.
-    uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, svefr0 = 0, midr = 0;
+    uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
     ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
     ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
     ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
+    ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1);
     ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1);
     if((pfr0 >> 32) & 0xf)
     {
         svefr0 = get_sve_feature_reg();
     }
 
-    CpuIsaInfo            isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, svefr0, midr);
+    CpuIsaInfo            isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
     std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
     CpuInfo               info(isa, cpus_model);
     return info;
diff --git a/src/common/cpuinfo/CpuInfo.h b/src/common/cpuinfo/CpuInfo.h
index 135ff96..953e488 100644
--- a/src/common/cpuinfo/CpuInfo.h
+++ b/src/common/cpuinfo/CpuInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -71,6 +71,14 @@
     {
         return _isa.sve2;
     }
+    bool has_sme() const
+    {
+        return _isa.sme;
+    }
+    bool has_sme2() const
+    {
+        return _isa.sme2;
+    }
     bool has_fp16() const
     {
         return _isa.fp16;
diff --git a/src/common/cpuinfo/CpuIsaInfo.cpp b/src/common/cpuinfo/CpuIsaInfo.cpp
index 6165533..23da54a 100644
--- a/src/common/cpuinfo/CpuIsaInfo.cpp
+++ b/src/common/cpuinfo/CpuIsaInfo.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,7 @@
 #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVEBF16 (1 << 12)
 #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_I8MM (1 << 13)
 #define ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16 (1 << 14)
+#define ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME (1 << 23)
 
 namespace arm_compute
 {
@@ -69,6 +70,10 @@
     isa.sve  = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_SVE);
     isa.sve2 = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SVE2);
 
+    // Detection of SME from type HWCAP2 in the auxiliary vector
+    isa.sme   = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_SME);
+    isa.sme2  = isa.sme; // TODO: placeholder - SME2 currently mirrors the SME flag until a dedicated SME2 check is added
+
     // Data-type support
     isa.fp16    = is_feature_supported(hwcaps, ARM_COMPUTE_CPU_FEATURE_HWCAP_FPHP | ARM_COMPUTE_CPU_FEATURE_HWCAP_ASIMDHP);
     isa.bf16    = is_feature_supported(hwcaps2, ARM_COMPUTE_CPU_FEATURE_HWCAP2_BF16);
@@ -87,7 +92,7 @@
 }
 #endif /* defined(__aarch64__) */
 
-void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, const uint64_t pfr0, const uint64_t svefr0)
+void decode_regs(CpuIsaInfo &isa, const uint64_t isar0, const uint64_t isar1, const uint64_t pfr0, const uint64_t pfr1, const uint64_t svefr0)
 {
     auto is_supported = [](uint64_t feature_reg, uint8_t feature_pos) -> bool
     {
@@ -97,6 +102,8 @@
     // High-level SIMD support
     isa.sve  = is_supported(pfr0, 32);
     isa.sve2 = is_supported(svefr0, 0);
+    isa.sme  = is_supported(pfr1, 24);
+    isa.sme2 = (((pfr1 >> 24) & 0xf) > 1);
 
     // Data-type support
     isa.fp16    = is_supported(pfr0, 16);
@@ -140,11 +147,11 @@
     return isa;
 }
 
-CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t svefr0, uint64_t midr)
+CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr)
 {
     CpuIsaInfo isa;
 
-    decode_regs(isa, isar0, isar1, pfr0, svefr0);
+    decode_regs(isa, isar0, isar1, pfr0, pfr1, svefr0);
 
     const CpuModel model = midr_to_model(midr);
     allowlisted_model_features(isa, model);
@@ -152,4 +159,4 @@
     return isa;
 }
 } // namespace cpuinfo
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute
diff --git a/src/common/cpuinfo/CpuIsaInfo.h b/src/common/cpuinfo/CpuIsaInfo.h
index a2aace1..b92b653 100644
--- a/src/common/cpuinfo/CpuIsaInfo.h
+++ b/src/common/cpuinfo/CpuIsaInfo.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,8 @@
     bool neon{ false };
     bool sve{ false };
     bool sve2{ false };
+    bool sme{ false };
+    bool sme2{ false };
 
     /* Data-type extensions support */
     bool fp16{ false };
@@ -67,13 +69,14 @@
  *
  * @param[in] isar0  Value of Instruction Set Attribute Register 0 (ID_AA64ISAR0_EL1)
  * @param[in] isar1  Value of Instruction Set Attribute Register 1 (ID_AA64ISAR1_EL1)
- * @param[in] pfr0   Value of  Processor Feature Register 0 (ID_AA64PFR0_EL1)
+ * @param[in] pfr0   Value of Processor Feature Register 0 (ID_AA64PFR0_EL1)
+ * @param[in] pfr1   Value of Processor Feature Register 1 (ID_AA64PFR1_EL1)
  * @param[in] svefr0 Value of SVE feature ID register 0 (ID_AA64ZFR0_EL1)
  * @param[in] midr   Value of Main ID Register (MIDR)
  *
  * @return CpuIsaInfo A populated ISA feature structure
  */
-CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t svefr0, uint64_t midr);
+CpuIsaInfo init_cpu_isa_from_regs(uint64_t isar0, uint64_t isar1, uint64_t pfr0, uint64_t pfr1, uint64_t svefr0, uint64_t midr);
 } // namespace cpuinfo
 } // namespace arm_compute