COMPMID-1867: Add NEON/SVE GEMM Hybrid kernels.

Change-Id: Ib40a9921e7f9a6a8be6c38872d6b3a0f24ed0cd3
Reviewed-on: https://review.mlplatform.org/515
Reviewed-by: Anthony Barbier <Anthony.barbier@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/arm_gemm/utils.hpp b/src/core/NEON/kernels/arm_gemm/utils.hpp
index a1fc00e..8b96c32 100644
--- a/src/core/NEON/kernels/arm_gemm/utils.hpp
+++ b/src/core/NEON/kernels/arm_gemm/utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,7 @@
 
 #pragma once
 
-#ifdef __ARM_FEATURE_SVE
-#include <arm_sve.h>
-#endif
+#include <cstddef>
 
 // Macro for unreachable code (e.g. impossible default cases on switch)
 #define UNREACHABLE(why)  __builtin_unreachable()
@@ -49,13 +47,43 @@
     }
 }
 
+namespace arm_gemm {
+namespace utils {
+namespace { // NOTE(review): anonymous namespace inside a header - every translation unit that includes it gets its own internal-linkage copy of what is declared here; confirm this is intended
+
+#ifdef __ARM_FEATURE_SVE
+template<size_t sz> // sz: element size in bytes (get_vector_length passes sizeof(T))
+inline unsigned long get_vector_length_sz() { // Generic case: number of sz-byte elements that fit in one SVE vector
+    unsigned long v; // written by the asm statement below
+
+    __asm (
+        "cntb	%0"
+        : "=r" (v)
+    );
+
+    return v / sz; // cntb yields the vector length in bytes; divide by the element size to get an element count
+}
+
+#define VEC_LEN_SPEC(sz, opcode) template <> inline unsigned long get_vector_length_sz<sz>() { unsigned long v; __asm ( opcode " %0" : "=r" (v)); return v; } // Defines the specialisation for element size 'sz': executes 'opcode' and returns its result directly, with no division
+
+VEC_LEN_SPEC(8, "cntd") // 8-byte elements: cntd counts 64-bit elements per vector
+VEC_LEN_SPEC(4, "cntw") // 4-byte elements: cntw counts 32-bit elements per vector
+VEC_LEN_SPEC(2, "cnth") // 2-byte elements: cnth counts 16-bit elements per vector
+VEC_LEN_SPEC(1, "cntb") // 1-byte elements: cntb counts 8-bit elements per vector
+#endif
+
+} // anonymous namespace
+
 template <typename T>
 inline unsigned long get_vector_length() {
 #ifdef __ARM_FEATURE_SVE
-    const unsigned long length = svcntb();
+    return get_vector_length_sz<sizeof(T)>(); // SVE build: element count comes from the hardware vector length, selected by sizeof(T)
 #else
-    const unsigned long length = 16;
+    return 16 / sizeof(T); // non-SVE build: fixed 16-byte vector (presumably 128-bit NEON registers - confirm), so this is elements of T per 16 bytes
 #endif
+}
 
-    return length / sizeof(T);
-}
\ No newline at end of file
+} // utils namespace
+} // arm_gemm namespace
+
+using namespace arm_gemm::utils; // NOTE(review): using-directive at global scope in a header - makes every arm_gemm::utils name visible unqualified in all files that include it; confirm callers rely on this
\ No newline at end of file