Update assembly code
This patch brings performance uplift on Cortex-A35.
Resolves: COMPMID-4316
Change-Id: I2b9c02a599373f780dd1b981b821e33bd59a3422
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5461
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/arm_gemm/asmlib.hpp b/src/core/NEON/kernels/arm_gemm/asmlib.hpp
index 0067433..7766656 100644
--- a/src/core/NEON/kernels/arm_gemm/asmlib.hpp
+++ b/src/core/NEON/kernels/arm_gemm/asmlib.hpp
@@ -32,6 +32,7 @@
// "Correct" version
#define ASM_PREFETCH(address) "PRFM PLDL1KEEP, " address "\n"
+#define ASM_PREFETCHU(address) "PRFUM PLDL1KEEP, " address "\n"
#define ASM_PREFETCHL2(address) "PRFM PLDL2KEEP, " address "\n"
#define ASM_PREFETCHW(address) "PRFM PSTL1KEEP, " address "\n"
#define ASM_PREFETCHWL2(address) "PRFM PSTL2KEEP, " address "\n"