COMPMID-2178: Update GEMM assembly code.
Perform offset reduction and requantization within the assembly wrapper.
Change-Id: I5d5b3e1f6f9ef4c71805362c57f88ff199c027a3
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1541
Comments-Addressed: Pablo Marquez <pablo.tello@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
index 21f8278..9206418 100644
--- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
@@ -69,8 +69,8 @@
GemvPretransposed & operator= (GemvPretransposed &) = delete;
GemvPretransposed(const GemmArgs<Tr> &args)
- : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _beta(args._beta), _ci(args._ci),
- _buffer_per_multi(_Ksize * iceildiv(_Nsize, strategy::A_interleave()) * strategy::A_interleave()) {
+ : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _beta(args._beta), _ci(args._ci),
+ _buffer_per_multi(_Ksize * iceildiv(_Nsize, strategy::A_interleave()) * strategy::A_interleave()) {
/* For now don't do any blocking. TODO: figure out if we should. */
if (args._cfg && args._cfg->inner_block_size) {
m_block = args._cfg->inner_block_size;