COMPMID-1381: Cleaned up the AssemblyHelper interface

Introduced a new IFunction in preparation for forking the arm_gemm functions
Improved encapsulation by abstracting away which method is used

Change-Id: I5fd8b14b5c77e7f8ecb09029b5e2eccd10dbdcf4
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139108
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index cf059e5..523f1d3 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -32,10 +32,9 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
 #include "arm_compute/runtime/Tensor.h"
 
-#include "arm_compute/runtime/NEON/AssemblyHelper.h"
-
 #include <memory>
 
 namespace arm_compute
@@ -86,12 +85,10 @@
     NEGEMMInterleave4x4Kernel  _interleave_kernel;
     NEGEMMTranspose1xWKernel   _transpose_kernel;
     NEGEMMMatrixMultiplyKernel _mm_kernel;
-    AssemblyKernelGlueF32      _asm_glue;
+    NEGEMMAssemblyDispatchF32  _asm_glue;
     NEGEMMMatrixAdditionKernel _ma_kernel;
     Tensor                     _tmp_a;
     Tensor                     _tmp_b;
-    Tensor                     _workspace;
-    Tensor                     _B_pretransposed;
     const ITensor             *_original_b;
     bool                       _run_vector_matrix_multiplication;
     bool                       _run_addition;