COMPMID-1773: Replaced the GEMM wrapper (NEGEMMAssemblyDispatch) with NEGEMM in NEWinogradConvolutionLayer
Change-Id: I68c648a5246fcdc67a496602089f93d65eb1d601
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 5da6331..292c70b 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -31,7 +31,8 @@
#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+
#include "arm_compute/runtime/Tensor.h"
#include <memory>
@@ -103,7 +104,7 @@
private:
MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
+ NEGEMM _gemm_function;
std::unique_ptr<INEKernel> _transform_input_kernel;
std::unique_ptr<INEKernel> _transform_output_kernel;
std::unique_ptr<INEKernel> _transform_weights_kernel;
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index 569db37..c8e3b3b 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -235,7 +235,7 @@
} //namespace
NEWinogradConvolutionLayer::NEWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _asm_glue(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(),
+ : _memory_group(memory_manager), _gemm_function(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(),
_permute_input(), _permute_weights(), _permute_output(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(), _weights_hwio(), _input(), _weights(), _output(),
_is_prepared(false), _is_activationlayer_enabled(false)
{
@@ -484,8 +484,7 @@
}
_weights_hwio.allocator()->allocate();
-
- _asm_glue.configure(&_input_workspace, &_kernel_storage, &_output_workspace, 1.0f, 0.f, false);
+ _gemm_function.configure(&_input_workspace, &_kernel_storage, nullptr, &_output_workspace, 1.0f, 0.f);
_input_workspace.allocator()->allocate();
_kernel_storage.allocator()->allocate();
_output_workspace.allocator()->allocate();
@@ -525,7 +524,7 @@
NEScheduler::get().schedule(_transform_input_kernel.get(), Window::DimX);
//Run 16 GEMMs in multiple threads, each kernel runs one or more GEMMs
- _asm_glue.run();
+ _gemm_function.run();
// Transform output tensor to the spatial domain
NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX);