COMPMID-784: Winograd refactoring

Removed the code in the run() method that created a subtensor and imported memory from the workspace.

The subtensor is no longer needed because the tensors are now reordered with NEPermute. The call to
winograd::Winograd2x2_3x3GEMM<TOut, TIn>::reshape_output() transforms the results from the Winograd domain
into the spatial domain, and the transformed output is stored in the member _output_nhwc.

Change-Id: Iae09d26c7587cd2eed98968c3ce214e20031038e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115483
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
index eaf77e6..d17630a 100644
--- a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,9 +29,12 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "support/ToolchainSupport.h"
 
-#include "src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp"
+#include "src/core/NEON/kernels/winograd/winograd_gemm.hpp"
 
-using T = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>;
+namespace
+{
+using T = winograd::Winograd2x2_3x3GEMM<float, float>;
+} // namespace
 
 namespace arm_compute
 {
@@ -50,16 +53,6 @@
 {
 }
 
-void Winograd3x3F32::nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input)
-{
-    _pimpl->convolver.nchw2nhwc(input_shape, padding_type, working_space, reinterpret_cast<const float *>(input));
-}
-
-void Winograd3x3F32::nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output)
-{
-    _pimpl->convolver.nhwc2nchw(input_shape, padding_type, working_space, reinterpret_cast<float *const>(output));
-}
-
 void Winograd3x3F32::transform_weights(const void *const kernel, void *transform_working_space)
 {
     _pimpl->convolver.transform_weights(reinterpret_cast<const float *>(kernel), transform_working_space);
@@ -82,11 +75,6 @@
 #endif /* __aarch64__ */
 }
 
-std::pair<void *, void *> Winograd3x3F32::get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space)
-{
-    return _pimpl->convolver.get_nhwc_ptrs(input_shape, padding_type, working_space);
-}
-
 Winograd3x3F32::Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage)
     : _pimpl(support::cpp14::make_unique<Private>(kernel_shape, input_shape, padding_type, kernel_storage))
 {