COMPMID-1248 Enabled memory manager in NEWinogradConvolutionLayer

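Register the Winograd input/output transform workspaces with the function's
memory group and defer their allocation until after the assembly GEMM has
been configured, so the memory manager can reuse that memory across
functions. As part of this, NEGEMMAssemblyDispatch's allocate_workspace()
now takes the MemoryGroup by reference instead of by pointer, removing the
null check.

A minimal usage sketch (illustrative only, not part of this patch), assuming
the MemoryManagerOnDemand / BlobLifetimeManager / PoolManager setup used in
the library's examples; the helper name and the populate() call are
assumptions and may differ between releases:

    #include <memory>
    #include "arm_compute/runtime/Allocator.h"
    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/PoolManager.h"
    #include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"

    using namespace arm_compute;

    // Hypothetical helper: runs a Winograd convolution whose workspaces are
    // backed by a shared memory manager instead of per-function allocations.
    void run_winograd_with_memory_manager(ITensor *src, ITensor *weights, ITensor *biases, ITensor *dst)
    {
        // Memory manager backed by a blob lifetime manager and a pool manager.
        auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
        auto pool_mgr     = std::make_shared<PoolManager>();
        auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

        // The function registers its transform workspaces with the memory group
        // built from this manager; they are only materialised at run time.
        NEWinogradConvolutionLayer conv(mm);
        conv.configure(src, weights, biases, dst, PadStrideInfo(1, 1, 1, 1) /* stride 1, pad 1 */);

        // Back the managed pools with real memory before the first run.
        // (Assumption: populate() is the population API in this release.)
        Allocator allocator;
        mm->populate(allocator, 1 /* num_pools */);

        conv.run();
    }
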
Change-Id: I7bbab53f18a42f0879d80122a52bb6bdca4b8631
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142413
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index 8ba620f..39fee1b 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
@@ -159,7 +159,7 @@
      * @param[in] memory_group   Tensor memory group.
      * @param[in] alignment      Workspace memory alignment.
      */
-    void allocate_workspace(size_t workspace_size, MemoryGroup *memory_group, size_t alignment);
+    void allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment);
 
     /** Assembly Gemm kernel */
     std::unique_ptr<arm_gemm::GemmCommon<TypeInput, TypeOutput>> _gemm_kernel_asm{ nullptr };
@@ -204,8 +204,7 @@
     {
         // Allocate workspace
         const unsigned int alignment = 4096;
-        //FIXME: is memory_group ever null ?
-        allocate_workspace(workspace_size, &memory_group, alignment);
+        allocate_workspace(workspace_size, memory_group, alignment);
     }
 
     //if we disable this code below in brackets then ConvLayer deadlocks when threads > 1 and
@@ -256,14 +255,11 @@
 }
 
 template <typename TypeInput, typename TypeOutput>
-void Fallback<TypeInput, TypeOutput>::allocate_workspace(size_t workspace_size, MemoryGroup *memory_group, size_t alignment)
+void Fallback<TypeInput, TypeOutput>::allocate_workspace(size_t workspace_size, MemoryGroup &memory_group, size_t alignment)
 {
     ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "size cannot be 0");
     _workspace.allocator()->init(TensorInfo(TensorShape{ (workspace_size + alignment /* FIXME: remove alignment after COMPMID-1088 */) }, 1, DataType::S8), alignment);
-    if(memory_group != nullptr)
-    {
-        memory_group->manage(&_workspace);
-    }
+    memory_group.manage(&_workspace);
     _workspace.allocator()->allocate();
 }
 
diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
index a71eade..11bb2d8 100644
--- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp
@@ -263,23 +263,17 @@
     d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
 
     _input_workspace.allocator()->init(a_info, storage_alignment);
-    _input_workspace.allocator()->allocate();
-
     _kernel_storage.allocator()->init(b_info, storage_alignment);
-    _kernel_storage.allocator()->allocate();
-
     _output_workspace.allocator()->init(d_info, storage_alignment);
-    _output_workspace.allocator()->allocate();
 
     // configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()
     TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
                                 _output->info()->dimension(1), _output->info()->dimension(3)),
                     1, _output->info()->data_type());
     _output_nhwc.allocator()->init(info);
-    _output_nhwc.allocator()->allocate();
 
     // Configure the InputTransform
-
+    _memory_group.manage(&_input_workspace);
     if(data_layout == DataLayout::NCHW)
     {
         // configure the kernel to transform the input tensor from NCHW -> NHWC
@@ -314,6 +308,7 @@
     // Configure OutputTransform
     //The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
 
+    _memory_group.manage(&_output_workspace);
     if(data_layout == DataLayout::NCHW)
     {
         transform_output_kernel->configure(biases, &_output_workspace,
@@ -328,10 +323,15 @@
     }
 
     _asm_glue.configure(&_input_workspace, &_kernel_storage, &_output_workspace, 1.0f, 0.f, false);
+    _input_workspace.allocator()->allocate();
+    _kernel_storage.allocator()->allocate();
+    _output_workspace.allocator()->allocate();
 
     // Reorder the convoluted output to ACL's ordering NCHW
     _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
 
+    _output_nhwc.allocator()->allocate();
+
     _transform_input_kernel   = std::move(transform_input_kernel);
     _transform_weights_kernel = std::move(transform_weights_kernel);
     _transform_output_kernel  = std::move(transform_output_kernel);