COMPMID-1004 GLES: Add memory manager to GLES functions

Change-Id: I80fc9c0dd02afd79b501abde751036f9599b7bf2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125103
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
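
For context, a minimal usage sketch of the new memory-manager constructors. The manager wiring below mirrors what the CL/NEON backends use (MemoryManagerOnDemand with a BlobLifetimeManager and PoolManager); the setup calls (set_allocator, set_num_pools, finalize) are assumed from that API and are not part of this patch:

    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/PoolManager.h"
    #include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
    #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
    #include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"

    #include <memory>

    using namespace arm_compute;

    void run_fc(GCTensor &src, GCTensor &weights, GCTensor &bias, GCTensor &dst)
    {
        // src/weights/bias/dst are assumed to be initialised and allocated, and
        // GCScheduler::get().default_init() to have been called already.

        // The allocator that backs the pools; declared first so it outlives them.
        GCBufferAllocator buffer_allocator;

        // One manager can be shared by several functions so that their
        // intermediate SSBOs come out of the same pool.
        auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
        auto pool_mgr     = std::make_shared<PoolManager>();
        auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);

        GCFullyConnectedLayer fc(mm);
        fc.configure(&src, &weights, &bias, &dst);

        // Create the pools once all managed functions have been configured.
        mm->set_allocator(&buffer_allocator);
        mm->set_num_pools(1);
        mm->finalize();

        fc.run();
    }
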
diff --git a/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
new file mode 100644
index 0000000..50e3cc7
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/GCBufferAllocator.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/GCBufferAllocator.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+
+#include <cstddef>
+
+using namespace arm_compute;
+
+void *GCBufferAllocator::allocate(size_t size, size_t alignment)
+{
+    ARM_COMPUTE_UNUSED(alignment);
+    auto *gl_buffer = new GLBufferWrapper();
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl_buffer->_ssbo_name));
+    ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(size), nullptr, GL_STATIC_DRAW));
+    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0));
+
+    return reinterpret_cast<void *>(gl_buffer);
+}
+
+void GCBufferAllocator::free(void *ptr)
+{
+    ARM_COMPUTE_ERROR_ON(ptr == nullptr);
+    auto *gl_buffer = reinterpret_cast<GLBufferWrapper *>(ptr);
+    delete gl_buffer;
+}
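
allocate() relies on GLBufferWrapper creating an SSBO name on construction and deleting it on destruction; a sketch of what that wrapper presumably looks like (the real definition lives in GCTensorAllocator.h and may differ in detail):

    // Minimal RAII wrapper around a GL shader-storage buffer name (assumed shape).
    struct GLBufferWrapper
    {
        GLBufferWrapper()
        {
            ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name));
        }
        ~GLBufferWrapper()
        {
            ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name));
        }
        GLuint _ssbo_name{ 0 };
    };
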
diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp
index edbd16d..e193d26 100644
--- a/src/runtime/GLES_COMPUTE/GCTensor.cpp
+++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,7 @@
 using namespace arm_compute;
 
 GCTensor::GCTensor()
-    : _allocator()
+    : _allocator(this)
 {
 }
 
diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp
index 694b34f..abd2b48 100644
--- a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp
+++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,11 +31,16 @@
 
 using namespace arm_compute;
 
-GCTensorAllocator::GCTensorAllocator()
-    : _gl_buffer(), _mapping(nullptr)
+GCTensorAllocator::GCTensorAllocator(GCTensor *owner)
+    : _associated_memory_group(nullptr), _gl_buffer(), _mapping(nullptr), _owner(owner)
 {
 }
 
+GCTensorAllocator::~GCTensorAllocator()
+{
+    _gl_buffer.reset();
+}
+
 uint8_t *GCTensorAllocator::data()
 {
     return _mapping;
@@ -43,17 +48,35 @@
 
 void GCTensorAllocator::allocate()
 {
-    _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>();
-    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name));
-    ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW));
-    ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0));
+    if(_associated_memory_group == nullptr)
+    {
+        _gl_buffer = support::cpp14::make_unique<GLBufferWrapper>();
+        ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name));
+        ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast<GLsizeiptr>(info().total_size()), nullptr, GL_STATIC_DRAW));
+        ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0));
+    }
+    else
+    {
+        _associated_memory_group->finalize_memory(_owner, reinterpret_cast<void **>(&_gl_buffer), info().total_size());
+    }
     info().set_is_resizable(false);
 }
 
 void GCTensorAllocator::free()
 {
-    _gl_buffer.reset();
-    info().set_is_resizable(true);
+    if(_associated_memory_group == nullptr)
+    {
+        _gl_buffer.reset();
+        info().set_is_resizable(true);
+    }
+}
+
+void GCTensorAllocator::set_associated_memory_group(GCMemoryGroup *associated_memory_group)
+{
+    ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr);
+    ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr);
+    ARM_COMPUTE_ERROR_ON(_gl_buffer.get() != nullptr);
+    _associated_memory_group = associated_memory_group;
 }
 
 uint8_t *GCTensorAllocator::lock()
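
The reinterpret_cast in allocate() hands the raw storage of _gl_buffer to the memory group as a void** handle, so the backing pool can patch the SSBO pointer in on acquire() and out on release() without going through unique_ptr ownership. A minimal sketch of that handle patching, with hypothetical pool internals (the real pool and lifetime-manager types are not shown in this patch):

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct GLBufferWrapper; // real type in GCTensorAllocator.h

    // Each registered handle (the address of a tensor allocator's _gl_buffer)
    // maps to the index of the pooled buffer that backs it.
    using HandleMappings = std::unordered_map<void **, std::size_t>;

    void acquire_blobs(const HandleMappings &mappings, const std::vector<GLBufferWrapper *> &blobs)
    {
        for(const auto &m : mappings)
        {
            *m.first = blobs[m.second]; // the tensor now sees the pooled SSBO
        }
    }

    void release_blobs(const HandleMappings &mappings)
    {
        for(const auto &m : mappings)
        {
            *m.first = nullptr; // detach; the pool keeps ownership of the blobs
        }
    }
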
diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
index 5689722..f4c0736 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp
@@ -91,9 +91,9 @@
     }
 }
 
-GCConvolutionLayer::GCConvolutionLayer()
-    : _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(), _input_interleaved_reshaped(),
-      _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false)
+GCConvolutionLayer::GCConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reshape_weights(), _input_im2col_kernel(), _input_interleave_kernel(), _mm_kernel(), _output_col2im_kernel(), _fill_border(), _input_im2col_reshaped(),
+      _input_interleaved_reshaped(), _weights_reshaped(), _weights_transposed(), _gemm_output(), _tmp_output(), _append_bias(false), _is_fully_connected_convolution(false), _are_weights_reshaped(false)
 {
 }
 
@@ -196,6 +196,7 @@
     // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
     TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->info()->fixed_point_position());
     _input_im2col_reshaped.allocator()->init(im2col_reshaped_info);
+    _memory_group.manage(&_input_im2col_reshaped);
 
     // Create tensor (interleave) to prepare input tensor for GEMM
     if(run_interleaved)
@@ -207,6 +208,7 @@
         // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
         TensorInfo interleaved_info(shape_interleaved, 1, dt, input->info()->fixed_point_position());
         _input_interleaved_reshaped.allocator()->init(interleaved_info);
+        _memory_group.manage(&_input_interleaved_reshaped);
     }
 
     // Create GEMM output tensor
@@ -218,6 +220,7 @@
     // FIXME: input->clone() doesn't work with subtensors for grouped convolutions.
     TensorInfo info_gemm(shape_gemm, 1, gemm_data_type, input->info()->fixed_point_position());
     _gemm_output.allocator()->init(info_gemm);
+    _memory_group.manage(&_gemm_output);
 
     // Configure kernels
     if(dt == DataType::F16)
@@ -263,6 +266,8 @@
         _reshape_weights.run();
     }
 
+    _memory_group.acquire();
+
     // Run im2col
     GCScheduler::get().dispatch(_fill_border);
     GCScheduler::get().memory_barrier();
@@ -282,4 +287,6 @@
     GCScheduler::get().memory_barrier();
     // Reshape output matrix
     GCScheduler::get().dispatch(_output_col2im_kernel, false);
+
+    _memory_group.release();
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
index 9e4f0f6..0f8f8e6 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp
@@ -38,9 +38,9 @@
     _kernel = std::move(k);
 }
 
-GCFullyConnectedLayer::GCFullyConnectedLayer()
-    : _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true),
-      _accumulate_biases(false)
+GCFullyConnectedLayer::GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(),
+      _are_weights_reshaped(true), _is_fc_after_conv(true), _accumulate_biases(false)
 {
 }
 
@@ -61,6 +61,7 @@
     _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt));
 
     // Configure im2col kernel
+    _memory_group.manage(&_im2col_output);
     _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false);
 
     // Configure matrix multiply kernel
@@ -156,6 +157,7 @@
         _reshape_weights_kernel.run();
     }
 
+    _memory_group.acquire();
     // Linearize input if it comes from a convolutional layer
     if(_is_fc_after_conv)
     {
@@ -177,4 +179,5 @@
 
         GCScheduler::get().dispatch(_accumulate_biases_kernel);
     }
+    _memory_group.release();
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
index 5122c20..46424a5 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp
@@ -40,8 +40,8 @@
 using namespace arm_compute;
 using namespace arm_compute::gles_compute;
 
-GCGEMM::GCGEMM()
-    : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false)
+GCGEMM::GCGEMM(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false)
 {
 }
 
@@ -88,6 +88,7 @@
 
         TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position());
         _tmp_a.allocator()->init(info_a);
+        _memory_group.manage(&_tmp_a);
 
         TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position());
         _tmp_b.allocator()->init(info_b);
@@ -118,6 +119,7 @@
 
 void GCGEMM::run()
 {
+    _memory_group.acquire();
     if(_is_interleaved_transposed)
     {
         // Run interleave kernel
@@ -137,4 +139,5 @@
         GCScheduler::get().memory_barrier();
         GCScheduler::get().dispatch(_ma_kernel);
     }
+    _memory_group.release();
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
index fc3882d..13213d2 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -33,8 +33,8 @@
 
 using namespace arm_compute;
 
-GCNormalizationLayer::GCNormalizationLayer()
-    : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler()
+GCNormalizationLayer::GCNormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler()
 {
 }
 
@@ -43,6 +43,7 @@
     ARM_COMPUTE_ERROR_ON(input == nullptr);
 
     _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type()));
+    _memory_group.manage(&_squared_input);
 
     _norm_kernel.configure(input, &_squared_input, output, norm_info);
     _multiply_kernel.configure(input, input, &_squared_input, 1.0f);
@@ -55,9 +56,11 @@
 
 void GCNormalizationLayer::run()
 {
+    _memory_group.acquire();
     GCScheduler::get().dispatch(_multiply_kernel, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_border_handler, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_norm_kernel, true);
+    _memory_group.release();
 }
diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
index 5221c5c..1748a59 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,8 @@
 
 using namespace arm_compute;
 
-GCSoftmaxLayer::GCSoftmaxLayer()
-    : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp()
+GCSoftmaxLayer::GCSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp()
 {
 }
 
@@ -50,6 +50,11 @@
     _max.allocator()->init(tensor_info_max_sum);
     _sum.allocator()->init(tensor_info_max_sum);
 
+    // Manage intermediate buffers
+    _memory_group.manage(&_tmp);
+    _memory_group.manage(&_max);
+    _memory_group.manage(&_sum);
+
     // Configure Kernels
     _max_kernel.configure(input, &_max);
     _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum);
@@ -63,9 +68,13 @@
 
 void GCSoftmaxLayer::run()
 {
+    _memory_group.acquire();
+
     GCScheduler::get().dispatch(_max_kernel, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_shift_exp_sum_kernel, false);
     GCScheduler::get().memory_barrier();
     GCScheduler::get().dispatch(_norm_kernel);
+
+    _memory_group.release();
 }
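
Taken together, the per-function changes follow one pattern: declare each intermediate tensor to the memory group right after init() and before its consumers are configured, then bracket the kernel dispatches in run() with acquire()/release(). An illustrative outline (SomeGCFunction, _tmp, _kernel and tmp_info are placeholders, not names from this patch):

    void SomeGCFunction::configure(/* inputs, outputs */)
    {
        _tmp.allocator()->init(tmp_info);
        _memory_group.manage(&_tmp);     // start of _tmp's managed lifetime
        _kernel.configure(/* ... */, &_tmp);
        _tmp.allocator()->allocate();    // end of lifetime: the group records the required size
    }

    void SomeGCFunction::run()
    {
        _memory_group.acquire();         // map pooled SSBOs into the managed tensors
        GCScheduler::get().dispatch(_kernel);
        _memory_group.release();         // detach so the pool can back other functions
    }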