COMPMID-1145: (API) Introduce prepare() stage (NEON/CL/GLES)

Change-Id: I5b46764f9c3154ec3e3b9c951cc9e6dfbcb81dfb
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/134255
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 8296930..7767b73 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -94,12 +94,14 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup                _memory_group;
     CLDeconvolutionLayerUpsample _scale_f;
     CLConvolutionLayer           _conv_f;
     CLTensor                     _scaled_output;
+    bool                         _is_prepared;
 };
 }
 #endif /* __ARM_COMPUTE_CLDECONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
index b1eb4b9..229fb24 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -140,6 +140,7 @@
 
     // Inherited methods overriden:
     void run() override;
+    void prepare() override;
 
 private:
     CLDepthwiseIm2ColKernel                   _im2col_kernel;
@@ -153,7 +154,7 @@
     CLTensor                                  _weights_reshaped;
     CLTensor                                  _v2mm_output;
     CLTensor                                  _output_reshaped;
-    bool                                      _is_first_run;
+    bool                                      _is_prepared;
     bool                                      _is_quantized;
     const ICLTensor                          *_original_weights;
 };
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
index 27cee5e..a434610 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,7 @@
 
     // Inherited methods overriden:
     void run() override;
+    void prepare() override;
 
 private:
     CLDepthwiseConvolutionLayer _depthwise_conv;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index aaa4326..3dde529 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -195,7 +195,6 @@
     bool _is_quantized;
     bool _is_activationlayer_enabled;
     bool _is_prepared;
-    bool _retain_internal_weights;
 };
 }
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 3976704..f404ccd 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,6 +53,14 @@
 public:
     /** Constructor */
     CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMLowpMatrixMultiplyCore(const CLGEMMLowpMatrixMultiplyCore &) = delete;
+    /** Default move constructor */
+    CLGEMMLowpMatrixMultiplyCore(CLGEMMLowpMatrixMultiplyCore &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
+    /** Default move assignment operator */
+    CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
     /** Initialise the kernel's inputs, output
      *
      * @note GEMM_LOWP:  low precision GEMM kernel
@@ -83,6 +91,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup                      _memory_group;
@@ -96,11 +105,12 @@
     CLTensor                           _vector_sum_row;
     CLTensor                           _tmp_a;
     CLTensor                           _tmp_b;
+    const ICLTensor                   *_original_b;
     int32_t                            _a_offset;
     int32_t                            _b_offset;
     bool                               _is_interleaved_transposed;
-    bool                               _is_first_run;
     bool                               _reshape_b_only_on_first_run;
+    bool                               _is_prepared;
 };
 }
 #endif /*__ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
index b7b2587..c2bb47c 100644
--- a/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h
@@ -90,6 +90,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup                          _memory_group;
@@ -100,7 +101,7 @@
     CLTensor                               _input_im2col_reshaped;
     CLTensor                               _weights_reshaped;
     CLTensor                               _gemm_output;
-    bool                                   _is_first_run;
+    bool                                   _is_prepared;
     const ICLTensor                       *_original_weights;
 };
 }
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 9f239a9..ab7407d 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -69,6 +69,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup              _memory_group;
@@ -80,6 +81,7 @@
     CLTensor                   _fully_connected_out;
     CLTensor                   _gemm_output;
     CLTensor                   _add_output;
+    bool                       _is_prepared;
 };
 }
 #endif /* __ARM_COMPUTE_CLRNN_LAYER_H__ */