COMPMID-920: Introduce prepare() stage

Change-Id: I08ddb7f6e061178e7566518b48e4e18f8f078596
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129825
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index 5c05334..97998b5 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -106,10 +106,11 @@
                                                     const WeightsInfo &weights_info, const ActivationLayerInfo &act_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U), bool enable_fast_math = false);
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     std::shared_ptr<IMemoryManager> _memory_manager;
-    std::unique_ptr<IFunction>      _function; /**< Function to run */
+    std::unique_ptr<IFunction>      _function;
 };
 }
 #endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 67c0467..7fb5af9 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -109,6 +109,7 @@
 
     //Inherited methods override
     void run() override;
+    void prepare() override;
 
 private:
     void configure_fc_fc(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index c5d7b86..60ff32c 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -100,6 +100,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup              _memory_group;
@@ -112,8 +113,8 @@
     const ICLTensor           *_original_b;
     bool                       _is_interleaved_transposed;
     bool                       _run_addition;
-    bool                       _is_first_run;
     bool                       _reshape_b_only_on_first_run;
+    bool                       _is_prepared;
 };
 }
 
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index a24ac3a..3dde529 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -153,6 +153,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     /** Configures the appropriate matrix multiply routine
@@ -192,8 +193,8 @@
     CLTensor _tmp_output;
 
     bool _is_quantized;
-    bool _is_first_run;
     bool _is_activationlayer_enabled;
+    bool _is_prepared;
 };
 }
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
index a279769..594d602 100644
--- a/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h
@@ -49,6 +49,14 @@
 public:
     /** Default constructor */
     CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLWinogradConvolutionLayer(const CLWinogradConvolutionLayer &) = delete;
+    /** Default move constructor */
+    CLWinogradConvolutionLayer(CLWinogradConvolutionLayer &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLWinogradConvolutionLayer &operator=(const CLWinogradConvolutionLayer &) = delete;
+    /** Default move assignment operator */
+    CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default;
     /** Set the input and output tensors.
      *
      * @note: This function only works with 3x3 and 5x5 kernels along with unit strides
@@ -92,6 +100,7 @@
 
     // Inherited methods overridden:
     void run() override;
+    void prepare() override;
 
 private:
     CLMemoryGroup                   _memory_group;
@@ -103,7 +112,8 @@
     CLTensor                        _input0;
     CLTensor                        _input1;
     CLTensor                        _batched_mm_output;
-    bool                            _is_first_run;
+    const ICLTensor                *_original_weights;
+    bool                            _is_prepared;
     bool                            _is_activationlayer_enabled;
 };
 }
diff --git a/arm_compute/runtime/IFunction.h b/arm_compute/runtime/IFunction.h
index a4e7ed1..f64b2be 100644
--- a/arm_compute/runtime/IFunction.h
+++ b/arm_compute/runtime/IFunction.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,8 @@
 class IFunction
 {
 public:
+    /** Destructor */
+    virtual ~IFunction() = default;
     /** Run the kernels contained in the function
      *
      * For NEON kernels:
@@ -43,12 +45,18 @@
      * - The queue is then flushed.
      *
      * @note The function will not block until the kernels are executed. It is the user's responsibility to wait.
+     * @note Will call prepare() on the first run if it hasn't been called already
      */
     virtual void run() = 0;
-    /** Destructor
+    /** Prepare the function for executing
      *
+     * Any one-off pre-processing step required by the function is handled here
+     *
+     * @note The prepare stage might not need all of the function's buffers' backing memory to be available in order to execute
      */
-    virtual ~IFunction() = default;
+    virtual void prepare()
+    {
+    }
 };
 }
 #endif /*__ARM_COMPUTE_IFUNCTION_H__ */