COMPMID-2161 [NEON] Create IWeightsManager class

Change-Id: I1a9a46da2f98e896b825099151b56d1d8271dd31
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1915
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
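
Note (illustrative, not part of the patch): a single weights manager can be shared
by several functions so transformed weights are computed once and reused. A minimal
usage sketch, assuming IWeightsManager is instantiated as declared in the new header
below:

    #include "arm_compute/runtime/IWeightsManager.h"
    #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

    void example()
    {
        // One manager shared by both layers: weights reshaped for the first
        // layer can be looked up and reused instead of being recomputed.
        arm_compute::IWeightsManager weights_manager{};

        arm_compute::NEFullyConnectedLayer fc0(nullptr, &weights_manager);
        arm_compute::NEFullyConnectedLayer fc1(nullptr, &weights_manager);
        // fc0.configure(...), fc1.configure(...) as usual.
    }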
diff --git a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
index 9bfade4..43abb67 100644
--- a/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index 7cf7d95..d54304e 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -34,6 +34,7 @@
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
 namespace arm_compute
@@ -76,7 +77,7 @@
 {
 public:
     /** Constructor */
-    CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLFullyConnectedLayer(const CLFullyConnectedLayer &) = delete;
     /** Default move constructor */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index d29a31a..0b27c82 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -94,7 +94,7 @@
 class CLGEMMConvolutionLayer : public IFunction
 {
 public:
-    /** Default constructor
+    /** Constructor
      *
      * @param[in] memory_manager (Optional) Memory manager.
      */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
index 6fcebd6..3a13e65 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
@@ -30,6 +30,7 @@
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
 #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
 #include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
 namespace arm_compute
@@ -64,7 +65,7 @@
 {
 public:
     /** Constructor */
-    GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    GCFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     GCFullyConnectedLayer(const GCFullyConnectedLayer &) = delete;
     /** Default move constructor */
diff --git a/arm_compute/runtime/ITransformWeights.h b/arm_compute/runtime/ITransformWeights.h
new file mode 100644
index 0000000..6376c30
--- /dev/null
+++ b/arm_compute/runtime/ITransformWeights.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__
+#define __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__
+
+#include <atomic>
+
+namespace arm_compute
+{
+// Forward declarations
+class ITensor;
+
+/** Weights tensor transform interface
+ *
+ * In order to identify the different reshape functions, each reshape function has
+ * to generate a unique id. We use the following convention on an unsigned 32-bit value:
+ *
+ * The lower two bits store the target:
+ * 00 -> NEON
+ * 01 -> CL
+ * 10 -> GLES
+ * 11 -> Unused
+ *
+ * The next five bits store the id of the reshape function:
+ * 00000 -> FullyConnectedLayerReshapeWeights
+ * 00001 -> ConvertFullyConnectedWeights
+ * 00010 -> ConvolutionLayerReshapeWeights
+ * 00011 -> DepthwiseConvolutionLayerReshapeWeights
+ * 00100 -> GEMMReshapeLHSMatrixKernel
+ * 00101 -> GEMMReshapeRHSMatrixKernel
+ *
+ * The remaining bits identify special cases such as assembly functions and extra
+ * arguments in the reshape kernels.
+ */
+class ITransformWeights
+{
+public:
+    /** Default Constructor */
+    ITransformWeights() = default;
+    /** Default Destructor */
+    virtual ~ITransformWeights() = default;
+    /** Prevent instances of this class from being copy constructed */
+    ITransformWeights(const ITransformWeights &) = delete;
+    /** Prevent instances of this class from being copied */
+    ITransformWeights &operator=(const ITransformWeights &) = delete;
+    /** Allow instances of this class to be move constructed */
+    ITransformWeights(ITransformWeights &&) = default;
+    /** Allow instances of this class to be moved */
+    ITransformWeights &operator=(ITransformWeights &&) = default;
+
+    /** Get a pointer to the transformed weights
+     *
+     * @return The pointer to the transformed ITensor weights
+     */
+    virtual ITensor *get_weights() = 0;
+    /** Function that returns a unique id of the reshape function
+     *
+     * @return The computed unique id
+     */
+    virtual uint32_t uid() = 0;
+    /** Run the transformation function */
+    virtual void run() = 0;
+    /** Release transformed weights memory */
+    virtual void release() = 0;
+    /** Increase the object's refcount */
+    void increase_refcount()
+    {
+        ++_num_refcount;
+    }
+
+    /** Decrease the object's refcount and return the updated value
+     *
+     * @return The updated refcount
+     */
+    int32_t decrease_refcount()
+    {
+        return --_num_refcount;
+    }
+
+    /** Function that returns a flag on whether the weights have been reshaped
+     *
+     * @return True if the reshape function has been run
+     */
+    bool is_reshape_run()
+    {
+        return _reshape_run;
+    }
+
+protected:
+    std::atomic<int32_t> _num_refcount{ 0 };
+    bool                 _reshape_run{ false };
+};
+
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_ITRANSFORMWEIGHTS_H__ */
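
Note on the uid convention (illustrative): a uid composes as (function_id << 2) | target,
leaving the bits above the seventh free for extra flags. For example, for the NEON
ConvertFullyConnectedWeights transform declared further down:

    // target NEON = 0b00 (bits [1:0]), function id 0b00001 (bits [6:2]).
    constexpr uint32_t target_neon        = 0x0;
    constexpr uint32_t convert_fc_weights = 0x1;
    constexpr uint32_t uid                = (convert_fc_weights << 2) | target_neon;
    static_assert(uid == 0x4, "matches NEConvertFullyConnectedWeightsManaged::_uid");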
diff --git a/arm_compute/runtime/IWeightsManager.h b/arm_compute/runtime/IWeightsManager.h
new file mode 100644
index 0000000..2d61b89
--- /dev/null
+++ b/arm_compute/runtime/IWeightsManager.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_IWEIGHTSMANAGER_H__
+#define __ARM_COMPUTE_IWEIGHTSMANAGER_H__
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/runtime/ITransformWeights.h"
+
+#include <map>
+
+namespace arm_compute
+{
+/** Weights manager interface to handle weights transformations */
+class IWeightsManager
+{
+public:
+    /** Constructor */
+    IWeightsManager();
+    /** Default Destructor */
+    virtual ~IWeightsManager() = default;
+    /** Prevent instances of this class from being copy constructed */
+    IWeightsManager(const IWeightsManager &) = delete;
+    /** Prevent instances of this class from being copied */
+    IWeightsManager &operator=(const IWeightsManager &) = delete;
+    /** Allow instances of this class to be move constructed */
+    IWeightsManager(IWeightsManager &&) = default;
+    /** Allow instances of this class to be moved */
+    IWeightsManager &operator=(IWeightsManager &&) = default;
+
+    /** Start managing a weights tensor
+     *
+     * @param[in] weights Pointer to the weights tensor to be managed
+     * @param[in] parent  (Optional) Parent transform, in case the weights come from a previous reshape function
+     */
+    void manage(const ITensor *weights, ITransformWeights *parent = nullptr);
+    /** Run the reshape function.
+     *
+     * @param[in] weights           Pointer to the weights tensor we want to reshape
+     * @param[in] weights_transform Weights transformation object
+     *
+     * @return The reshaped tensor
+     */
+    ITensor *run(const ITensor *weights, ITransformWeights *weights_transform);
+    /** Acquire the requested reshape tensor of the selected weights
+     *
+     * @param[in] weights           Pointer to the weights tensor to be managed
+     * @param[in] weights_transform Weights transformation object
+     *
+     * @return The requested reshaped tensor
+     */
+    ITensor *acquire(const ITensor *weights, ITransformWeights *weights_transform);
+    /** Check if the weights are managed
+     *
+     * @param[in] weights Pointer to the weights tensor we want to check if managed
+     *
+     * @return True if the weights tensor is managed, else false
+     */
+    bool are_weights_managed(const ITensor *weights);
+
+private:
+    std::map<const ITensor *, std::vector<ITransformWeights *>> _managed_weights;
+    std::map<const ITensor *, ITransformWeights *>              _managed_weights_parents;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_IWEIGHTSMANAGER_H__ */
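
Note: these headers only declare the manager API; a plausible call pattern inside a
function's configure()/prepare(), inferred from the declarations above (the member
names mirror those added to NEFullyConnectedLayer below), would be:

    // configure(): register interest in the transformed weights.
    // 'weights' is the weights tensor passed to configure().
    const ITensor *weights_to_use = weights;
    if(_weights_manager != nullptr && _weights_manager->are_weights_managed(weights))
    {
        _reshape_weights_managed_function.configure(weights);
        weights_to_use = _weights_manager->acquire(weights, &_reshape_weights_managed_function);
    }

    // prepare(): run the transform once; the manager caches the reshaped tensor so
    // other functions sharing the same weights can reuse it.
    if(_weights_manager != nullptr && _weights_manager->are_weights_managed(weights))
    {
        _weights_manager->run(weights, &_reshape_weights_managed_function);
    }

    // teardown: drop one reference and free the transformed copy once nobody
    // else holds it.
    if(_reshape_weights_managed_function.decrease_refcount() == 0)
    {
        _reshape_weights_managed_function.release();
    }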
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 8f26142..50a86bd 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,7 +26,9 @@
 
 #include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h"
 #include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/ITransformWeights.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
@@ -52,6 +54,8 @@
      * @param[in] output               The converted weights tensor info. Shape and Data Type: Same as @p input.
      * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
      * @param[in] data_layout          The data layout the weights have been trained in.
+     *
+     * @return A status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
 
@@ -61,5 +65,45 @@
 private:
     NEConvertFullyConnectedWeightsKernel _kernel;
 };
-}
+
+namespace weights_transformations
+{
+/** Basic function to manage the converted weights generated from @ref NEConvertFullyConnectedWeights */
+class NEConvertFullyConnectedWeightsManaged : public ITransformWeights
+{
+public:
+    void run() override
+    {
+        _output.allocator()->allocate();
+        _func.run();
+        _reshape_run = true;
+    }
+
+    void release() override
+    {
+        _output.allocator()->free();
+    }
+
+    ITensor *get_weights() override
+    {
+        return &_output;
+    }
+
+    uint32_t uid() override
+    {
+        return _uid;
+    }
+
+    void configure(const ITensor *input, const TensorShape &original_input_shape, DataLayout data_layout)
+    {
+        _func.configure(input, &_output, original_input_shape, data_layout);
+    }
+
+private:
+    static constexpr uint32_t      _uid = 0x4;
+    Tensor                         _output{};
+    NEConvertFullyConnectedWeights _func{};
+};
+} // namespace weights_transformations
+} // namespace arm_compute
 #endif /* __ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H__ */
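
Note (illustrative): a managed transform owns its output tensor and wraps the plain
function. Its standalone lifecycle, assuming weights and original_input_shape are
provided by the caller, is roughly:

    weights_transformations::NEConvertFullyConnectedWeightsManaged managed{};
    managed.configure(&weights, original_input_shape, DataLayout::NCHW);
    managed.run();                            // allocates the output, runs the convert once
    ITensor *converted = managed.get_weights();
    // ... consume the converted weights ...
    managed.release();                        // frees the transformed weights memory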
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 360bb23..6880bbb 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -73,7 +73,7 @@
 class NEDeconvolutionLayer : public IFunction
 {
 public:
-    /** Default constructor */
+    /** Constructor */
     NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
 
     /** Prevent instances of this class from being copied (As this class contains pointers) */
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 56ce274..b80e0e4 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,6 +63,46 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
 
+namespace weights_transformations
+{
+/** Basic function to manage the reshape weights generated from @ref NEFullyConnectedLayerReshapeWeights */
+class NEFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights
+{
+public:
+    void run() override
+    {
+        _output.allocator()->allocate();
+        _func.run();
+        _reshape_run = true;
+    }
+
+    void release() override
+    {
+        _output.allocator()->free();
+    }
+
+    ITensor *get_weights() override
+    {
+        return &_output;
+    }
+
+    uint32_t uid() override
+    {
+        return _uid;
+    }
+
+    void configure(const ITensor *input)
+    {
+        _func.configure(input, &_output);
+    }
+
+private:
+    static constexpr uint32_t           _uid = 0x0;
+    Tensor                              _output{};
+    NEFullyConnectedLayerReshapeWeights _func{};
+};
+} // namespace weights_transformations
+
 /** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
  *  -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
  *  -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
@@ -75,7 +115,7 @@
 {
 public:
     /** Constructor */
-    NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    NEFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEFullyConnectedLayer(const NEFullyConnectedLayer &) = delete;
     /** Default move constructor */
@@ -128,25 +168,28 @@
     void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
     void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
 
-    MemoryGroup                                         _memory_group;
-    NEFlattenLayerKernel                                _flatten_kernel;
-    NEConvertFullyConnectedWeights                      _convert_weights;
-    NEFullyConnectedLayerReshapeWeights                 _reshape_weights_function;
-    NEGEMM                                              _mm_gemm;
-    NEGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
-    NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
-    NEGEMMMatrixAccumulateBiasesKernel                  _accumulate_biases_kernel;
-    Tensor                                              _flatten_output;
-    Tensor                                              _gemmlowp_output;
-    Tensor                                              _converted_weights_output;
-    Tensor                                              _reshape_weights_output;
-    const ITensor                                      *_original_weights;
-    bool                                                _are_weights_converted;
-    bool                                                _are_weights_reshaped;
-    bool                                                _is_fc_after_conv;
-    bool                                                _accumulate_biases;
-    bool                                                _is_quantized;
-    bool                                                _is_prepared;
+    MemoryGroup                                                         _memory_group;
+    IWeightsManager                                                    *_weights_manager;
+    NEFlattenLayerKernel                                                _flatten_kernel;
+    NEConvertFullyConnectedWeights                                      _convert_weights;
+    weights_transformations::NEConvertFullyConnectedWeightsManaged      _convert_weights_managed;
+    NEFullyConnectedLayerReshapeWeights                                 _reshape_weights_function;
+    weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
+    NEGEMM                                                              _mm_gemm;
+    NEGEMMLowpMatrixMultiplyCore                                        _mm_gemmlowp;
+    NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint                 _gemmlowp_output_stage;
+    NEGEMMMatrixAccumulateBiasesKernel                                  _accumulate_biases_kernel;
+    Tensor                                                              _flatten_output;
+    Tensor                                                              _gemmlowp_output;
+    Tensor                                                              _converted_weights_output;
+    Tensor                                                              _reshape_weights_output;
+    const ITensor                                                      *_original_weights;
+    bool                                                                _are_weights_converted;
+    bool                                                                _are_weights_reshaped;
+    bool                                                                _is_fc_after_conv;
+    bool                                                                _accumulate_biases;
+    bool                                                                _is_quantized;
+    bool                                                                _is_prepared;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
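
Note: the fully connected layer now holds both a reshape and a convert managed
transform. A hypothetical chaining of the two through the manager's parent argument
(the implementing .cpp is not part of this header-only diff; 'weights',
'original_input_shape' and 'data_layout' stand in for the configure() arguments):

    // Reshape first, then register the reshaped tensor so the convert step that
    // consumes it is tracked against its producing transform as parent.
    _reshape_weights_managed_function.configure(weights);
    ITensor *reshaped = _weights_manager->acquire(weights, &_reshape_weights_managed_function);

    _weights_manager->manage(reshaped, &_reshape_weights_managed_function);
    _convert_weights_managed.configure(reshaped, original_input_shape, data_layout);
    ITensor *converted = _weights_manager->acquire(reshaped, &_convert_weights_managed);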
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 7f9e318..d947be1 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@
 #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
 #include "arm_compute/runtime/Tensor.h"
@@ -51,7 +52,7 @@
 {
 public:
     /** Constructor */
-    NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEGEMM(const NEGEMM &) = delete;
     /** Default move constructor */
@@ -96,6 +97,7 @@
 
 private:
     MemoryGroup                _memory_group;
+    IWeightsManager           *_weights_manager;
     NEGEMMInterleave4x4Kernel  _interleave_kernel;
     NEGEMMTranspose1xWKernel   _transpose_kernel;
     NEGEMMMatrixMultiplyKernel _mm_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
index ec4f700..83e495e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/NEON/kernels/assembly/NEGEMMAssemblyWrapperKernel.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -38,9 +39,8 @@
 class NEGEMMAssemblyDispatch : public IFunction
 {
 public:
-    /** Default constructor */
-    NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
+    /** Constructor */
+    NEGEMMAssemblyDispatch(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copy constructed */
     NEGEMMAssemblyDispatch(const NEGEMMAssemblyDispatch &) = delete;
     /** Prevent instances of this class from being copied */
@@ -79,8 +79,9 @@
 
     /** Interface for the arm_gemm fallback */
     std::unique_ptr<IFallback>      _arm_gemm;
-    MemoryGroup                     _memory_group;   /**< Function memory group */
-    std::shared_ptr<IMemoryManager> _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */
+    MemoryGroup                     _memory_group;    /**< Function memory group */
+    std::shared_ptr<IMemoryManager> _memory_manager;  /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */
+    IWeightsManager                *_weights_manager; /**< Pointer to the weights manager */
 public:
     /** If supported create an ACL function else fallback to the arm_gemm function.
      *
@@ -117,6 +118,5 @@
     void prepare() override;
     void run() override;
 };
-
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEGEMMASSEMBLYDISPATCH_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index ace924f..dccc35f 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -32,6 +32,7 @@
 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
@@ -54,6 +55,14 @@
 public:
     /** Constructor */
     NEConvolutionLayerReshapeWeights();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConvolutionLayerReshapeWeights(const NEConvolutionLayerReshapeWeights &) = delete;
+    /** Default move constructor */
+    NEConvolutionLayerReshapeWeights(NEConvolutionLayerReshapeWeights &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete;
+    /** Default move assignment operator */
+    NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default;
     /** Set the input and output tensors.
      *
      * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
@@ -78,6 +87,47 @@
     NEWeightsReshapeKernel _weights_reshape_kernel;
 };
 
+namespace weights_transformations
+{
+/** Basic function to manage the reshape weights generated from @ref NEConvolutionLayerReshapeWeights */
+class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights
+{
+public:
+    void configure(const ITensor *input, const ITensor *biases)
+    {
+        _bias_bit = (biases != nullptr) ? 1 : 0;
+        _func.configure(input, biases, &_output);
+    }
+
+    void run() override
+    {
+        _output.allocator()->allocate();
+        _func.run();
+        _reshape_run = true;
+    }
+
+    ITensor *get_weights() override
+    {
+        return &_output;
+    }
+
+    void release() override
+    {
+        _output.allocator()->free();
+    }
+
+    uint32_t uid() override
+    {
+        return ((0x8) | (_bias_bit << 7));
+    }
+
+private:
+    Tensor                           _output{};
+    NEConvolutionLayerReshapeWeights _func{};
+    int32_t                          _bias_bit{ 0 };
+};
+} // namespace weights_transformations
+
 /** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
  *
  * -# @ref NEIm2ColKernel
@@ -92,7 +142,7 @@
 {
 public:
     /** Constructor */
-    NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+    NEGEMMConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEGEMMConvolutionLayer(const NEGEMMConvolutionLayer &) = delete;
     /** Default move constructor */
@@ -187,15 +237,17 @@
     static Status validate_gemm3d(const ITensorInfo *input_info, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);
 
 private:
-    MemoryGroup                      _memory_group;
-    NEConvolutionLayerReshapeWeights _reshape_weights;
-    NEIm2ColKernel                   _im2col_kernel;
-    NEGEMM                           _mm_gemm;
-    NEGEMMLowpMatrixMultiplyCore     _mm_gemmlowp;
-    NECol2ImKernel                   _col2im_kernel;
-    NEActivationLayer                _activationlayer_function;
-    NEArithmeticAdditionKernel       _add_bias_kernel;
-    NEReshapeLayer                   _reshape_layer;
+    MemoryGroup                                                        _memory_group;
+    IWeightsManager                                                   *_weights_manager;
+    NEConvolutionLayerReshapeWeights                                   _reshape_weights;
+    weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
+    NEIm2ColKernel                                                     _im2col_kernel;
+    NEGEMM                                                             _mm_gemm;
+    NEGEMMLowpMatrixMultiplyCore                                       _mm_gemmlowp;
+    NECol2ImKernel                                                     _col2im_kernel;
+    NEActivationLayer                                                  _activationlayer_function;
+    NEArithmeticAdditionKernel                                         _add_bias_kernel;
+    NEReshapeLayer                                                     _reshape_layer;
 
     const ITensor *_original_weights;
 
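
Note on uid() above (illustrative): the bits beyond the seven id/target bits are the
"extra argument" region reserved in ITransformWeights; here bit 7 records whether
biases were folded into the reshape:

    uint32_t uid_without_bias = 0x8;            // ConvolutionLayerReshapeWeights on NEON
    uint32_t uid_with_bias    = 0x8 | (1 << 7); // == 0x88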
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index ec39439..978c445 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -86,7 +86,7 @@
     NEGEMM                     _gemm_state_f;
     NEArithmeticAdditionKernel _add_kernel;
     NEActivationLayerKernel    _activation_kernel;
-    NEFullyConnectedLayer      _fully_connected_kernel;
+    NEFullyConnectedLayer      _fully_connected;
     NECopyKernel               _copy_kernel;
     Tensor                     _fully_connected_out;
     Tensor                     _gemm_output;
diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
index ad89e1f..d3dda9a 100644
--- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
+++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h
@@ -32,6 +32,7 @@
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IScheduler.h"
+#include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -94,8 +95,8 @@
 class NEGEMMInterleavedWrapper : public IFunction
 {
 public:
-    NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-    ~NEGEMMInterleavedWrapper()                                             = default;
+    NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+    ~NEGEMMInterleavedWrapper() = default;
 
     NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete;
     NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete;