IVGCVSW-6126 ConstTensorsAsInput: Conv2d - Backends

!android-nn-driver:7477

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: Ibf633ccccc385bd980934ff829407d21981323ef
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 479e57f..77ad5c4 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1832,6 +1832,9 @@
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ConvertConstants");
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
         Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsHalfToFloat()));
+
+        // Once the constants are converted we can now safely call RedirectMembersToConstantInputs
+        Optimizer::Pass(optGraph, MakeOptimizations(RedirectMembersToConstantInputs()));
     }
     return optNet;
 }
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 0cca86f..3b89171 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -56,32 +56,35 @@
                                                float scale = 1.f,
                                                int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         Convolution2dDescriptor descriptor;
         descriptor.m_DataLayout  = DataLayout::NHWC;
         descriptor.m_StrideX     = 1;
         descriptor.m_StrideY     = 1;
 
-        std::vector<float> weightsData   = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
-                                             11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
-                                             21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
-                                             31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
-        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
-        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
-        ConstTensor        weights(weightsInfo, weightsVector);
-        Optional<ConstTensor> optionalBias;
-        ARMNN_NO_DEPRECATE_WARN_BEGIN
-        return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
-        ARMNN_NO_DEPRECATE_WARN_END
+        return network->AddConvolution2dLayer(descriptor, name);
     }
 
     static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                              float scale = 1.f,
                                                              int32_t offset = 0)
     {
-        IgnoreUnused(network);
-        IgnoreUnused(scale);
-        IgnoreUnused(offset);
-        return {};
+
+        std::vector<float> weightsData   = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
+                                             11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+                                             21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+                                             31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
+        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
+        ConstTensor        weights(weightsInfo, weightsVector);
+
+        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+
+        std::vector<IConnectableLayer*> layers = { weightsLayer };
+        return layers;
     }
 };
 
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index e3d679a..762645b 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -28,7 +28,7 @@
                                                     bool isFastMathEnabled,
                                                     const ActivationDescriptor* activationDescriptor)
 {
-    // The implemented workload does support both const and non const
+    // The arm_compute::CLConvolutionLayer supports both const and non const
     // weights. However, in the case of non const weights we'd have to call
     // prepare or configure for each inference which we're not setup to do just yet.
     if (!weights.IsConstant())
@@ -39,7 +39,8 @@
 
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
@@ -57,6 +58,7 @@
                                        "ArmNN ClConvolution2dWorkload does not support non constant bias."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
 
@@ -85,31 +87,31 @@
     , m_ConvolutionLayer(memoryManager)
 {
     ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload");
-    const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
-    m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", 1, 1);
-
-    m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
-    BuildArmComputeTensor(*m_KernelTensor, weightInfo, m_Data.m_Parameters.m_DataLayout);
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
                                                                       m_Data.m_Parameters.m_DilationY);
 
-    if (m_Data.m_Parameters.m_BiasEnabled)
-    {
-        m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
-        BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
-    }
+    uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3: 2;
+    m_Data.ValidateInputsOutputs("ClConvolution2dWorkload", numInputs, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ICLTensor& weights = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor* bias  = nullptr;
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        bias = &static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+    }
 
     // Create Proxy tensor and set the initial tensor handle to it
     m_InputProxy = std::make_unique<ICLTensorProxy>(&input);
     m_OutputProxy = std::make_unique<ICLTensorProxy>(&output);
 
+
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
+    weights.info()->set_data_layout(aclDataLayout);
 
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
 
@@ -119,8 +121,8 @@
         ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_configure");
         m_ConvolutionLayer.configure(clCompileContext,
                                      m_InputProxy.get(),
-                                     m_KernelTensor.get(),
-                                     m_BiasTensor.get(),
+                                     &weights,
+                                     bias,
                                      m_OutputProxy.get(),
                                      padStrideInfo,
                                      arm_compute::WeightsInfo(),
@@ -131,7 +133,7 @@
 
     m_ConvolutionMethod =
         m_ConvolutionLayer.get_convolution_method(input.info(),
-                                                  m_KernelTensor->info(),
+                                                  weights.info(),
                                                   output.info(),
                                                   padStrideInfo,
                                                   arm_compute::WeightsInfo(),
@@ -156,27 +158,12 @@
     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
-                                         this->GetGuid());
-
-    InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
-
-    if (m_BiasTensor)
-    {
-        InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
-    }
-
-    // Force Compute Library to perform the necessary copying and reshaping, after which
-    // delete all the input tensors that will no longer be needed
-    {
-        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_prepare");
-        m_ConvolutionLayer.prepare();
-    }
-    FreeUnusedTensors();
+                                         GetGuid());
 }
 
 void ClConvolution2dWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvolution2dWorkload_Execute", this->GetGuid());
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvolution2dWorkload_Execute", GetGuid());
     RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
 }
 
@@ -185,12 +172,6 @@
     return m_ConvolutionMethod;
 }
 
-void ClConvolution2dWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_KernelTensor);
-    FreeTensorIfUnused(m_BiasTensor);
-}
-
 void ClConvolution2dWorkload::Reconfigure()
 {
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index bba92d2..7293c83 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -60,13 +60,8 @@
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
 
-    std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
-    std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
-
     arm_compute::ConvolutionMethod m_ConvolutionMethod;
 
-    void FreeUnusedTensors();
-
     std::unique_ptr<ICLTensorProxy> m_InputProxy;
     std::unique_ptr<ICLTensorProxy> m_OutputProxy;
 };
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index d5716c8..12d8c46 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -29,7 +29,7 @@
                                                       bool isFastMathEnabled,
                                                       const ActivationDescriptor* activationDescriptor)
 {
-    // The implemented workload does support both const and non const
+    // The arm_compute::NEConvolutionLayer supports both const and non const
     // weights. However, in the case of non const weights we'd have to call
     // prepare or configure for each inference which we're not setup to do just yet.
     if (!weights.IsConstant())
@@ -40,7 +40,8 @@
 
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
-    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+    aclWeightsInfo.set_are_values_constant(weights.IsConstant());
 
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
                                                                       descriptor.m_DilationY);
@@ -58,6 +59,7 @@
                                        "ArmNN NeonConvolution2dWorkload does not support non constant bias."};
         }
         aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
+        aclBiasesInfo.set_are_values_constant(biases.value().IsConstant());
         optionalAclBiasesInfo = &aclBiasesInfo;
     }
 
@@ -86,7 +88,8 @@
 {
     using arm_compute::NEConvolutionLayer;
 
-    m_Data.ValidateInputsOutputs("NeonConvolution2dWorkload", 1, 1);
+    uint32_t numInputs = m_Data.m_Parameters.m_BiasEnabled ? 3: 2;
+    m_Data.ValidateInputsOutputs("NeonConvolution2dWorkload", numInputs, 1);
 
     arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -97,7 +100,6 @@
 
     m_KernelTensor = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
-
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
         m_BiasTensor = std::make_unique<arm_compute::Tensor>();
@@ -148,7 +150,7 @@
     ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
-                                         this->GetGuid());
+                                         GetGuid());
 
     m_ConvolutionLayer.reset(convolutionLayer.release());
 
@@ -162,7 +164,8 @@
     }
 
     m_ConvolutionLayer->prepare();
-    FreeUnusedTensors();
+    FreeTensorIfUnused(m_KernelTensor);
+    FreeTensorIfUnused(m_BiasTensor);
 }
 
 void NeonConvolution2dWorkload::Execute() const
@@ -176,10 +179,4 @@
     return m_ConvolutionMethod;
 }
 
-void NeonConvolution2dWorkload::FreeUnusedTensors()
-{
-    FreeTensorIfUnused(m_KernelTensor);
-    FreeTensorIfUnused(m_BiasTensor);
-}
-
 } //namespace armnn
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 93e5cb4..e833f2a 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -45,9 +45,6 @@
     std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
 
     arm_compute::ConvolutionMethod m_ConvolutionMethod;
-
-    void FreeUnusedTensors();
-
 };
 
 } //namespace armnn