Refactor: Don't include all Compute Library function definitions everywhere.

Include only the function definition each workload specifically needs.
Also, tighten up the scope where Compute Library functions are available.

Knocks about 30 seconds off a 4m30s single-threaded compile of the Neon workloads.
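
The header-side pattern looks roughly like this (a sketch only, since the .hpp diff is
not shown here; FooWorkload and m_Layer are illustrative names, while
arm_compute::IFunction and arm_compute::NEFullyConnectedLayer are the real Compute
Library types). Holding the function through the IFunction interface keeps the heavy
NEON function header out of the workload header, so only the .cpp pays the parsing cost:

    // FooWorkload.hpp -- only the lightweight interface header is needed now
    #include <arm_compute/runtime/IFunction.h>
    #include <memory>

    class FooWorkload
    {
    public:
        FooWorkload();
        void Execute() const;
    private:
        // Was: arm_compute::NEFullyConnectedLayer m_Layer; which forced every
        // includer of this header to parse NEFullyConnectedLayer.h as well.
        std::unique_ptr<arm_compute::IFunction> m_Layer;
    };

    // FooWorkload.cpp -- the heavy function header stays local to this file
    #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>

    FooWorkload::FooWorkload()
    {
        auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>();
        // layer->configure(...) with the real input/weight/bias/output tensors
        // would go here, as in the workload below.
        m_Layer = std::move(layer);
    }

    void FooWorkload::Execute() const
    {
        m_Layer->run();
    }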

Change-Id: Idac438f3bc77ff978295fbc9505cb42447def145
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index e432a6b..7395270 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -5,10 +5,13 @@
 
 #include "NeonFullyConnectedWorkload.hpp"
 
+#include "NeonWorkloadUtils.hpp"
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 
+#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
+
 namespace armnn
 {
 using namespace armcomputetensorutils;
@@ -45,7 +48,6 @@
 NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
     const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
     : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
-    , m_FullyConnectedLayer(memoryManager)
 {
     m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
 
@@ -64,7 +66,10 @@
     // Construct
     arm_compute::FullyConnectedLayerInfo fc_info;
     fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
-    m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+
+    auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
+    layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+    m_FullyConnectedLayer.reset(layer.release());
 
     // Allocate
     if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8)
@@ -90,14 +95,14 @@
 
     // Force Compute Library to perform the necessary copying and reshaping, after which
     // delete all the input tensors that will no longer be needed
-    m_FullyConnectedLayer.prepare();
+    m_FullyConnectedLayer->prepare();
     FreeUnusedTensors();
 }
 
 void NeonFullyConnectedWorkload::Execute() const
 {
     ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
-    m_FullyConnectedLayer.run();
+    m_FullyConnectedLayer->run();
 }
 
 void NeonFullyConnectedWorkload::FreeUnusedTensors()