IVGCVSW-2957 MergerLayer sub-tensor optimization is now backend-agnostic
+ Update clframework pin
+ Cl and Neon Merger workloads updated to use the MemoryLayout-agnostic API
+ Workloads only use sub-tensor optimization if ALL input tensors are sub-tensors
+ Refactor LayerSupportCommon code to be a bit more succinct
Change-Id: Ib61ad4ccbd767e924dff07e61022e0cda4069828
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
diff --git a/src/backends/cl/workloads/ClMergerWorkload.cpp b/src/backends/cl/workloads/ClMergerWorkload.cpp
index e06d8c5..610acb9 100644
--- a/src/backends/cl/workloads/ClMergerWorkload.cpp
+++ b/src/backends/cl/workloads/ClMergerWorkload.cpp
@@ -9,16 +9,25 @@
#include <cl/ClTensorHandle.hpp>
#include <cl/ClLayerSupport.hpp>
+#include <arm_compute/core/Types.h>
+
#include <boost/polymorphic_pointer_cast.hpp>
namespace armnn
{
using namespace armcomputetensorutils;
+namespace
+{
+size_t CalcAxis(const MergerDescriptor& desc)
+{
+ return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+}
+} //namespace
+
arm_compute::Status ClMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const MergerDescriptor& descriptor)
-
{
std::vector<arm_compute::TensorInfo> aclInputs;
for (const TensorInfo* input : inputs)
@@ -27,59 +36,65 @@
aclInputs.emplace_back(aclInputInfo);
}
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
-
std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
for (arm_compute::ITensorInfo& input : aclInputs)
{
aclInputPtrs.emplace_back(&input);
}
+ size_t aclAxis = CalcAxis(descriptor);
return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
-
}
ClMergerWorkload::ClMergerWorkload(const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<MergerQueueDescriptor>(descriptor, info)
{
- m_Execute = true;
+ bool allInputsAreSubtensors = true;
- unsigned int innerAxisOrder = descriptor.m_Parameters.GetNumDimensions() - descriptor.m_Parameters.GetConcatAxis();
-
- if (innerAxisOrder != 1)
+ // Check that all inputs are sub-tensors
+ for (auto input : descriptor.m_Inputs)
{
- m_Execute = false;
+ if (!input->GetParent())
+ {
+ // Non sub-tensor input found so we need to execute the merger function
+ allInputsAreSubtensors = false;
+ break;
+ }
+ }
+
+ if (allInputsAreSubtensors)
+ {
+ // Can skip configuring the merger function since it's not executed
return;
}
std::vector<arm_compute::ICLTensor *> aclInputs;
- arm_compute::DataLayout aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
for (auto input : m_Data.m_Inputs)
{
arm_compute::ICLTensor& aclInput = boost::polymorphic_pointer_downcast<IClTensorHandle>(input)->GetTensor();
- aclInput.info()->set_data_layout(aclDataLayout);
aclInputs.emplace_back(&aclInput);
}
arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>(
m_Data.m_Outputs[0])->GetTensor();
- output.info()->set_data_layout(aclDataLayout);
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+ // Create the layer function
+ m_Layer.reset(new arm_compute::CLConcatenateLayer());
- m_Layer.configure(aclInputs, &output, aclAxis);
+ // Configure input and output tensors
+ size_t aclAxis = CalcAxis(descriptor.m_Parameters);
+ m_Layer->configure(aclInputs, &output, aclAxis);
- m_Layer.prepare();
-
+ // Prepare
+ m_Layer->prepare();
}
void ClMergerWorkload::Execute() const
{
- if (m_Execute)
+ if (m_Layer)
{
ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerWorkload_Execute");
- m_Layer.run();
+ m_Layer->run();
}
-
}
} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/workloads/ClMergerWorkload.hpp b/src/backends/cl/workloads/ClMergerWorkload.hpp
index 8189a1b..1c2f823 100644
--- a/src/backends/cl/workloads/ClMergerWorkload.hpp
+++ b/src/backends/cl/workloads/ClMergerWorkload.hpp
@@ -24,8 +24,7 @@
void Execute() const override;
private:
- mutable arm_compute::CLConcatenateLayer m_Layer;
- bool m_Execute;
+ mutable std::unique_ptr<arm_compute::CLConcatenateLayer> m_Layer;
};
} //namespace armnn