IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads
that support it.
* Changed ProfilingTest network to be a Convolution layer
followed by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can accept a
ModelOptions.
* Network will now call OptimizeSubgraphView passing in the ModelOptions.
Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index cb0c8a4..9300b31 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,8 @@
#include "NeonWorkloadUtils.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
@@ -17,16 +19,21 @@
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
const TensorInfo& input1,
- const TensorInfo& output)
+ const TensorInfo& output,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEArithmeticAddition::validate(&aclInput0,
&aclInput1,
&aclOutput,
- arm_compute::ConvertPolicy::SATURATE);
+ arm_compute::ConvertPolicy::SATURATE,
+ activationInfo);
}
@@ -40,8 +47,10 @@
arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
- layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+ layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
m_AddLayer.reset(layer.release());
}