IVGCVSW-5328-5329 Fuse Activation
* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads
that support it.
* Changed the ProfilingTest network to be a Convolution layer followed by
an Abs layer, instead of being followed by an Activation layer.
* Added an IBackendInternal::OptimizeSubgraphView function that accepts a
ModelOptions argument.
* Network now calls OptimizeSubgraphView, passing in the ModelOptions.
Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index af6f1ae..fd8be17 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -6,6 +6,7 @@
#include "NeonConvolution2dWorkload.hpp"
#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <neon/workloads/NeonWorkloadUtils.hpp>
@@ -25,7 +26,8 @@
const Convolution2dDescriptor& descriptor,
const TensorInfo& weights,
const Optional<TensorInfo>& biases,
- bool isFastMathEnabled)
+ bool isFastMathEnabled,
+ const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,6 +49,9 @@
arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+ activationDescriptor);
+
return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
optionalAclBiasesInfo,
@@ -54,7 +59,7 @@
layerInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
}
@@ -92,6 +97,8 @@
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
m_Data.m_Parameters.m_DilationY);
+ const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
convolutionLayer->configure(&input,
m_KernelTensor.get(),
@@ -100,7 +107,7 @@
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionMethod =
@@ -110,7 +117,7 @@
padStrideInfo,
arm_compute::WeightsInfo(),
aclDilationInfo,
- arm_compute::ActivationLayerInfo(),
+ activationInfo,
isFastMathEnabled);
m_ConvolutionLayer.reset(convolutionLayer.release());