IVGCVSW-5328-5329 Fuse Activation

 * Added Fused Activation Optimization to both the CL and Neon backends.
 * Added Fused Activation support to all the CL and Neon workloads
   that support it.
 * Changed ProfilingTest network to be a Convolution layer
   followed by an Abs layer rather than an Activation layer.
 * Added an IBackendInternal::OptimizeSubgraphView function that accepts
   ModelOptions.
 * Network now calls OptimizeSubgraphView, passing in the ModelOptions.

Signed-off-by: Keith Davis <keith.davis@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index e9b75c3..46a1c4b 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -4,8 +4,12 @@
 //
 
 #include "ClMultiplicationWorkload.hpp"
-#include <cl/ClTensorHandle.hpp>
+
+#include <aclCommon/ArmComputeUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
+
+#include <cl/ClTensorHandle.hpp>
+
 #include "ClWorkloadUtils.hpp"
 
 namespace armnn
@@ -13,7 +17,8 @@
 
 arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                      const TensorInfo& input1,
-                                                     const TensorInfo& output)
+                                                     const TensorInfo& output,
+                                                     const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -23,6 +28,9 @@
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
@@ -31,7 +39,8 @@
                                                             &aclOutput,
                                                             1.0f,
                                                             convertPolicy,
-                                                            arm_compute::RoundingPolicy::TO_ZERO);
+                                                            arm_compute::RoundingPolicy::TO_ZERO,
+                                                            activationInfo);
 }
 
 
@@ -50,13 +59,16 @@
                           arm_compute::ConvertPolicy::SATURATE :
                           arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     // Construct
     m_PixelWiseMultiplication.configure(&input0,
                                         &input1,
                                         &output,
                                         1.0f,
                                         convertPolicy,
-                                        arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+                                        arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+                                        activationInfo);
 }
 
 void ClMultiplicationWorkload::Execute() const