IVGCVSW-5568 Revert "IVGCVSW-5563 Fix Crash on model with FullyConnected Sigmoid Activation"

* This reverts commit be25d94aefe53f221304b1f5f344913b708f808b.
* Add unit tests: any receiver layer followed by any activation layer, in Float32, Float16 and QAsymmU8 (see the sketch below)
* Tidy up fuse activation tests
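
A minimal sketch of the new test pattern (all names are the helpers
added to FuseActivationTests.cpp below; the QAsymmU8 Sigmoid case
passes scale 1/256 and offset 0, matching the output quantisation the
Arm Compute Library backends expect for Sigmoid):

    // Build input -> receiver -> activation -> output, then check the
    // network loads and runs whether or not the backend fuses the
    // activation into the receiver layer.
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
    FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>,
                             DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0);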

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: Ie059d03b85cd17eaaafe5188bb173672a1fb9ae0
diff --git a/src/armnn/test/optimizations/FuseActivationTests.cpp b/src/armnn/test/optimizations/FuseActivationTests.cpp
index 0e85597..f28abff 100644
--- a/src/armnn/test/optimizations/FuseActivationTests.cpp
+++ b/src/armnn/test/optimizations/FuseActivationTests.cpp
@@ -19,10 +19,8 @@
 
 BOOST_AUTO_TEST_SUITE(Optimizer)
 
-namespace
+namespace armnn
 {
-const float   g_qScale  = 1.0f;
-const int32_t g_qOffset = 0;
 
 template<typename T>
 std::vector<T> GetVector(unsigned int size, float initial, float increment)
@@ -40,10 +38,10 @@
     return vector;
 }
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct Convolution2dTest
 {
-    using LayerType = armnn::Convolution2dLayer;
+    using LayerType = Convolution2dLayer;
     static std::string GetReceiverLayerName() { return "Convolution2d"; };
     static const bool isElementWise = false;
 
@@ -55,7 +53,9 @@
     constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
         Convolution2dDescriptor descriptor;
         descriptor.m_BiasEnabled = false;
@@ -67,8 +67,8 @@
                                              11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                              21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                              31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
-        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
-        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
         ConstTensor        weights(weightsInfo, weightsVector);
         Optional<ConstTensor> optionalBias;
 
@@ -76,11 +76,11 @@
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
-struct DepthwiseConvolution2dTest
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
+struct DWConvolution2dTest
 {
 public:
-    using LayerType = armnn::DepthwiseConvolution2dLayer;
+    using LayerType = DepthwiseConvolution2dLayer;
     static std::string GetReceiverLayerName() { return "DepthwiseConvolution2d"; };
     static const bool isElementWise = false;
 
@@ -92,7 +92,9 @@
     constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
         DepthwiseConvolution2dDescriptor descriptor;
         descriptor.m_BiasEnabled = false;
@@ -104,8 +106,8 @@
                                             11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                             21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                             31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
-        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
-        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
         ConstTensor        weights(weightsInfo, weightsVector);
         Optional<ConstTensor> optionalBias;
 
@@ -113,11 +115,11 @@
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct FullyConnectedTest
 {
 public:
-    using LayerType = armnn::FullyConnectedLayer;
+    using LayerType = FullyConnectedLayer;
     static std::string GetReceiverLayerName() { return "FullyConnected"; };
     static const bool isElementWise = false;
 
@@ -129,7 +131,9 @@
     constexpr static const unsigned int outputSize = 6;  // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
         FullyConnectedDescriptor descriptor;
         descriptor.m_BiasEnabled = false;
@@ -137,8 +141,8 @@
         std::vector<float> weightsData   = { 1,  2,  3,  4,  5,
                                              6,  7,  8,  9, 10,
                                             11, 12, 13, 14, 15};
-        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
-        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
+        std::vector<T>     weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
+        TensorInfo         weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
         ConstTensor        weights(weightsInfo, weightsVector);
         Optional<ConstTensor> optionalBias;
 
@@ -146,11 +150,11 @@
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct BatchNormTest
 {
 public:
-    using LayerType = armnn::BatchNormalizationLayer;
+    using LayerType = BatchNormalizationLayer;
     static std::string GetReceiverLayerName() { return "BatchNorm"; };
     static const bool isElementWise = false;
 
@@ -161,8 +165,13 @@
     constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         BatchNormalizationDescriptor descriptor;
         descriptor.m_DataLayout = DataLayout::NHWC;
 
@@ -181,10 +190,10 @@
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct MultiplicationTest
 {
-    using LayerType = armnn::MultiplicationLayer;
+    using LayerType = MultiplicationLayer;
     static std::string GetReceiverLayerName() { return "Multiplication"; };
     static const bool isElementWise = true;
 
@@ -195,16 +204,21 @@
     constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         return network->AddMultiplicationLayer(name);
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct AdditionTest
 {
-    using LayerType = armnn::AdditionLayer;
+    using LayerType = AdditionLayer;
     static std::string GetReceiverLayerName() { return "Addition"; };
     static const bool isElementWise = true;
 
@@ -215,16 +229,21 @@
     constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         return network->AddAdditionLayer(name);
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct SubtractionTest
 {
-    using LayerType = armnn::SubtractionLayer;
+    using LayerType = SubtractionLayer;
     static std::string GetReceiverLayerName() { return "Subtraction"; };
     static const bool isElementWise = true;
 
@@ -235,16 +254,21 @@
     constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         return network->AddSubtractionLayer(name);
     }
 };
 
-template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
 struct DivisionTest
 {
-    using LayerType = armnn::DivisionLayer;
+    using LayerType = DivisionLayer;
     static std::string GetReceiverLayerName() { return "Division"; };
     static const bool isElementWise = true;
 
@@ -255,17 +279,21 @@
     constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
 
     static IConnectableLayer* AddReceiverLayer(INetwork* network,
-                                               const char* name)
+                                               const char* name,
+                                               float scale = 1.f,
+                                               int32_t offset = 0)
     {
+        IgnoreUnused(scale);
+        IgnoreUnused(offset);
+
         return network->AddDivisionLayer(name);
     }
 };
 
-} // namespace
-
 template<typename LayerTest,
-         armnn::DataType ArmnnType>
-INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing)
+         DataType ArmnnType>
+INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
+                          float scale, int32_t offset)
 {
     // Create a network
     INetworkPtr network = INetwork::Create();
@@ -273,7 +301,9 @@
     IConnectableLayer* inputLayer = network->AddInputLayer(0);
 
     IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
-                                                                   "receiverLayer");
+                                                                   "receiverLayer",
+                                                                   scale,
+                                                                   offset);
 
     IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                      "activation");
@@ -282,8 +312,8 @@
     IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr;
 
     // Define layers information
-    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, g_qScale, g_qOffset);
-    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, g_qScale, g_qOffset);
+    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
+    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);
 
     // Set layer information
     inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
@@ -308,15 +338,15 @@
 }
 
 template<typename LayerTest,
-         armnn::DataType ArmnnType,
+         DataType ArmnnType,
          typename LayerType = typename LayerTest::LayerType,
-         typename T = armnn::ResolveType<ArmnnType>>
-void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, armnn::Compute
-backendId)
+         typename T = ResolveType<ArmnnType>>
+void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
+                                         float scale = 1.f, int32_t offset = 0)
 {
     // FIRST NETWORK: Fused
     // Construct ArmNN network
-    INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false);
+    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
 
     // Create ArmNN runtime
     IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -326,7 +356,7 @@
 
     Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
 
-    auto checkFusedConv2d = [](const armnn::Layer* const layer)->bool {
+    auto checkFusedConv2d = [](const Layer* const layer)->bool {
         return IsLayerOfType<LayerType>(layer) &&
             (layer->GetNameStr() == "fused-activation-into-receiverLayer");
     };
@@ -344,7 +374,7 @@
 
     //Creates structures for inputs and outputs.
     std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
-    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
+    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
     std::vector<T> outputDataFused(LayerTest::outputSize);
 
     InputTensors  inputTensorsFused{
@@ -357,7 +387,7 @@
 
     // SECOND NETWORK: NotFused
     // Construct ArmNN network
-    INetworkPtr networkNotFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, true);
+    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);
 
     // Create ArmNN runtime
     IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
@@ -370,18 +400,18 @@
     BOOST_CHECK(5 == graphNotFused.GetNumLayers());
     BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
                              graphNotFused.cend(),
-                             &IsLayerOfType<armnn::InputLayer>,
+                             &IsLayerOfType<InputLayer>,
                              &IsLayerOfType<LayerType>,
-                             &IsLayerOfType<armnn::ActivationLayer>,
-                             &IsLayerOfType<armnn::OutputLayer>,
-                             &IsLayerOfType<armnn::OutputLayer>));
+                             &IsLayerOfType<ActivationLayer>,
+                             &IsLayerOfType<OutputLayer>,
+                             &IsLayerOfType<OutputLayer>));
 
     // Load network into runtime
     NetworkId networkIdentifierNotFused;
     BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
 
     //Creates structures for inputs and outputs.
-    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
+    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
     std::vector<T> outputDataNotFused(LayerTest::outputSize);
     std::vector<T> outputData2NotFused(LayerTest::outputSize);
 
@@ -402,6 +432,58 @@
     }
 }
 
+template<typename LayerTest,
+         DataType ArmnnType,
+         typename LayerType = typename LayerTest::LayerType,
+         typename T = ResolveType<ArmnnType>>
+bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
+                              float scale = 1.f, int32_t offset = 0)
+{
+    bool success;
+    try
+    {
+        // Construct ArmNN network
+        INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
+
+        // Create ArmNN runtime
+        IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
+
+        // Optimise ArmNN network
+        IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
+
+        Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
+
+        // Load network into runtime
+        NetworkId networkIdentifier;
+        BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
+
+        // Creates structures for inputs and outputs.
+        std::vector<float> data           = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
+        std::vector<T>     inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
+        std::vector<T>     outputDataFused(LayerTest::outputSize);
+
+        InputTensors  inputTensorsFused{
+            {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
+        OutputTensors outputTensorsFused{
+            {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
+
+        // Execute network
+        run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
+
+        success = true;
+    }
+    catch (const std::exception& e)
+    {
+        std::cerr << e.what() << std::endl;
+        success = false;
+    }
+
+    return success;
+}
+
+} // namespace armnn
+
+using namespace armnn;
 #if defined(ARMCOMPUTENEON_ENABLED)
 // ReLu fused into Receiver Layers Float32
 BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32CpuAccTest)
@@ -410,15 +492,15 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32CpuAccTest)
 {
     ActivationDescriptor activationDescriptor;
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
-    FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32CpuAccTest)
 {
@@ -426,7 +508,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32CpuAccTest)
 {
@@ -434,7 +516,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 
 // BoundedReLu fused into Receiver Layers Float32
@@ -446,7 +528,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32CpuAccTest)
 {
@@ -455,8 +537,8 @@
     activationDescriptor.m_A = 1.0f;
     activationDescriptor.m_B = -1.0f;
 
-    FuseActivationIntoPreviousLayerTest < DepthwiseConvolution2dTest < DataType::Float32 > , DataType::Float32 >
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest)
 {
@@ -466,7 +548,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32CpuAccTest)
 {
@@ -476,7 +558,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 
 // ReLU fused into Receiver Layers QAsymmU8
@@ -486,15 +568,15 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvQAsymmU8CpuAccTest)
 {
     ActivationDescriptor activationDescriptor;
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
-    FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest)
 {
@@ -502,7 +584,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 
 // HardSwish fused into Receiver Layers Float32
@@ -512,7 +594,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
 }
 
 // TanH fused into Receiver Layers Float32
@@ -522,7 +604,91 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
+        (activationDescriptor, 0.0001f, Compute::CpuAcc);
+}
+
+// Test that all receiver layers followed by all activation layers work, whether fused or not
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32CpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+    for (int i = 0; i != 12; ++i)
+    {
+        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+        activationDescriptor.m_A = 1.0f;
+        activationDescriptor.m_B = -1.0f;
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
+            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
+            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
+            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
+    }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16CpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+    for (int i = 0; i != 12; ++i)
+    {
+        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+        activationDescriptor.m_A = 1.0f;
+        activationDescriptor.m_B = -1.0f;
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
+            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
+            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
+            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
+        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
+            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
+    }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8CpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+
+    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::TanH;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::ReLu;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
+    activationDescriptor.m_A = 1.0f;
+    activationDescriptor.m_B = -1.0f;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::HardSwish;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
 }
 #endif
 
@@ -534,15 +700,15 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32GpuAccTest)
 {
     ActivationDescriptor activationDescriptor;
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
-    FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32GpuAccTest)
 {
@@ -550,7 +716,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32GpuAccTest)
 {
@@ -558,7 +724,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat32GpuAccTest)
 {
@@ -566,7 +732,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat32GpuAccTest)
 {
@@ -574,7 +740,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat32GpuAccTest)
 {
@@ -582,7 +748,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat32GpuAccTest)
 {
@@ -590,7 +756,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 
 // BoundedReLu fused into Receiver Layers Float32
@@ -602,7 +768,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32GpuAccTest)
 {
@@ -611,8 +777,8 @@
     activationDescriptor.m_A = 1.0f;
     activationDescriptor.m_B = -1.0f;
 
-    FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest)
 {
@@ -622,7 +788,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32GpuAccTest)
 {
@@ -632,7 +798,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoMulFloat32GpuAccTest)
 {
@@ -642,7 +808,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoAddFloat32GpuAccTest)
 {
@@ -652,7 +818,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoSubFloat32GpuAccTest)
 {
@@ -662,7 +828,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDivFloat32GpuAccTest)
 {
@@ -672,7 +838,7 @@
     activationDescriptor.m_B = -1.0f;
 
     FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 
 // ReLU fused into Receiver Layers QAsymmU8
@@ -682,15 +848,15 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUQIntoDWConvAsymmU8GpuAccTest)
 {
     ActivationDescriptor activationDescriptor;
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
-    FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest)
 {
@@ -698,7 +864,7 @@
     activationDescriptor.m_Function = ActivationFunction::ReLu;
 
     FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 
 // HardSwish fused into Receiver Layers Float32
@@ -708,7 +874,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoMulFloat32GpuAccTest)
 {
@@ -716,7 +882,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoAddFloat32GpuAccTest)
 {
@@ -724,7 +890,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoSubFloat32GpuAccTest)
 {
@@ -732,7 +898,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoDivFloat32GpuAccTest)
 {
@@ -740,7 +906,7 @@
     activationDescriptor.m_Function = ActivationFunction::HardSwish;
 
     FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 
 // TanH fused into Receiver Layers Float32
@@ -750,7 +916,7 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseTanHIntoMulFloat32GpuAccTest)
 {
@@ -758,7 +924,7 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseTanHIntoAddFloat32GpuAccTest)
 {
@@ -766,7 +932,7 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseTanHIntoSubFloat32GpuAccTest)
 {
@@ -774,7 +940,7 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
 }
 BOOST_AUTO_TEST_CASE(FuseTanHIntoDivFloat32GpuAccTest)
 {
@@ -782,7 +948,113 @@
     activationDescriptor.m_Function = ActivationFunction::TanH;
 
     FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
-        (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
+        (activationDescriptor, 0.0001f, Compute::GpuAcc);
+}
+
+// Test that all receiver layers followed by all activation layers work, whether fused or not
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32GpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+    for (int i = 0; i != 12; ++i)
+    {
+        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+        activationDescriptor.m_A = 1.0f;
+        activationDescriptor.m_B = -1.0f;
+        if (activationDescriptor.m_Function != ActivationFunction::Elu)
+        {
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
+                (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
+        }
+    }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16GpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+    for (int i = 0; i != 12; ++i)
+    {
+        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
+        activationDescriptor.m_A = 1.0f;
+        activationDescriptor.m_B = -1.0f;
+        if (activationDescriptor.m_Function != ActivationFunction::Elu)
+        {
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
+            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
+                (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
+        }
+    }
+}
+BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8GpuAccTest)
+{
+    ActivationDescriptor activationDescriptor;
+
+    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::TanH;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::ReLu;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
+    activationDescriptor.m_A = 1.0f;
+    activationDescriptor.m_B = -1.0f;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+
+    activationDescriptor.m_Function = ActivationFunction::HardSwish;
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
+    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
+        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
+        static_cast<int>(activationDescriptor.m_Function));
 }
 #endif
 
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 39fb4c9..31489a0 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -27,16 +27,6 @@
                                                        const FullyConnectedDescriptor& descriptor,
                                                        const ActivationDescriptor* activationDescriptor)
 {
-    if (activationDescriptor)
-    {
-        std::vector<ActivationFunction> activations = {ActivationFunction::ReLu, ActivationFunction::BoundedReLu};
-        if (std::find(activations.begin(), activations.end(), activationDescriptor->m_Function) == activations.end())
-        {
-            return arm_compute::Status{
-                arm_compute::ErrorCode::RUNTIME_ERROR, "NeonFullyConnectedWorkload :Unsupported Activation Function"};
-        }
-    }
-
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
     const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);