IVGCVSW-2455 Move the extra backend code to a new git repo

 * Refactored all the workload factories to drop the redundant "virtual"
   keyword from member functions already marked "override"
 * Removed the precompiled layer support from the optimizer
 * Disabled compiler optimization (-O0) for debug builds with GNU-like
   compilers to ease debugging

Change-Id: I6ab83abd78f422771359295e25b79bb9be613cfd
diff --git a/cmake/GlobalConfig.cmake b/cmake/GlobalConfig.cmake
index 9d7dab0..969b234 100644
--- a/cmake/GlobalConfig.cmake
+++ b/cmake/GlobalConfig.cmake
@@ -67,7 +67,7 @@
 
 # Compiler flags for Debug builds
 if(COMPILER_IS_GNU_LIKE)
-    set(CMAKE_CXX_FLAGS_DEBUG "-g")
+    set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
 elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
     set(CMAKE_CXX_FLAGS_DEBUG "/MDd /ZI /Od")
     # Disable SAFESEH which is necessary for Edit and Continue to work
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 8d800f4..d9e3d0a 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -102,6 +102,11 @@
                                            const FullyConnectedDescriptor& descriptor,
                                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
 
+    virtual bool IsGreaterSupported(const TensorInfo& input0,
+                                    const TensorInfo& input1,
+                                    const TensorInfo& ouput,
+                                    Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
+
     virtual bool IsInputSupported(const TensorInfo& input,
                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
 
@@ -232,11 +237,6 @@
                                         const TensorInfo& input1,
                                         const TensorInfo& output,
                                         Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
-
-    virtual bool IsGreaterSupported(const TensorInfo& input0,
-                                    const TensorInfo& input1,
-                                    const TensorInfo& ouput,
-                                    Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const = 0;
 }; // class ILayerSupport
 
 using ILayerSupportSharedPtr = std::shared_ptr<ILayerSupport>;
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 8a1437a..f9ebad2 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -430,28 +430,6 @@
     Optimizer::Pass(optNetObjPtr->GetGraph(), MakeOptimizations(OptimizeInverseConversionsFp16(),
                                                                 OptimizeInverseConversionsFp32()));
 
-    // Insert pre-compiled layers where required by the backend
-    // TODO: This is a dummy/default backend id used for making the code build until
-    //       we've properly refactored the optimizer
-    const BackendId backendId(Compute::Undefined);
-    auto const& backendRegistry = BackendRegistryInstance();
-    if (backendRegistry.IsBackendRegistered(backendId))
-    {
-        // Obtain a backend object using the registered factory
-        auto backendFactory = backendRegistry.GetFactory(backendId);
-        auto backendObjPtr  = backendFactory();
-
-        OptimizationResult insertPreCompiledLayersResult = InsertPreCompiledLayers(optNetObjPtr,
-                                                                                   backendObjPtr,
-                                                                                   backendSettings,
-                                                                                   errMessages);
-        if (insertPreCompiledLayersResult.m_Error)
-        {
-            // Failed to insert pre-compiled layers
-            return IOptimizedNetworkPtr(nullptr, &IOptimizedNetwork::Destroy);
-        }
-    }
-
     // If the debug flag is set, then insert a DebugLayer after each layer.
     // NOTE: This optimization can only happen strictly after the PreCompiled layers have
     //       already been inserted
@@ -588,8 +566,7 @@
         throw InvalidArgumentException("AddDepthwiseConvolution2dLayer: biases cannot be NULL");
     }
 
-    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor,
-            name);
+    const auto layer = m_Graph->AddLayer<DepthwiseConvolution2dLayer>(convolution2dDescriptor, name);
 
     layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(weights);
 
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 2e43657..9d68d35 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -164,6 +164,14 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsGreaterSupported(const TensorInfo& input0,
+                                          const TensorInfo& input1,
+                                          const TensorInfo& output,
+                                          Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsInputSupported(const TensorInfo& input,
                                         Optional<std::string&> reasonIfUnsupported) const
 {
@@ -313,6 +321,13 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsRsqrtSupported(const TensorInfo &input,
+                                        const TensorInfo &output,
+                                        Optional<std::string &> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool LayerSupportBase::IsSoftmaxSupported(const TensorInfo& input,
                                           const TensorInfo& output,
                                           const SoftmaxDescriptor& descriptor,
@@ -352,19 +367,4 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
-bool LayerSupportBase::IsGreaterSupported(const TensorInfo& input0,
-                                          const TensorInfo& input1,
-                                          const TensorInfo& output,
-                                          Optional<std::string&> reasonIfUnsupported) const
-{
-    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
-}
-
-bool LayerSupportBase::IsRsqrtSupported(const TensorInfo &input,
-                                        const TensorInfo &output,
-                                        Optional<std::string &> reasonIfUnsupported) const
-{
-    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
-}
-
 } // namespace armnn
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 77cb302..5a71b44 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -44,9 +44,10 @@
                                       const TensorInfo& output,
                                       Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
-    bool IsConvertFp32ToFp16Supported(const TensorInfo& input,
-                                      const TensorInfo& output,
-                                      Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+    bool IsConvertFp32ToFp16Supported(
+            const TensorInfo& input,
+            const TensorInfo& output,
+            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
     bool IsConvolution2dSupported(const TensorInfo& input,
                                   const TensorInfo& output,
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 7edd93e..8107176 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -28,8 +28,6 @@
     OptimizedNetworkTests.cpp
     PermuteTestImpl.hpp
     Pooling2dTestImpl.hpp
-    PreCompiledTestImpl.cpp
-    PreCompiledTestImpl.hpp
     QuantizeHelper.hpp
     ReshapeTestImpl.hpp
     RuntimeTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index c9188e3..95fa50b 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -39,7 +39,6 @@
 #include "StridedSliceTestImpl.hpp"
 #include "NormTestImpl.hpp"
 #include "PermuteTestImpl.hpp"
-#include "PreCompiledTestImpl.hpp"
 #include "LstmTestImpl.hpp"
 #include "ConvertFp16ToFp32TestImpl.hpp"
 #include "ConvertFp32ToFp16TestImpl.hpp"
@@ -8656,41 +8655,6 @@
     return Debug1DTest<armnn::DataType::QuantisedAsymm8>(workloadFactory, memoryManager);
 }
 
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return PreCompiledConvolution2dTestImpl(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2Test(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return PreCompiledConvolution2dStride2x2TestImpl(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTest(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager)
-{
-    return PreCompiledDepthwiseConvolution2dTestImpl(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2Test(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager)
-{
-    return PreCompiledDepthwiseConvolution2dStride2x2TestImpl(workloadFactory, memoryManager);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTest(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    return PreCompiledMaxPooling2dTestImpl(workloadFactory, memoryManager);
-}
-
 LayerTestResult<float, 1> Gather1DParamsFloatTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
@@ -8732,4 +8696,4 @@
 {
     return GatherMultiDimParamsMultiDimIndicesTestImpl<armnn::DataType::QuantisedAsymm8>(
         workloadFactory, memoryManager);
-}
\ No newline at end of file
+}
diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp
deleted file mode 100644
index 5a2bba1..0000000
--- a/src/backends/backendsCommon/test/PreCompiledTestImpl.cpp
+++ /dev/null
@@ -1,491 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "PreCompiledTestImpl.hpp"
-
-#include "TensorCopyUtils.hpp"
-
-#include <Graph.hpp>
-#include <Network.hpp>
-#include <Runtime.hpp>
-
-#include <armnn/Exceptions.hpp>
-#include <armnn/INetwork.hpp>
-
-#include <test/TensorHelpers.hpp>
-
-#include <backendsCommon/WorkloadFactory.hpp>
-
-#include <boost/polymorphic_pointer_cast.hpp>
-
-using namespace armnn;
-
-namespace
-{
-
-template<typename ConvolutionDescriptor>
-struct PreCompiledConvolutionHelper
-{
-};
-
-template<>
-struct PreCompiledConvolutionHelper<Convolution2dDescriptor>
-{
-    static IConnectableLayer* AddConvolutionLayerToNetwork(
-        Network& network,
-        Convolution2dDescriptor descriptor,
-        const ConstTensor& weights,
-        const ConstTensor& biases)
-    {
-        return network.AddConvolution2dLayer(descriptor, weights, biases, "convolution");
-    }
-};
-
-template<>
-struct PreCompiledConvolutionHelper<DepthwiseConvolution2dDescriptor>
-{
-    static IConnectableLayer* AddConvolutionLayerToNetwork(
-        Network& network,
-        DepthwiseConvolution2dDescriptor descriptor,
-        const ConstTensor& weights,
-        const ConstTensor& biases)
-    {
-        return network.AddDepthwiseConvolution2dLayer(descriptor, weights, biases, "depthwiseConvolution");
-    }
-};
-
-template<typename ConvolutionDescriptor>
-ConvolutionDescriptor CreateConvolutionDescriptor(unsigned int stride, unsigned int padding)
-{
-    ConvolutionDescriptor descriptor;
-
-    descriptor.m_StrideX     = stride;
-    descriptor.m_StrideY     = stride;
-    descriptor.m_PadLeft     = padding;
-    descriptor.m_PadRight    = padding;
-    descriptor.m_PadTop      = padding;
-    descriptor.m_PadBottom   = padding;
-    descriptor.m_BiasEnabled = true;
-    descriptor.m_DataLayout  = DataLayout::NHWC;
-
-    return descriptor;
-}
-
-static std::vector<uint8_t> CreateIdentityConvolutionKernel(
-    unsigned int kernelSize, unsigned int channels)
-{
-    BOOST_ASSERT(kernelSize % 2 == 1); // kernelSize need to be an odd number
-
-    const unsigned int numElements = channels * (kernelSize * kernelSize);
-    std::vector<uint8_t> kernel(numElements, 0u);
-
-    unsigned int centerIndex = kernelSize / 2;
-    for(unsigned int y = 0u; y < kernelSize; y++)
-    {
-        for(unsigned int x = 0u; x < kernelSize; x++)
-        {
-            for(unsigned int channel = 0u; channel < channels; channel++)
-            {
-                if (x == centerIndex && y == centerIndex)
-                {
-                    const unsigned int flatIndex =
-                        (y * kernelSize * channels) + (x * channels) + channel;
-
-                    kernel[flatIndex] = 1u;
-                }
-            }
-        }
-    }
-
-    return kernel;
-}
-
-template<typename ConvolutionDescriptor>
-std::vector<uint8_t> GetIdentityConvolutionExpectedOutputData(
-    const TensorInfo& inputInfo,
-    const TensorInfo& outputInfo,
-    const ConvolutionDescriptor& descriptor,
-    const std::vector<uint8_t>& inputData)
-{
-    const unsigned int outputDataSize = outputInfo.GetNumElements();
-    std::vector<uint8_t> expectedOutputData(outputDataSize);
-
-    const unsigned int channels = outputInfo.GetShape()[3];
-    BOOST_ASSERT(channels == inputInfo.GetShape()[3]);
-
-    const unsigned int inputW  = inputInfo.GetShape()[2];
-
-    const unsigned int outputH = outputInfo.GetShape()[1];
-    const unsigned int outputW = outputInfo.GetShape()[2];
-
-    // Pick values from the input buffer, but after each iteration skip a number of
-    // rows and columns equal to the stride in the respective dimension
-    for (unsigned int inputY = 0, outputY = 0; outputY < outputH; inputY += descriptor.m_StrideY, outputY++)
-    {
-        for (unsigned int inputX = 0, outputX = 0; outputX < outputW; inputX += descriptor.m_StrideX, outputX++)
-        {
-            for (unsigned int channel = 0u; channel < channels; channel++)
-            {
-                const unsigned int inputIndex  =
-                    (inputY * inputW * channels) + (inputX * channels) + channel;
-                const unsigned int outputIndex =
-                    (outputY * outputW * channels) + (outputX * channels) + channel;
-
-                expectedOutputData[outputIndex] = inputData[inputIndex];
-            }
-        }
-    }
-
-    return expectedOutputData;
-}
-
-armnn::PreCompiledLayer* FindPreCompiledLayer(armnn::Graph& optimisedGraph)
-{
-    for (auto& layer : optimisedGraph)
-    {
-        if (layer->GetType() == armnn::LayerType::PreCompiled)
-        {
-            return boost::polymorphic_pointer_downcast<armnn::PreCompiledLayer>(layer);
-        }
-    }
-
-    // No pre-compiled layer found
-    return nullptr;
-}
-
-// NOTE: This only supports a single input and a single output
-LayerTestResult<uint8_t, 4> OptimiseAndRunNetwork(armnn::IWorkloadFactory& workloadFactory,
-                                                  Network& net,
-                                                  TensorInfo inputInfo,
-                                                  std::vector<uint8_t> inputData,
-                                                  TensorInfo outputInfo,
-                                                  std::vector<uint8_t> expectedOutputData)
-{
-    // Optimize the network for the backend supported by the factory
-    std::vector<BackendId> backends = {workloadFactory.GetBackendId()};
-    IRuntimePtr runtime(IRuntime::Create(IRuntime::CreationOptions()));
-    IOptimizedNetworkPtr optimizedNet = Optimize(net, backends, runtime->GetDeviceSpec(), OptimizerOptions());
-    if (!optimizedNet)
-    {
-        throw RuntimeException(std::string("Failed to optimize network for ") + std::string(backends[0]),
-                               CHECK_LOCATION());
-    }
-
-    // Find the pre-compiled layer in the optimised graph
-    Graph& optimisedGraph = static_cast<OptimizedNetwork*>(optimizedNet.get())->GetGraph();
-    PreCompiledLayer* preCompiledLayer = FindPreCompiledLayer(optimisedGraph);
-    if (!preCompiledLayer)
-    {
-        throw RuntimeException("Could not find pre-compiled layer in optimised graph", CHECK_LOCATION());
-    }
-
-    // Create the tensor handles
-    for (auto&& layer : optimisedGraph.TopologicalSort())
-    {
-        layer->CreateTensorHandles(optimisedGraph, workloadFactory);
-    }
-
-    // Create the pre-compiled workload
-    auto workload = preCompiledLayer->CreateWorkload(optimisedGraph, workloadFactory);
-
-    // Set the input data
-    boost::multi_array<uint8_t, 4> input = MakeTensor<uint8_t, 4>(inputInfo, inputData);
-    const QueueDescriptor& workloadData =
-        static_cast<BaseWorkload<PreCompiledQueueDescriptor>*>(workload.get())->GetData();
-    CopyDataToITensorHandle(workloadData.m_Inputs[0], &input[0][0][0][0]);
-
-    // Execute the workload
-    workload->Execute();
-
-    // Set the expected and actual outputs
-    LayerTestResult<uint8_t, 4> result(outputInfo);
-    result.outputExpected = MakeTensor<uint8_t, 4>(outputInfo, expectedOutputData);
-    CopyDataFromITensorHandle(&result.output[0][0][0][0], workloadData.m_Outputs[0]);
-    return result;
-}
-
-} // anonymous namespace
-
-template<typename ConvolutionDescriptor>
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
-    unsigned int inputSize,
-    unsigned int outputSize,
-    unsigned int channels,
-    unsigned int kernelSize,
-    const ConvolutionDescriptor& descriptor,
-    bool isDepthwiseConvolution = false)
-{
-    BOOST_ASSERT(descriptor.m_BiasEnabled == true);
-    BOOST_ASSERT(descriptor.m_DataLayout  == DataLayout::NHWC);
-
-    // Set up tensor shapes and infos
-    const TensorShape inputShape ({1, inputSize,  inputSize,  channels});
-    const TensorShape outputShape({1, outputSize, outputSize, channels});
-    const TensorShape kernelShape = isDepthwiseConvolution
-                                    // The format for the depthwise convolution is MIHW
-                                    ? TensorShape({1, channels, kernelSize, kernelSize})
-                                    // The format for the regular convolution depends on the layout of the inputs,
-                                    // in this case is NHWC
-                                    : TensorShape({1, kernelSize, kernelSize, channels});
-    const TensorShape biasesShape({1, 1, 1, channels});
-
-    // NOTE: inputScale * weightsScale / outputScale must be >= 0.0 and < 1.0
-    TensorInfo inputInfo(inputShape, DataType::QuantisedAsymm8, 1.0f, 0);
-    TensorInfo outputInfo(outputShape, DataType::QuantisedAsymm8, 2.0f, 0);
-    TensorInfo weightsInfo(kernelShape, DataType::QuantisedAsymm8, 1.0f, 0);
-    TensorInfo biasesInfo(biasesShape, DataType::Signed32, 1.0f, 0);
-
-    // Populate weight and bias data
-    std::vector<uint8_t> weightsData = CreateIdentityConvolutionKernel(kernelSize, channels);
-
-    // NOTE: We need to multiply the elements of the identity kernel by 2
-    // to compensate for the scaling factor
-    std::transform(weightsData.begin(), weightsData.end(), weightsData.begin(),
-        [](uint8_t w) -> uint8_t { return static_cast<uint8_t>(w * 2); });
-
-    const unsigned int biasDataSize = biasesInfo.GetNumElements();
-    std::vector<int32_t> biasesData(biasDataSize, 0);
-
-    // Construct network
-    Network network;
-    ConstTensor weights(weightsInfo, weightsData);
-    ConstTensor biases(biasesInfo, biasesData);
-
-    IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
-
-    IConnectableLayer* const convolutionLayer =
-        PreCompiledConvolutionHelper<ConvolutionDescriptor>
-            ::AddConvolutionLayerToNetwork(network, descriptor, weights, biases);
-
-    IConnectableLayer* const outputLayer = network.AddOutputLayer(0, "output");
-
-    inputLayer->GetOutputSlot(0).Connect(convolutionLayer->GetInputSlot(0));
-    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
-
-    convolutionLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-    convolutionLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
-
-    // Generate input data: sequence [0, 1 .. 255]
-    const unsigned int inputDataSize = inputInfo.GetNumElements();
-    std::vector<uint8_t> inputData(inputDataSize);
-    std::iota(inputData.begin(), inputData.end(), 0);
-
-    // Set expected output
-    std::vector<uint8_t> expectedOutputData =
-        GetIdentityConvolutionExpectedOutputData(inputInfo,
-                                                 outputInfo,
-                                                 descriptor,
-                                                 inputData);
-
-    return OptimiseAndRunNetwork(workloadFactory,
-                                 network,
-                                 inputInfo,
-                                 inputData,
-                                 outputInfo,
-                                 expectedOutputData);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputSize  = 16;
-    const unsigned int outputSize = 16;
-    const unsigned int channels   = 1;
-    const unsigned int kernelSize = 3;
-    const unsigned int stride     = 1;
-    const unsigned int padding    = 1;
-
-    Convolution2dDescriptor descriptor =
-        CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding);
-
-    return PreCompiledConvolution2dTestImpl(workloadFactory,
-                                            memoryManager,
-                                            inputSize,
-                                            outputSize,
-                                            channels,
-                                            kernelSize,
-                                            descriptor);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    const unsigned int inputSize  = 16;
-    const unsigned int outputSize = 8;
-    const unsigned int channels   = 1;
-    const unsigned int kernelSize = 3;
-    const unsigned int stride     = 2;
-    const unsigned int padding    = 1;
-
-    Convolution2dDescriptor descriptor =
-        CreateConvolutionDescriptor<Convolution2dDescriptor>(stride, padding);
-
-    return PreCompiledConvolution2dTestImpl(workloadFactory,
-                                            memoryManager,
-                                            inputSize,
-                                            outputSize,
-                                            channels,
-                                            kernelSize,
-                                            descriptor);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager)
-{
-    const unsigned int inputSize  = 16;
-    const unsigned int outputSize = 16;
-    const unsigned int channels   = 3;
-    const unsigned int kernelSize = 1;
-    const unsigned int stride     = 1;
-    const unsigned int padding    = 0;
-
-    DepthwiseConvolution2dDescriptor descriptor =
-        CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding);
-
-    return PreCompiledConvolution2dTestImpl(workloadFactory,
-                                            memoryManager,
-                                            inputSize,
-                                            outputSize,
-                                            channels,
-                                            kernelSize,
-                                            descriptor,
-                                            true);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager)
-{
-    const unsigned int inputSize  = 16;
-    const unsigned int outputSize = 8;
-    const unsigned int channels   = 3;
-    const unsigned int kernelSize = 3;
-    const unsigned int stride     = 2;
-    const unsigned int padding    = 1;
-
-    DepthwiseConvolution2dDescriptor descriptor =
-        CreateConvolutionDescriptor<DepthwiseConvolution2dDescriptor>(stride, padding);
-
-    return PreCompiledConvolution2dTestImpl(workloadFactory,
-                                            memoryManager,
-                                            inputSize,
-                                            outputSize,
-                                            channels,
-                                            kernelSize,
-                                            descriptor);
-}
-
-LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
-{
-    // Pooling cannot be run in isolation, it must be fused with the previous layer, e.g. Convolution2d.
-
-    // Set up the Convolution descriptor
-    Convolution2dDescriptor convDescriptor;
-    convDescriptor.m_StrideX = 1;
-    convDescriptor.m_StrideY = 1;
-    convDescriptor.m_BiasEnabled = true;
-    convDescriptor.m_DataLayout  = DataLayout::NHWC;
-
-    // Set up the Convolution weights
-    TensorInfo weightsInfo(TensorShape({16, 1, 1, 16}), DataType::QuantisedAsymm8, 2.0f, 0);
-    const unsigned int weightsDataSize = weightsInfo.GetNumElements();
-    std::vector<uint8_t> weightsData(weightsDataSize);
-    for (unsigned int i = 0; i < 16; ++i)
-    {
-        for (unsigned int j = 0; j < 16; ++j)
-        {
-            weightsData[(i * 16) + j] = i == j ? 1.0f : 0.0f;
-        }
-    }
-    ConstTensor weights(weightsInfo, weightsData);
-
-    // Set up the Convolution biases
-    TensorInfo biasInfo(TensorShape({1, 1, 1, 16}), DataType::Signed32, 1.0f * 2.0f, 0);
-    const unsigned int biasDataSize = biasInfo.GetNumElements();
-    std::vector<int32_t> biasData(biasDataSize, 0);
-    ConstTensor biases(biasInfo, biasData);
-
-    // Set up the Convolution input
-    TensorInfo inputInfo(TensorShape({1, 16, 16, 16 }), DataType::QuantisedAsymm8, 1.0f, 0);
-    const unsigned int inputDataSize = inputInfo.GetNumElements();
-    std::vector<uint8_t> inputData(inputDataSize);
-    for (unsigned int i = 0; i < inputDataSize; ++i)
-    {
-        inputData[i] = boost::numeric_cast<uint8_t>((i * 4) % 250);
-    }
-
-    // Set up the Convolution output / Pooling input info
-    TensorInfo convOutputInfo(TensorShape({1, 16, 16, 16 }), DataType::QuantisedAsymm8, 4.0f, 0);
-
-    // Set up the Pooling descriptor
-    Pooling2dDescriptor poolDescriptor;
-    poolDescriptor.m_PoolType = PoolingAlgorithm::Max;
-    poolDescriptor.m_PoolWidth = 2;
-    poolDescriptor.m_PoolHeight = 2;
-    poolDescriptor.m_StrideX = 2;
-    poolDescriptor.m_StrideY = 2;
-    poolDescriptor.m_PaddingMethod = PaddingMethod::Exclude;
-    poolDescriptor.m_DataLayout = DataLayout::NHWC;
-
-    // Set the expected output from the Pooling layer
-    TensorInfo outputInfo(TensorShape({1, 8, 8, 16 }), DataType::QuantisedAsymm8, 4.0f, 0);
-    const unsigned int outputDataSize = outputInfo.GetNumElements();
-    std::vector<uint8_t> expectedOutputData(outputDataSize);
-    // The Maxpooling inputs are the Convolution outputs, i.e. (Convolution inputs / 2) after scale adjustments
-    // Maxpooling selects the max value in each pool from its inputs and our pool size is 2x2
-    for (unsigned int channel = 0; channel < 16; ++channel)
-    {
-        for (unsigned int row = 0; row < 8; ++row)
-        {
-            for (unsigned int column = 0; column < 8; ++column)
-            {
-                // The input and output data indexes are calculated for NHWC data layout.
-                // Output index: (row * columns * channels) + (column * channels) + channel
-                auto outIndex = (row * 8 * 16) + (column * 16) + channel;
-                // Input index: (row * strideY * columns * channels) + (column * strideX * channels) + channel
-                //      and we take 4 entries for the 2x2 pool
-                auto in0Index = ((row * 2) * 16 * 16) + ((column * 2) * 16) + channel;
-                auto in1Index = ((row * 2) * 16 * 16) + (((column * 2) + 1) * 16) + channel;
-                auto in2Index = (((row * 2) + 1) * 16 * 16) + ((column * 2) * 16) + channel;
-                auto in3Index = (((row * 2) + 1) * 16 * 16) + (((column * 2) + 1) * 16) + channel;
-                // output value is the maximum of the input pool values, adjusted for the quantization scale change
-                auto maxIn = std::max<uint8_t>({inputData[in0Index],
-                                                inputData[in1Index],
-                                                inputData[in2Index],
-                                                inputData[in3Index]});
-                expectedOutputData[outIndex] = maxIn / 2;
-            }
-        }
-    }
-
-    // Construct the network
-    Network net;
-    IConnectableLayer* const inputLayer   = net.AddInputLayer(0, "input");
-    IConnectableLayer* const convLayer    = net.AddConvolution2dLayer(convDescriptor, weights, biases, "conv");
-    IConnectableLayer* const poolingLayer = net.AddPooling2dLayer(poolDescriptor, "pooling2d");
-    IConnectableLayer* const outputLayer  = net.AddOutputLayer(0, "output");
-
-    // Connect the layers
-    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
-    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
-    convLayer->GetOutputSlot(0).Connect(poolingLayer->GetInputSlot(0));
-    convLayer->GetOutputSlot(0).SetTensorInfo(convOutputInfo);
-    poolingLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-    poolingLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
-
-    return OptimiseAndRunNetwork(workloadFactory,
-                                 net,
-                                 inputInfo,
-                                 inputData,
-                                 outputInfo,
-                                 expectedOutputData);
-}
diff --git a/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp b/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp
deleted file mode 100644
index f4e78b6..0000000
--- a/src/backends/backendsCommon/test/PreCompiledTestImpl.hpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-#pragma once
-
-#include "LayerTests.hpp"
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledConvolution2dStride2x2TestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dTestImpl(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledDepthwiseConvolution2dStride2x2TestImpl(
-    armnn::IWorkloadFactory & workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager);
-
-LayerTestResult<uint8_t, 4> PreCompiledMaxPooling2dTestImpl(
-    armnn::IWorkloadFactory& workloadFactory,
-    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index ba2f066..7425537 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -25,137 +25,137 @@
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
-    virtual bool SupportsSubTensors() const override { return true; }
+    bool SupportsSubTensors() const override { return true; }
 
-    virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle&      parent,
-                                                                 TensorShape const&   subTensorShape,
-                                                                 unsigned int const* subTensorOrigin) const override;
+    std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+                                                         TensorShape const& subTensorShape,
+                                                         unsigned int const* subTensorOrigin) const override;
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
-                                                              DataLayout dataLayout) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+                                                      DataLayout dataLayout) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
+                                                const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
-                                                        const WorkloadInfo&              info) const override;
+    std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
+    std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
-                                                    const WorkloadInfo&          info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
-                                                       const WorkloadInfo&           info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&               info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(
-        const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&                 info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
-                                                                const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
-                                                              const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
-                                                             const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
-                                                      const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
+                                                        const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
                                                       const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& Info) const override;
+    std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
-                                                          const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+                                         const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                 const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
 private:
     template<typename FloatWorkload, typename Uint8Workload, typename QueueDescriptorType, typename... Args>
     static std::unique_ptr<IWorkload> MakeWorkload(const QueueDescriptorType& descriptor,
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
old mode 100755
new mode 100644
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index fe9f1b0..52caf7d 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -26,136 +26,136 @@
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
-    virtual bool SupportsSubTensors() const override { return true; }
+    bool SupportsSubTensors() const override { return true; }
 
-    virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
-                                                                 TensorShape const& subTensorShape,
-                                                                 unsigned int const* subTensorOrigin) const override;
+    std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+                                                         TensorShape const& subTensorShape,
+                                                         unsigned int const* subTensorOrigin) const override;
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
-                                                              DataLayout dataLayout) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+                                                      DataLayout dataLayout) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
-                                                   const WorkloadInfo&        info) const override;
+    std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
-                                                    const WorkloadInfo&        info) const override;
+    std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
-                                                        const WorkloadInfo&              info) const override;
+    std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
+                                                const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
+    std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
+    std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
-                                                    const WorkloadInfo&          info) const override;
+    std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
+    std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
+    std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
-                                                       const WorkloadInfo&           info) const override;
+    std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&               info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(
-        const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&                 info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
-                                                                const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&        info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
-                                                              const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
-                                                             const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
-                                                      const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
+                                                        const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
                                                       const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& Info) const override;
+    std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& Info) const override;
+    std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
-                                                          const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+                                         const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
-                                                    const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                 const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
 
 private:
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 443af76..79e73a2 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -31,7 +31,7 @@
 {
 public:
     explicit RefWorkloadFactory();
-    virtual ~RefWorkloadFactory() {}
+    ~RefWorkloadFactory() {}
 
     const BackendId& GetBackendId() const override;
 
@@ -39,141 +39,141 @@
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
-    virtual bool SupportsSubTensors() const override { return false; }
+    bool SupportsSubTensors() const override { return false; }
 
-    virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
-                                                                 TensorShape const& subTensorShape,
-                                                                 unsigned int const* subTensorOrigin) const override
+    std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent,
+                                                         TensorShape const& subTensorShape,
+                                                         unsigned int const* subTensorOrigin) const override
     {
         boost::ignore_unused(parent, subTensorShape, subTensorOrigin);
         return nullptr;
     }
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override;
 
-    virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
-                                                              DataLayout dataLayout) const override;
+    std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo,
+                                                      DataLayout dataLayout) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
+                                                const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor,
-                                                        const WorkloadInfo&              info) const override;
+    std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
+                                               const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
+    std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor,
-                                                    const WorkloadInfo&          info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
-                                                       const WorkloadInfo&           info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor,
-                                                     const WorkloadInfo&           info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&               info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(
-        const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
-                                                           const WorkloadInfo&                 info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
-                                                            const WorkloadInfo&                  info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
-                                                      const WorkloadInfo&            info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
-                                                                const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
-                                                              const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
-                                                             const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
-                                                      const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
-                                                               const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
+                                                        const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor,
                                                       const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& Info) const override;
+    std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
+                                                    const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor,
+                                              const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMaximum(const MaximumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
-                                                            const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor,
+                                          const WorkloadInfo& Info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
-                                                          const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
+                                         const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
+    std::unique_ptr<IWorkload> CreateEqual(const EqualQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
 
-    virtual std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
-                                                     const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
-                                                   const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
-                                                         const WorkloadInfo& info) const override;
-
-    virtual std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
+    std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateMinimum(const MinimumQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateGreater(const GreaterQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateDebug(const DebugQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateRsqrt(const RsqrtQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
+                                                 const WorkloadInfo& info) const override;
+
+    std::unique_ptr<IWorkload> CreateGather(const GatherQueueDescriptor& descriptor,
+                                            const WorkloadInfo& info) const override;
+
 private:
 
     template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>