IVGCVSW-5157 'Pipe ModelOption through Network::LoadNetwork() to Workload factory'

* Pass ModelOptions to WorkloadFactory
* Updated signatures of CL and NEON Convolution2d workloads; added FastMathEnabled param.


Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I536178be8e4dd4083489e69febadaf0feeba46d2
diff --git a/include/armnn/backends/IBackendInternal.hpp b/include/armnn/backends/IBackendInternal.hpp
index ee9cb49..4815529 100644
--- a/include/armnn/backends/IBackendInternal.hpp
+++ b/include/armnn/backends/IBackendInternal.hpp
@@ -118,6 +118,14 @@
     virtual IWorkloadFactoryPtr CreateWorkloadFactory(
         class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const;
 
+    virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+        const IMemoryManagerSharedPtr& memoryManager,
+        const ModelOptions& modelOptions) const;
+
+    virtual IWorkloadFactoryPtr CreateWorkloadFactory(
+        class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+        const ModelOptions& modelOptions) const;
+
     /// Create the runtime context of the backend
     ///
     /// Implementations may return a default-constructed IBackendContextPtr if
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index 4a293b9..7b64a88 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -146,14 +146,16 @@
 
             if (backend->SupportsTensorAllocatorAPI())
             {
-                auto workloadFactory = backend->CreateWorkloadFactory(m_TensorHandleFactoryRegistry);
+                auto workloadFactory = backend->CreateWorkloadFactory(
+                    m_TensorHandleFactoryRegistry, m_OptimizedNetwork->GetModelOptions());
                 m_WorkloadFactories.emplace(
                     std::make_pair(backendId, std::make_pair(std::move(workloadFactory), nullptr)));
             }
             else
             {
                 IBackendInternal::IMemoryManagerSharedPtr memoryManager = backend->CreateMemoryManager();
-                auto workloadFactory = backend->CreateWorkloadFactory(memoryManager);
+                auto workloadFactory = backend->CreateWorkloadFactory(
+                    memoryManager, m_OptimizedNetwork->GetModelOptions());
 
                 m_WorkloadFactories.emplace(
                     std::make_pair(backendId, std::make_pair(std::move(workloadFactory), memoryManager)));
@@ -361,7 +363,10 @@
     ARMNN_ASSERT_MSG(workloadFactory, "No workload factory");
 
     std::string reasonIfUnsupported;
-    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer, {}, reasonIfUnsupported),
+    ARMNN_ASSERT_MSG(IWorkloadFactory::IsLayerSupported(layer,
+                                                        {},
+                                                        reasonIfUnsupported,
+                                                        m_OptimizedNetwork->GetModelOptions()),
         "Factory does not support layer");
     IgnoreUnused(reasonIfUnsupported);
     return *workloadFactory;
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index fe73550..b13b9b5 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -32,14 +32,16 @@
 
 // Calls CreateWorkload for a layer, and checks the returned pointer is of the correct type.
 template<typename Workload>
-std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer, const IWorkloadFactory& factory)
+std::unique_ptr<Workload> MakeAndCheckWorkload(Layer& layer,
+                                               const IWorkloadFactory& factory,
+                                               const ModelOptions& modelOptions = {})
 {
     std::unique_ptr<IWorkload> workload = layer.CreateWorkload(factory);
     BOOST_TEST(workload.get() == PolymorphicDowncast<Workload*>(workload.get()),
                "Cannot convert to derived class");
     std::string reasonIfUnsupported;
     layer.SetBackendId(factory.GetBackendId());
-    BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported));
+    BOOST_TEST(factory.IsLayerSupported(layer, layer.GetDataType(), reasonIfUnsupported, modelOptions));
     return std::unique_ptr<Workload>(static_cast<Workload*>(workload.release()));
 }
 
@@ -220,7 +222,8 @@
 template <typename Convolution2dWorkload, armnn::DataType DataType>
 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
                                                                        armnn::Graph&            graph,
-                                                                       DataLayout dataLayout = DataLayout::NCHW)
+                                                                       DataLayout dataLayout = DataLayout::NCHW,
+                                                                       const ModelOptions& modelOptions = {})
 {
     // Creates the layer we're testing.
     Convolution2dDescriptor layerDesc;
@@ -255,7 +258,7 @@
     CreateTensorHandles(graph, factory);
 
     // Makes the workload and checks it.
-    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory);
+    auto workload = MakeAndCheckWorkload<Convolution2dWorkload>(*layer, factory, modelOptions);
 
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
     BOOST_TEST(queueDescriptor.m_Parameters.m_StrideX == 2);
diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp
index 1cca61e..0806033 100644
--- a/src/backends/backendsCommon/IBackendInternal.cpp
+++ b/src/backends/backendsCommon/IBackendInternal.cpp
@@ -39,6 +39,28 @@
     return IWorkloadFactoryPtr{};
 }
 
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+    const IMemoryManagerSharedPtr& memoryManager,
+    const ModelOptions& modelOptions) const
+{
+    if(modelOptions.empty())
+    {
+        return CreateWorkloadFactory(memoryManager);
+    }
+    return IWorkloadFactoryPtr{};
+}
+
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+    const ModelOptions& modelOptions) const
+{
+    if(modelOptions.empty())
+    {
+        return CreateWorkloadFactory(tensorHandleFactoryRegistry);
+    }
+    return IWorkloadFactoryPtr{};
+}
+
 IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
 {
     return IBackendContextPtr{};
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 0bafda2..54a4157 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1243,6 +1243,19 @@
                                          modelOptions);
 }
 
+bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+                                        const IConnectableLayer& connectableLayer,
+                                        Optional<DataType> dataType,
+                                        std::string& outReasonIfUnsupported,
+                                        const ModelOptions& modelOptions)
+{
+    return IsLayerConfigurationSupported(backendId,
+                                         connectableLayer,
+                                         dataType,
+                                         outReasonIfUnsupported,
+                                         modelOptions);
+}
+
 // Default Implementations
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateAbs(const AbsQueueDescriptor& /*descriptor*/,
                                                        const WorkloadInfo& /*info*/) const
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 68f9da6..5096c3b 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -39,6 +39,12 @@
                                  std::string& outReasonIfUnsupported,
                                  const ModelOptions& modelOptions);
 
+    static bool IsLayerSupported(const BackendId& backendId,
+                                 const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     virtual bool SupportsSubTensors() const = 0;
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 49636d9..6254b0a 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -46,6 +46,13 @@
 }
 
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+    return std::make_unique<ClWorkloadFactory>(
+        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     TensorHandleFactoryRegistry& registry) const
 {
     auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
@@ -57,6 +64,18 @@
             PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
+{
+    auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+    registry.RegisterMemoryManager(memoryManager);
+    registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+
+    return std::make_unique<ClWorkloadFactory>(
+        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
 {
     return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 108124c..af5534e 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -26,6 +26,12 @@
     IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
         TensorHandleFactoryRegistry& registry) const override;
 
+    IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager,
+                                               const ModelOptions& modelOptions) const override;
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+                                              const ModelOptions& modelOptions) const override;
+
     std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
 
     void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 58e17df..4acfa57 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -4,6 +4,7 @@
 //
 #include "ClWorkloadFactory.hpp"
 #include "ClBackendId.hpp"
+#include "ClBackendModelContext.hpp"
 
 #include <Layer.hpp>
 
@@ -42,6 +43,14 @@
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                         Optional<DataType> dataType,
+                                         std::string& outReasonIfUnsupported,
+                                         const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 const BackendId& ClWorkloadFactory::GetBackendId() const
 {
     return s_Id;
@@ -78,7 +87,13 @@
 }
 
 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
-    : m_MemoryManager(memoryManager)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+                                     const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
 }
 
@@ -205,7 +220,22 @@
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
 {
-    return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        if (m_ModelContextPtr.get() != nullptr)
+        {
+            auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
+            if (modelOptions)
+            {
+                isFastMathEnabled = modelOptions->IsFastMathEnabled();
+            }
+        }
+    }
+    return MakeWorkload<ClConvolution2dWorkload>(descriptor,
+                                                 info,
+                                                 m_MemoryManager->GetIntraLayerManager(),
+                                                 isFastMathEnabled);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 80cd7c5..fad5dd0 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -7,6 +7,8 @@
 #include <armnn/IRuntime.hpp>
 #include <armnn/Optional.hpp>
 
+#include <armnn/backends/IBackendInternal.hpp>
+
 #include <backendsCommon/WorkloadFactoryBase.hpp>
 #include <aclCommon/BaseMemoryManager.hpp>
 
@@ -19,12 +21,20 @@
 public:
     ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager);
 
+    ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+                      const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
     const BackendId& GetBackendId() const override;
 
     static bool IsLayerSupported(const Layer& layer,
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return true; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -242,6 +252,7 @@
                                                    Args&&... args);
 
     mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
+    const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
 
 } // namespace armnn
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 1dd0abe..fc5ccfe 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
 #include "ClContextControlFixture.hpp"
 #include "ClWorkloadFactoryHelper.hpp"
 
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
 #include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
@@ -304,6 +306,35 @@
     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
 }
 
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+    Graph graph;
+
+    using ModelOptions = std::vector<BackendOptions>;
+    ModelOptions modelOptions = {};
+    BackendOptions gpuAcc("GpuAcc",
+    {
+        { "FastMathEnabled", true }
+    });
+    modelOptions.push_back(gpuAcc);
+
+    ClWorkloadFactory factory =
+        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+    auto workload =
+        CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
+                                                                                           graph,
+                                                                                           DataLayout::NCHW,
+                                                                                           modelOptions);
+
+    ARMNN_ASSERT(workload != nullptr);
+    auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
+    IgnoreUnused(conv2dWorkload);
+    ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // fast_math enabled but configuration does not match with WINOGRAD
+    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
 template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
index 6e3c6fc..f7f1629 100644
--- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
+++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
@@ -27,9 +27,12 @@
     }
 
     static armnn::ClWorkloadFactory GetFactory(
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ModelOptions& modelOptions = {})
     {
-        return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager));
+        armnn::ClBackend backend;
+        return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager),
+                                        backend.CreateBackendSpecificModelContext(modelOptions));
     }
 
     static armnn::ClTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 42c9903..7b52f27 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -59,7 +59,9 @@
 }
 
 ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
-    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+                                                 const WorkloadInfo& info,
+                                                 std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                                                 const bool isFastMathEnabled)
     : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
     , m_ConvolutionLayer(memoryManager)
 {
@@ -95,7 +97,20 @@
                                  &output,
                                  padStrideInfo,
                                  arm_compute::WeightsInfo(),
-                                 aclDilationInfo);
+                                 aclDilationInfo,
+                                 arm_compute::ActivationLayerInfo(),
+                                 isFastMathEnabled);
+
+    m_ConvolutionMethod =
+        m_ConvolutionLayer.get_convolution_method(input.info(),
+                                                  m_KernelTensor->info(),
+                                                  output.info(),
+                                                  padStrideInfo,
+                                                  arm_compute::WeightsInfo(),
+                                                  arm_compute::ActivationLayerInfo(),
+                                                  arm_compute::CLScheduler::get().target(),
+                                                  aclDilationInfo,
+                                                  isFastMathEnabled);
 
     InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
 
@@ -116,6 +131,11 @@
     RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
 }
 
+arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const
+{
+    return m_ConvolutionMethod;
+}
+
 void ClConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 8b0afad..f769422 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -28,16 +28,22 @@
 class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
 {
 public:
-    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                            const WorkloadInfo& info,
+                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                            const bool isFastMathEnabled = false);
     void Execute() const override;
 
+    arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
 
     std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
     std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
 
+    arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
     void FreeUnusedTensors();
 };
 
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 31e08ce..d300960 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -48,6 +48,13 @@
 }
 
 IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+    return std::make_unique<NeonWorkloadFactory>(
+        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
     class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
 {
     auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
@@ -60,6 +67,19 @@
         PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
+{
+    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
+                                                             BaseMemoryManager::MemoryAffinity::Offset);
+
+    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
+
+    return std::make_unique<NeonWorkloadFactory>(
+        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
 {
     return IBackendContextPtr{};
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 6458ecc..42c6666 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -26,6 +26,12 @@
     IWorkloadFactoryPtr CreateWorkloadFactory(
         class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
 
+    IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager,
+                                               const ModelOptions& modelOptions) const override;
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+                                              const ModelOptions& modelOptions) const override;
+
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
     IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
         const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 853a518..0084dbd 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -329,7 +329,7 @@
     {
         if (m_ModelContextPtr.get() != nullptr)
         {
-            auto modelOptions = armnn::PolymorphicDowncast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
             if (modelOptions)
             {
                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 40010fe..928989b 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "NeonBackendId.hpp"
+#include "NeonBackendModelContext.hpp"
 #include "NeonTensorHandle.hpp"
 #include "NeonWorkloadFactory.hpp"
 
@@ -36,13 +37,27 @@
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                           Optional<DataType> dataType,
+                                           std::string& outReasonIfUnsupported,
+                                           const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 const BackendId& NeonWorkloadFactory::GetBackendId() const
 {
     return s_Id;
 }
 
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
-    : m_MemoryManager(memoryManager)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+                                         const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
 }
 
@@ -184,8 +199,22 @@
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
-                                                       m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        if (m_ModelContextPtr.get() != nullptr)
+        {
+            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+            if (modelOptions)
+            {
+                isFastMathEnabled = modelOptions->IsFastMathEnabled();
+            }
+        }
+    }
+    return std::make_unique<NeonConvolution2dWorkload>(descriptor,
+                                                       info,
+                                                       m_MemoryManager->GetIntraLayerManager(),
+                                                       isFastMathEnabled);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 09ae839..6a514e2 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
 
 #include <backendsCommon/WorkloadFactoryBase.hpp>
 #include <aclCommon/BaseMemoryManager.hpp>
@@ -19,12 +20,20 @@
 public:
     NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager);
 
+    NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+                        const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
     const BackendId& GetBackendId() const override;
 
     static bool IsLayerSupported(const Layer& layer,
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return true; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -238,6 +247,7 @@
 
 private:
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
+    const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
 
 } // namespace armnn
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 37d026f..99ff9ae 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
 #include "NeonWorkloadFactoryHelper.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
 #include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 
@@ -276,6 +278,33 @@
     NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
 }
 
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+    Graph graph;
+    using ModelOptions = std::vector<BackendOptions>;
+    ModelOptions modelOptions = {};
+    BackendOptions cpuAcc("CpuAcc",
+    {
+        { "FastMathEnabled", true }
+    });
+    modelOptions.push_back(cpuAcc);
+    NeonWorkloadFactory factory =
+        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+    auto workload =
+        CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
+                                                                                             graph,
+                                                                                             DataLayout::NCHW,
+                                                                                             modelOptions);
+
+    ARMNN_ASSERT(workload != nullptr);
+    auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
+    IgnoreUnused(conv2dWorkload);
+    ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // fast_math enabled but configuration does not match with WINOGRAD
+    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
 template <typename armnn::DataType DataType>
 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
diff --git a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
index 8715011..89052d6 100644
--- a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
+++ b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
@@ -27,10 +27,12 @@
     }
 
     static armnn::NeonWorkloadFactory GetFactory(
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ModelOptions& modelOptions = {})
     {
-        return armnn::NeonWorkloadFactory(
-            armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager));
+        armnn::NeonBackend backend;
+        return armnn::NeonWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager),
+                                          backend.CreateBackendSpecificModelContext(modelOptions));
     }
 
     static armnn::NeonTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 83f7611..d35b968 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -59,8 +59,10 @@
 }
 
 NeonConvolution2dWorkload::NeonConvolution2dWorkload(
-    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    const Convolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info,
+    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+    const bool isFastMathEnabled)
     : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
 {
     using arm_compute::NEDirectConvolutionLayer;
@@ -97,7 +99,19 @@
                                 &output,
                                 padStrideInfo,
                                 arm_compute::WeightsInfo(),
-                                aclDilationInfo);
+                                aclDilationInfo,
+                                arm_compute::ActivationLayerInfo(),
+                                isFastMathEnabled);
+
+    m_ConvolutionMethod =
+        convolutionLayer->get_convolution_method(input.info(),
+                                                 m_KernelTensor->info(),
+                                                 output.info(),
+                                                 padStrideInfo,
+                                                 arm_compute::WeightsInfo(),
+                                                 aclDilationInfo,
+                                                 arm_compute::ActivationLayerInfo(),
+                                                 isFastMathEnabled);
 
     m_ConvolutionLayer.reset(convolutionLayer.release());
 
@@ -120,6 +134,11 @@
     m_ConvolutionLayer->run();
 }
 
+arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const
+{
+    return m_ConvolutionMethod;
+}
+
 void NeonConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 54e08a2..860d78b 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -28,17 +28,23 @@
 public:
     using BaseWorkload<Convolution2dQueueDescriptor>::m_Data;
 
-    NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-                              std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                              const WorkloadInfo& info,
+                              std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                              const bool isFastMathEnabled = false);
 
     void Execute() const override;
 
+    arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
 private:
     std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
 
     std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
     std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
 
+    arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
     void FreeUnusedTensors();
 
 };
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 4ab1701..e7e57b1 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -103,6 +103,14 @@
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                          Optional<DataType> dataType,
+                                          std::string& outReasonIfUnsupported,
+                                          const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       const bool isMemoryManaged) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 3a09588..5f22c9e 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -41,6 +41,11 @@
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return false; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")