IVGCVSW-7880 Add check for FP16 backend support
* Check if preferred backends have FP16 support before enabling fp16-turbo-mode
* Unit tests
* Replaced global gpuAccCapabilities with getter method construction
* Replaced deprecated function call in SL shim
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: If29b62b330ca8987de8acf6408db11daf25ca0b5
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 22d2c78..3074c1f 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -37,6 +37,7 @@
#include <algorithm>
#include <memory>
#include <vector>
+#include <armnn/ArmNN.hpp>
namespace armnn
{
@@ -837,14 +838,18 @@
// need to set the compute device on the layer
// before we can check if it is supported
layer->SetBackendId(backend);
+ std::string currentReasonIfUnsupported;
// To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture
// is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs
// to be FP32 and inserting convert layers around the FP32 operator.
- bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported);
+ bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), currentReasonIfUnsupported);
+ reasonIfUnsupported += currentReasonIfUnsupported;
+ // This string matches the error message that is produced by ACL when attempting to run FP16 kernels on
+ // a CPU or build that does not have FP16 support. We use this to check if we should add
+ // conversion layers or not.
std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above";
- if (!isLayerSupported ||
- reasonIfUnsupported.find(checkStr) != std::string::npos)
+ if (!isLayerSupported || currentReasonIfUnsupported.find(checkStr) != std::string::npos)
{
if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16)
{
@@ -994,6 +999,51 @@
return {dataTypeIn, dataTypeOut};
}
+bool CheckFp16Support(BackendsMap& backends,
+ const std::vector<BackendId>& availablePreferredBackends)
+{
+ bool hasFp16 = false;
+ // Check if the first preferred backend has FP16 support
+ auto firstBackend = availablePreferredBackends[0];
+ auto backendObjPtr = backends.find(firstBackend)->second.get();
+ ARMNN_ASSERT(backendObjPtr);
+ auto hasFp16Capability = BackendOptions::BackendOption{"HasFp16", true};
+ auto backendCapabilities = backendObjPtr->GetCapabilities();
+
+ if (HasMatchingCapability(hasFp16Capability, backendCapabilities))
+ {
+ // First preferred backend has FP16 support. Enable reduce FP32 to FP16 when fp16-turbo-mode is enabled.
+ hasFp16 = true;
+ ARMNN_LOG(debug) << "The first available preferred backend: " << firstBackend
+ << ", has FP16 support.";
+ }
+ else
+ {
+ ARMNN_LOG(warning) << "The first available preferred backend: " << firstBackend
+ << ", does not have FP16 support. "
+                           << "The FP16 turbo mode option will be disabled. It will run using FP32.";
+ }
+
+ // Check if the rest of the available preferred backends have FP16 support
+ for (size_t i = 1; i < availablePreferredBackends.size(); ++i)
+ {
+ auto backend = availablePreferredBackends[i];
+ backendObjPtr = backends.find(backend)->second.get();
+ backendCapabilities = backendObjPtr->GetCapabilities();
+ if (!HasMatchingCapability(hasFp16Capability, backendCapabilities))
+ {
+ ARMNN_LOG(warning) << "Next preferred backend: " << backend << ", does not have FP16 support. "
+ << "It will run using FP32 when falling back to this backend.";
+ }
+ else
+ {
+ ARMNN_LOG(debug) << "Next preferred backend: " << backend << ", has FP16 support.";
+ }
+ }
+
+ return hasFp16;
+}
+
// Refactor to allow passing the IConnectableLayer* rather than Layer Iterator
// on Graph and SubgraphView which are different types.
void AssignBackendsIConnectable(OptimizedNetworkImpl* optNetObjPtr,
@@ -1913,16 +1963,10 @@
FuseBatchNormIntoDepthwiseConvolution2DFloat16()));
- if (options.GetReduceFp32ToFp16())
- {
- ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
- Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
- Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
- }
-
// Initialize backend settings
BackendSettings backendSettings(backendPreferences, deviceSpec);
- if (backendSettings.GetAvailablePreferredBackends().empty())
+ auto availablePreferredBackends = backendSettings.GetAvailablePreferredBackends();
+ if (availablePreferredBackends.empty())
{
std::stringstream failureMsg;
failureMsg << "None of the preferred backends " << backendPreferences
@@ -1935,6 +1979,17 @@
TensorHandleFactoryRegistry tensorHandleFactoryRegistry;
BackendsMap backends = CreateSupportedBackends(tensorHandleFactoryRegistry, backendSettings);
+ if (options.GetReduceFp32ToFp16())
+ {
+ bool hasFp16 = CheckFp16Support(backends, availablePreferredBackends);
+ if (hasFp16)
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "Optimizer_ReduceFp32ToFp16");
+ Optimizer::Pass(optGraph, MakeOptimizations(Fp32NetworkToFp16Converter()));
+ Optimizer::Pass(optGraph, MakeOptimizations(ConvertConstantsFloatToHalf()));
+ }
+ }
+
// Assign an available backend to each layer
Graph::Iterator firstLayer = optGraph.begin();
Graph::Iterator lastLayer = optGraph.end();
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index a84a0e9..3454924 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -350,6 +350,8 @@
}
/// Reduces all Fp32 operators in the model to Fp16 for faster processing.
+ /// If the first preferred backend does not have Fp16 support, this option will be disabled.
+ /// If a converted Fp16 value would be infinity, it is rounded to the closest finite Fp16 value.
/// @Note This feature works best if all operators of the model are in Fp32. ArmNN will add conversion layers
/// between layers that weren't in Fp32 in the first place or if the operator is not supported in Fp16.
/// The overhead of these conversions can lead to a slower overall performance if too many conversions are
diff --git a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
index 0a4a4fa..90f94bc 100644
--- a/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
+++ b/src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,6 +7,12 @@
#include <Optimizer.hpp>
+#if defined(ARMNNREF_ENABLED)
+#include <CommonTestUtils.hpp>
+#include <GraphUtils.hpp>
+#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>
+#endif
+
#include <doctest/doctest.h>
TEST_SUITE("Optimizer")
@@ -50,4 +56,284 @@
CHECK(floor->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
}
+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("ReduceFp32ToFp16EnabledBackendHasFp16SupportTest")
+{
+ using namespace armnn;
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { Compute::CpuRef };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Layers are added to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledBackendNoFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { "MockRef" };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Do not add layers to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Checks that data type is FP32
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendHasFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { Compute::CpuRef, "MockRef" };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Layers are added to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ConvertFp32ToFp16Layer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ConvertFp16ToFp32Layer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float16);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float16);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float16);
+}
+
+TEST_CASE("ReduceFp32ToFp16EnabledFirstBackendNoFp16SupportTest")
+{
+ using namespace armnn;
+
+ // Create a mock backend without FP16 support
+ MockImportBackendInitialiser initialiser; // Register the Mock Backend
+ auto backendObjPtr = CreateBackendObject(MockImportBackendId());
+ CHECK((backendObjPtr != nullptr));
+
+ BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
+ if (backendIds.find("MockRef") == backendIds.end())
+ {
+ std::string message = "Cannot load MockRef";
+ FAIL(message);
+ }
+
+ IRuntime::CreationOptions options;
+ IRuntimePtr runtime(IRuntime::Create(options));
+
+ // Builds up the structure of the network.
+ INetworkPtr net(INetwork::Create());
+
+ IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
+ IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
+ IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
+ input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
+ input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
+ add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
+ sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+ TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
+
+ input0->GetOutputSlot(0).SetTensorInfo(info);
+ input1->GetOutputSlot(0).SetTensorInfo(info);
+ input2->GetOutputSlot(0).SetTensorInfo(info);
+ add->GetOutputSlot(0).SetTensorInfo(info);
+ sub->GetOutputSlot(0).SetTensorInfo(info);
+
+ std::vector<BackendId> backends = { "MockRef", Compute::CpuRef };
+
+ // optimize the network
+ OptimizerOptionsOpaque optOptions;
+ optOptions.SetReduceFp32ToFp16(true);
+ IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ // Do not add layers to convert the inputs to FP16
+ CHECK(CheckSequence(graph.cbegin(), graph.cend(), &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::InputLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::ElementwiseBinaryLayer>,
+ &IsLayerOfType<armnn::OutputLayer>));
+
+ // Checks that data type is FP32
+ Layer* const addLayer = GetFirstLayerWithName(graph, "add");
+ Layer* const subLayer = GetFirstLayerWithName(graph, "sub");
+
+ CHECK(addLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(addLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(addLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+
+ CHECK(subLayer->GetDataType() == armnn::DataType::Float32);
+ CHECK(subLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo().GetDataType()
+ == armnn::DataType::Float32);
+ CHECK(subLayer->GetOutputSlot(0).GetTensorInfo().GetDataType() == armnn::DataType::Float32);
+}
+#endif // ARMNNREF_ENABLED
+
}
\ No newline at end of file
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 2c41285..303f8ac 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -115,5 +115,6 @@
ifeq ($(ARMNN_REF_ENABLED),1)
COMMON_TEST_SOURCES += \
+ test/mockBackend/MockImportBackend.cpp \
test/WorkloadDataValidation.cpp
endif # ARMNN_REF_ENABLED == 1
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index b018654..532892e 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -273,6 +273,23 @@
return std::make_unique<ClBackendDefaultAllocator>();
}
+BackendCapabilities ClBackend::GetCapabilities() const
+{
+ // add new capabilities here..
+ return BackendCapabilities ("GpuAcc",
+ {
+ {"NonConstWeights", true},
+ {"AsyncExecution", false},
+ {"ProtectedContentAllocation", true},
+ {"ConstantTensorsAsInputs", true},
+ {"PreImportIOTensors", false},
+ {"ExternallyManagedMemory", true},
+ {"MultiAxisPacking", false},
+ {"SingleAxisPacking", true},
+ {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
+ });
+}
+
OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
const ModelOptions& modelOptions) const
{
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index d276eac..1d2a866 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -21,19 +21,6 @@
namespace armnn
{
-// add new capabilities here..
-const BackendCapabilities gpuAccCapabilities("GpuAcc",
- {
- {"NonConstWeights", true},
- {"AsyncExecution", false},
- {"ProtectedContentAllocation", true},
- {"ConstantTensorsAsInputs", true},
- {"PreImportIOTensors", false},
- {"ExternallyManagedMemory", true},
- {"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
- });
-
class ClBackend : public IBackendInternal
{
public:
@@ -90,10 +77,7 @@
std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
- BackendCapabilities GetCapabilities() const override
- {
- return gpuAccCapabilities;
- };
+ BackendCapabilities GetCapabilities() const override;
virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
armnn::Optional<std::string&> errMsg) override
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
index 3d4341d..9d721c0 100644
--- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -11,6 +11,7 @@
#include <cl/ClWorkloadFactory.hpp>
#include <cl/ClBackendContext.hpp>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <armnnUtils/Filesystem.hpp>
@@ -94,15 +95,28 @@
const armnn::Graph& graph = GetGraphForTesting(optimizedNet.get());
- // Tests that all layers are present in the graph.
- CHECK(graph.GetNumLayers() == 5);
+ if(arm_compute::CLKernelLibrary::get().fp16_supported())
+ {
+ // Tests that all layers are present in the graph.
+ CHECK(graph.GetNumLayers() == 5);
- // Tests that the vertices exist and have correct names.
- CHECK(GraphHasNamedLayer(graph, "input layer"));
- CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
- CHECK(GraphHasNamedLayer(graph, "activation layer"));
- CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
- CHECK(GraphHasNamedLayer(graph, "output layer"));
+ // Tests that the vertices exist and have correct names.
+ CHECK(GraphHasNamedLayer(graph, "input layer"));
+ CHECK(GraphHasNamedLayer(graph, "convert_fp32_to_fp16-0-input layer"));
+ CHECK(GraphHasNamedLayer(graph, "activation layer"));
+ CHECK(GraphHasNamedLayer(graph, "convert_fp16_to_fp32-0-output layer"));
+ CHECK(GraphHasNamedLayer(graph, "output layer"));
+ }
+ else
+ {
+ // Tests that all layers except for conversion layers are present in the graph.
+ CHECK(graph.GetNumLayers() == 3);
+
+ // Tests that the vertices exist and have correct names.
+ CHECK(GraphHasNamedLayer(graph, "input layer"));
+ CHECK(GraphHasNamedLayer(graph, "activation layer"));
+ CHECK(GraphHasNamedLayer(graph, "output layer"));
+ }
}
TEST_CASE("FastMathEnabledTestOnGpuAcc")
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 3364e4b..ef5258b 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -6,6 +6,8 @@
#include <armnn/backends/IBackendInternal.hpp>
+#include <arm_compute/core/CPP/CPPTypes.h>
+
namespace armnn
{
@@ -19,7 +21,8 @@
{"PreImportIOTensors", false},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
+ {"SingleAxisPacking", true},
+ {"HasFp16", arm_compute::CPUInfo::get().has_fp16()}
});
diff --git a/src/backends/reference/RefBackend.hpp b/src/backends/reference/RefBackend.hpp
index ecbe4d5..7d355ea 100644
--- a/src/backends/reference/RefBackend.hpp
+++ b/src/backends/reference/RefBackend.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -18,7 +18,8 @@
{"PreImportIOTensors", true},
{"ExternallyManagedMemory", true},
{"MultiAxisPacking", false},
- {"SingleAxisPacking", true}
+ {"SingleAxisPacking", true},
+ {"HasFp16", true}
});
const std::set<armnn::BackendCapability> oldCpuRefCapabilities {