blob: 6ac94337ba0e4453d97dc1f0cf93ef3aedce20ad [file] [log] [blame]
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
Sadik Armagana097d2a2021-11-24 15:47:28 +00006#include <CommonTestUtils.hpp>
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00007
Sadik Armagana097d2a2021-11-24 15:47:28 +00008#include <GraphUtils.hpp>
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00009
Sadik Armagan1625efc2021-06-10 18:24:34 +010010#include <doctest/doctest.h>
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000011
Sadik Armagan1625efc2021-06-10 18:24:34 +010012TEST_SUITE("ClFallback")
13{
14TEST_CASE("ClImportEnabledFallbackToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000015{
16 using namespace armnn;
17
18 IRuntime::CreationOptions options;
19 IRuntimePtr runtime(IRuntime::Create(options));
20
21 // Builds up the structure of the network.
22 INetworkPtr net(INetwork::Create());
23
24 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27 IConnectableLayer* add = net->AddAdditionLayer("add");
28 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29 IConnectableLayer* output = net->AddOutputLayer(0, "output");
30
31 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010037 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +010038 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000039
40 input0->GetOutputSlot(0).SetTensorInfo(info);
41 input1->GetOutputSlot(0).SetTensorInfo(info);
42 input2->GetOutputSlot(0).SetTensorInfo(info);
43 add->GetOutputSlot(0).SetTensorInfo(info);
44 sub->GetOutputSlot(0).SetTensorInfo(info);
45
46 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
47 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
48 sub->BackendSelectionHint(backends[1]);
49
50 // optimize the network
51 OptimizerOptions optOptions;
52 optOptions.m_ImportEnabled = true;
53 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
54
Francis Murtagh3d2b4b22021-02-15 18:23:17 +000055 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000056
57 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
58 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
59 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
60 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
61 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
62 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
63 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
64
65 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +010066 CHECK(CheckOrder(graph, layer0, layer1));
67 CHECK(CheckOrder(graph, layer1, layer2));
68 CHECK(CheckOrder(graph, layer2, layer3));
69 CHECK(CheckOrder(graph, layer3, layer4));
70 CHECK(CheckOrder(graph, layer4, layer5));
71 CHECK(CheckOrder(graph, layer5, layer6));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000072
73 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +010074 CHECK((layer4->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000075
76 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +010077 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000078
79 // Load it into the runtime. It should pass.
80 NetworkId netId;
81 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +010082 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000083 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
84
85 // Creates structures for input & output
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010086 std::vector<float> inputValue0
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000087 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010088 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000089 };
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010090 std::vector<float> inputValue1
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000091 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010092 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000093 };
94 std::vector<float> inputData2
95 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010096 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000097 };
98
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010099 std::vector<float> outputData(16);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000100
101 std::vector<float> expectedOutput
102 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100103 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000104 };
105
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100106 // Prepare aligned data
107 unsigned int numElements = info.GetNumElements();
108 size_t totalBytes = numElements * sizeof(float);
109 const size_t alignment = 64;
110 size_t space = totalBytes + alignment + alignment;
111 auto inputData0 = std::make_unique<uint8_t[]>(space);
112 void* alignedInputPtr0 = inputData0.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100113 CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100114
115 auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
116 std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
117
118 auto inputData1 = std::make_unique<uint8_t[]>(space);
119 void* alignedInputPtr1 = inputData1.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100120 CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100121
122 auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
123 std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
124
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000125 InputTensors inputTensors
126 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100127 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
128 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000129 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
130 };
131 OutputTensors outputTensors
132 {
133 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
134 };
135
136 runtime->GetProfiler(netId)->EnableProfiling(true);
137
138 // Do the inference
139 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
140
141 // Retrieve the Profiler.Print() output to get the workload execution
142 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
143 std::stringstream ss;
144 profilerManager.GetProfiler()->Print(ss);;
145 std::string dump = ss.str();
146
147 // Executed Subtraction using CpuAcc
148 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100149 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000150
151 // Contain CopyMemGeneric
152 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100153 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000154
155 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100156 CHECK(outputData == expectedOutput);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100157
158 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000159}
160
Sadik Armagan1625efc2021-06-10 18:24:34 +0100161TEST_CASE("ClImportDisabledFallbackToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000162{
163 using namespace armnn;
164
165 IRuntime::CreationOptions options;
166 IRuntimePtr runtime(IRuntime::Create(options));
167
168 // Builds up the structure of the network.
169 INetworkPtr net(INetwork::Create());
170
171 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
172 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
173 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
174 IConnectableLayer* add = net->AddAdditionLayer("add");
175 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
176 IConnectableLayer* output = net->AddOutputLayer(0, "output");
177
178 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
179 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
180 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
181 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
182 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
183
184 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100185 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000186
187 input0->GetOutputSlot(0).SetTensorInfo(info);
188 input1->GetOutputSlot(0).SetTensorInfo(info);
189 input2->GetOutputSlot(0).SetTensorInfo(info);
190 add->GetOutputSlot(0).SetTensorInfo(info);
191 sub->GetOutputSlot(0).SetTensorInfo(info);
192
193 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
194 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
195 sub->BackendSelectionHint(backends[1]);
196
197 // optimize the network
198 OptimizerOptions optOptions;
199 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
200
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000201 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000202
203 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
204 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
205 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
206 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
207 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
208 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
209 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
210
211 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100212 CHECK(CheckOrder(graph, layer0, layer1));
213 CHECK(CheckOrder(graph, layer1, layer2));
214 CHECK(CheckOrder(graph, layer2, layer3));
215 CHECK(CheckOrder(graph, layer3, layer4));
216 CHECK(CheckOrder(graph, layer4, layer5));
217 CHECK(CheckOrder(graph, layer5, layer6));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000218
219 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100220 CHECK((layer4->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000221
222 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100223 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000224
225 // Load it into the runtime. It should pass.
226 NetworkId netId;
227 runtime->LoadNetwork(netId, std::move(optNet));
228
229 // Creates structures for input & output
230 std::vector<float> inputData0
231 {
232 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
233 };
234 std::vector<float> inputData1
235 {
236 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
237 };
238 std::vector<float> inputData2
239 {
240 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
241 };
242
243 std::vector<float> outputData(12);
244
245 std::vector<float> expectedOutput
246 {
247 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
248 };
249
250 InputTensors inputTensors
251 {
252 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
253 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
254 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
255 };
256 OutputTensors outputTensors
257 {
258 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
259 };
260
261 runtime->GetProfiler(netId)->EnableProfiling(true);
262
263 // Do the inference
264 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
265
266 // Retrieve the Profiler.Print() output to get the workload execution
267 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
268 std::stringstream ss;
269 profilerManager.GetProfiler()->Print(ss);;
270 std::string dump = ss.str();
271
272 // Executed Subtraction using CpuAcc
273 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100274 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000275
276 // Contain CopyMemGeneric
277 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100278 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000279
280 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100281 CHECK(outputData == expectedOutput);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000282}
283
Sadik Armagan1625efc2021-06-10 18:24:34 +0100284TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000285{
286 using namespace armnn;
287
288 IRuntime::CreationOptions options;
289 IRuntimePtr runtime(IRuntime::Create(options));
290
291 // Builds up the structure of the network.
292 INetworkPtr net(INetwork::Create());
293
294 Pooling2dDescriptor desc;
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100295 desc.m_PoolWidth = 2;
296 desc.m_PoolHeight = 2;
297 desc.m_StrideX = 2;
298 desc.m_StrideY = 2;
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000299
300 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
301 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
302 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
303 IConnectableLayer* add = net->AddAdditionLayer("add");
304 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
305 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
306 IConnectableLayer* output = net->AddOutputLayer(0, "output");
307
308 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
309 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
310 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
311 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
312 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
313 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
314
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100315 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100316 info.SetConstant(true);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100317 TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000318
319 input0->GetOutputSlot(0).SetTensorInfo(info);
320 input1->GetOutputSlot(0).SetTensorInfo(info);
321 input2->GetOutputSlot(0).SetTensorInfo(info);
322 add->GetOutputSlot(0).SetTensorInfo(info);
323 sub->GetOutputSlot(0).SetTensorInfo(info);
324 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
325
326 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
327 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
328 sub->BackendSelectionHint(backends[1]);
329
330 // optimize the network
331 OptimizerOptions optOptions;
332 optOptions.m_ImportEnabled = true;
333 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
334
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000335 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000336
337 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
338 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
339 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
340 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
341 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
342 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
343 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
344 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
345 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
346
347 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100348 CHECK(CheckOrder(graph, layer0, layer1));
349 CHECK(CheckOrder(graph, layer1, layer2));
350 CHECK(CheckOrder(graph, layer2, layer3));
351 CHECK(CheckOrder(graph, layer3, layer4));
352 CHECK(CheckOrder(graph, layer4, layer5));
353 CHECK(CheckOrder(graph, layer5, layer6));
354 CHECK(CheckOrder(graph, layer6, layer7));
355 CHECK(CheckOrder(graph, layer7, layer8));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000356
357 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100358 CHECK((layer4->GetType() == LayerType::MemCopy));
359 CHECK((layer6->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000360
361 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100362 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000363
364 // Load it into the runtime. It should pass.
365 NetworkId netId;
366 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100367 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000368 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
369
370 // Creates structures for input & output
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100371 std::vector<float> inputValue0
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000372 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100373 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000374 };
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100375 std::vector<float> inputValue1
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000376 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100377 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000378 };
379 std::vector<float> inputData2
380 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100381 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000382 };
383
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100384 std::vector<float> outputData(4);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000385
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100386 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
387
388 unsigned int numElements = info.GetNumElements();
389 size_t totalBytes = numElements * sizeof(float);
390 const size_t alignment = 64;
391 size_t space = totalBytes + alignment + alignment;
392 auto inputData0 = std::make_unique<uint8_t[]>(space);
393 void* alignedInputPtr0 = inputData0.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100394 CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100395
396 auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
397 std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
398
399 auto inputData1 = std::make_unique<uint8_t[]>(space);
400 void* alignedInputPtr1 = inputData1.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100401 CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100402
403 auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
404 std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000405
406 InputTensors inputTensors
407 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100408 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
409 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000410 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
411 };
412 OutputTensors outputTensors
413 {
414 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
415 };
416
417 runtime->GetProfiler(netId)->EnableProfiling(true);
418
419 // Do the inference
420 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
421
422 // Retrieve the Profiler.Print() output to get the workload execution
423 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
424 std::stringstream ss;
425 profilerManager.GetProfiler()->Print(ss);;
426 std::string dump = ss.str();
427
428 // Executed Subtraction using CpuAcc
429 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100430 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000431
432 // Correctly switch back to GpuAcc
433 found = dump.find("ClPooling2dWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100434 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000435
436 // Contain CopyMemGeneric
437 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100438 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000439
440 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100441 CHECK(outputData == expectedOutput);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100442
443 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000444}
445
Sadik Armagan1625efc2021-06-10 18:24:34 +0100446TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000447{
448 using namespace armnn;
449
450 IRuntime::CreationOptions options;
451 IRuntimePtr runtime(IRuntime::Create(options));
452
453 // Builds up the structure of the network.
454 INetworkPtr net(INetwork::Create());
455
456 Pooling2dDescriptor desc;
457
458 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
459 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
460 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
461 IConnectableLayer* add = net->AddAdditionLayer("add");
462 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
463 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
464 IConnectableLayer* output = net->AddOutputLayer(0, "output");
465
466 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
467 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
468 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
469 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
470 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
471 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
472
473 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100474 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000475 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
476
477 input0->GetOutputSlot(0).SetTensorInfo(info);
478 input1->GetOutputSlot(0).SetTensorInfo(info);
479 input2->GetOutputSlot(0).SetTensorInfo(info);
480 add->GetOutputSlot(0).SetTensorInfo(info);
481 sub->GetOutputSlot(0).SetTensorInfo(info);
482 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
483
484 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
485 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
486 sub->BackendSelectionHint(backends[1]);
487
488 // optimize the network
489 OptimizerOptions optOptions;
490 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
491
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000492 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000493
494 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
495 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
496 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
497 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
498 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
499 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
500 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
501 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
502 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
503
504 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100505 CHECK(CheckOrder(graph, layer0, layer1));
506 CHECK(CheckOrder(graph, layer1, layer2));
507 CHECK(CheckOrder(graph, layer2, layer3));
508 CHECK(CheckOrder(graph, layer3, layer4));
509 CHECK(CheckOrder(graph, layer4, layer5));
510 CHECK(CheckOrder(graph, layer5, layer6));
511 CHECK(CheckOrder(graph, layer6, layer7));
512 CHECK(CheckOrder(graph, layer7, layer8));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000513
514 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100515 CHECK((layer4->GetType() == LayerType::MemCopy));
516 CHECK((layer6->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000517
518 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100519 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000520
521 // Load it into the runtime. It should pass.
522 NetworkId netId;
523 runtime->LoadNetwork(netId, std::move(optNet));
524
525 // Creates structures for input & output
526 std::vector<float> inputData0
527 {
528 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
529 };
530 std::vector<float> inputData1
531 {
532 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
533 };
534 std::vector<float> inputData2
535 {
536 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
537 };
538
539 std::vector<float> outputData(2);
540
541 std::vector<float> expectedOutput{ 11.0f, -1.0f };
542
543 InputTensors inputTensors
544 {
545 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
546 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
547 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
548 };
549 OutputTensors outputTensors
550 {
551 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
552 };
553
554 runtime->GetProfiler(netId)->EnableProfiling(true);
555
556 // Do the inference
557 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
558
559 // Retrieve the Profiler.Print() output to get the workload execution
560 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
561 std::stringstream ss;
562 profilerManager.GetProfiler()->Print(ss);;
563 std::string dump = ss.str();
564
565 // Executed Subtraction using CpuAcc
566 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000568
569 // Correctly switch back to GpuAcc
570 found = dump.find("ClPooling2dWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100571 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000572
573 // Contain CopyMemGeneric
574 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100575 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000576
577 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100578 CHECK(outputData == expectedOutput);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000579}
580
Sadik Armagan1625efc2021-06-10 18:24:34 +0100581}