blob: 383a5f654ca21690e45c86e4eba914f1af6dbc8b [file] [log] [blame]
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#include <backendsCommon/test/CommonTestUtils.hpp>
7#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>
8
9#include <test/GraphUtils.hpp>
10
11#include <boost/test/unit_test.hpp>
12
13BOOST_AUTO_TEST_SUITE(NeonFallback)
14
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010015BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
16{
17 using namespace armnn;
18
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010019 // Create a mock backend objectN
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010020 MockImportBackendInitialiser initialiser; // Register the Mock Backend
21 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
22 BOOST_TEST((backendObjPtr != nullptr));
23
24 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
25 if (backendIds.find("MockRef") == backendIds.end())
26 {
27 std::string message = "Cannot load MockRef";
28 BOOST_FAIL(message);
29 }
30
31 // Create runtime in which test will run and allow fallback to CpuRef.
32 IRuntime::CreationOptions options;
33 IRuntimePtr runtime(IRuntime::Create(options));
34
35 // Builds up the structure of the network.
36 INetworkPtr net(INetwork::Create());
37
38 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
39 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
40 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
41 IConnectableLayer* add = net->AddAdditionLayer("add");
42 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
43 IConnectableLayer* output = net->AddOutputLayer(0, "output");
44
45 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
48 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
49 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
50
51 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
52
53 input0->GetOutputSlot(0).SetTensorInfo(info);
54 input1->GetOutputSlot(0).SetTensorInfo(info);
55 input2->GetOutputSlot(0).SetTensorInfo(info);
56 add->GetOutputSlot(0).SetTensorInfo(info);
57 sub->GetOutputSlot(0).SetTensorInfo(info);
58
59 // optimize the network
60 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +010061 OptimizerOptions optOptions;
62 optOptions.m_ImportEnabled = true;
63 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010064
Francis Murtagh3d2b4b22021-02-15 18:23:17 +000065 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010066
67 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
68 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
69 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
70 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
71 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
72 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
73 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
74
75 // Checks order is valid.
76 BOOST_TEST(CheckOrder(graph, layer0, layer1));
77 BOOST_TEST(CheckOrder(graph, layer1, layer2));
78 BOOST_TEST(CheckOrder(graph, layer2, layer3));
79 BOOST_TEST(CheckOrder(graph, layer3, layer4));
80 BOOST_TEST(CheckOrder(graph, layer4, layer5));
81 BOOST_TEST(CheckOrder(graph, layer5, layer6));
82
83 // Load it into the runtime. It should pass.
84 NetworkId netId;
85 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +010086 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010087 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
88
89 // Creates structures for input & output
90 std::vector<float> inputData0
91 {
92 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
93 };
94 std::vector<float> inputData1
95 {
96 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
97 };
98 std::vector<float> inputData2
99 {
100 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
101 };
102
103 std::vector<float> outputData(12);
104
105 std::vector<float> expectedOutput
106 {
107 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
108 };
109
110 InputTensors inputTensors
111 {
112 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
113 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
114 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
115 };
116 OutputTensors outputTensors
117 {
118 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
119 };
120
121 runtime->GetProfiler(netId)->EnableProfiling(true);
122
123 // Do the inference
124 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
125
126 // Retrieve the Profiler.Print() output to get the workload execution
127 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
128 std::stringstream ss;
129 profilerManager.GetProfiler()->Print(ss);;
130 std::string dump = ss.str();
131
132 // Contains ImportMemGeneric
133 std::size_t found = dump.find("ImportMemGeneric");
134 BOOST_TEST(found != std::string::npos);
135
136 // Contains SyncMemGeneric
137 found = dump.find("SyncMemGeneric");
138 BOOST_TEST(found != std::string::npos);
139
140 // Does not contain CopyMemGeneric
141 found = dump.find("CopyMemGeneric");
142 BOOST_TEST(found == std::string::npos);
143
144 // Use memory import between backends
145 BOOST_TEST((layer4->GetType() == LayerType::MemImport));
146
147 // Check output is as expected
148 BOOST_TEST(outputData == expectedOutput);
149}
150
151BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
152{
153 using namespace armnn;
154
155 // Create a mock backend object
156 MockImportBackendInitialiser initialiser; // Register the Mock Backend
157 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
158 BOOST_TEST((backendObjPtr != nullptr));
159
160 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
161 if (backendIds.find("MockRef") == backendIds.end())
162 {
163 std::string message = "Cannot load MockRef";
164 BOOST_FAIL(message);
165 }
166
167 // Create runtime in which test will run and allow fallback to CpuRef.
168 IRuntime::CreationOptions options;
169 IRuntimePtr runtime(IRuntime::Create(options));
170
171 // Builds up the structure of the network.
172 INetworkPtr net(INetwork::Create());
173
174 Pooling2dDescriptor desc;
175
176 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
177 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
178 IConnectableLayer* add = net->AddAdditionLayer("add");
179 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
180 IConnectableLayer* output = net->AddOutputLayer(0, "output");
181
182 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
183 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
184 add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
185 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
186
187 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
188 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
189
190 input0->GetOutputSlot(0).SetTensorInfo(info);
191 input1->GetOutputSlot(0).SetTensorInfo(info);
192 add->GetOutputSlot(0).SetTensorInfo(info);
193 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
194
195 // optimize the network
196 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100197 OptimizerOptions optOptions;
198 optOptions.m_ImportEnabled = true;
199 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100200
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000201 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100202
203 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
204 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
205 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
206 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
207 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
208 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
209
210 // Checks order is valid.
211 BOOST_TEST(CheckOrder(graph, layer0, layer1));
212 BOOST_TEST(CheckOrder(graph, layer1, layer2));
213 BOOST_TEST(CheckOrder(graph, layer2, layer3));
214 BOOST_TEST(CheckOrder(graph, layer3, layer4));
215 BOOST_TEST(CheckOrder(graph, layer4, layer5));
216
217 // Load it into the runtime. It should pass.
218 NetworkId netId;
219 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100220 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100221
222 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
223
224 // Creates structures for input & output
225 std::vector<float> inputData0
226 {
227 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
228 };
229 std::vector<float> inputData1
230 {
231 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
232 };
233
234 std::vector<float> outputData(2);
235
236 std::vector<float> expectedOutput
237 {
238 6.0f, 12.0f
239 };
240
241 InputTensors inputTensors
242 {
243 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
244 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
245 };
246 OutputTensors outputTensors
247 {
248 { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
249 };
250
251 runtime->GetProfiler(netId)->EnableProfiling(true);
252
253 // Do the inference
254 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
255
256 // Retrieve the Profiler.Print() output to get the workload execution
257 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
258 std::stringstream ss;
259 profilerManager.GetProfiler()->Print(ss);;
260 std::string dump = ss.str();
261
262 // Contains CopyMemGeneric between the backends
263 std::size_t found = dump.find("CopyMemGeneric");
264 BOOST_TEST(found != std::string::npos);
265
266 // Contains SyncMemGeneric for the output
267 found = dump.find("SyncMemGeneric");
268 BOOST_TEST(found != std::string::npos);
269
270 // Does not contain ImportMemGeneric
271 found = dump.find("ImportMemGeneric");
272 BOOST_TEST(found == std::string::npos);
273
274 // Use memory import between backends
275 BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
276
277 // Check output is as expected
278 BOOST_TEST(outputData == expectedOutput);
279}
280
281BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
282{
283 using namespace armnn;
284
285 // Create a mock backend object
286 MockImportBackendInitialiser initialiser; // Register the Mock Backend
287 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
288 BOOST_TEST((backendObjPtr != nullptr));
289
290 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
291 if (backendIds.find("MockRef") == backendIds.end())
292 {
293 std::string message = "Cannot load MockRef";
294 BOOST_FAIL(message);
295 }
296
297 // Create runtime in which test will run and allow fallback to CpuRef.
298 IRuntime::CreationOptions options;
299 IRuntimePtr runtime(IRuntime::Create(options));
300
301 // Builds up the structure of the network.
302 INetworkPtr net(INetwork::Create());
303
304 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
305 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
306 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
307 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
308 IConnectableLayer* add = net->AddAdditionLayer("add");
309 IConnectableLayer* output = net->AddOutputLayer(0, "output");
310
311 input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
312 input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
313 input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
314 sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
315 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
316
317 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
318
319 input0->GetOutputSlot(0).SetTensorInfo(info);
320 input1->GetOutputSlot(0).SetTensorInfo(info);
321 input2->GetOutputSlot(0).SetTensorInfo(info);
322 sub->GetOutputSlot(0).SetTensorInfo(info);
323 add->GetOutputSlot(0).SetTensorInfo(info);
324
325 // optimize the network
326 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100327 OptimizerOptions optOptions;
328 optOptions.m_ImportEnabled = true;
329 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100330
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000331 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100332
333 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
334 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
335 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
336 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
337 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
338 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
339 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
340
341 // Checks order is valid.
342 BOOST_TEST(CheckOrder(graph, layer0, layer1));
343 BOOST_TEST(CheckOrder(graph, layer1, layer2));
344 BOOST_TEST(CheckOrder(graph, layer2, layer3));
345 BOOST_TEST(CheckOrder(graph, layer3, layer4));
346 BOOST_TEST(CheckOrder(graph, layer4, layer5));
347 BOOST_TEST(CheckOrder(graph, layer5, layer6));
348
349 // Load it into the runtime. It should pass.
350 NetworkId netId;
351 std::string ignoredErrorMessage;
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100352
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100353 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100354 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
355
356 // Creates structures for input & output
357 std::vector<float> inputData0
358 {
359 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
360 };
361 std::vector<float> inputData1
362 {
363 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
364 };
365 std::vector<float> inputData2
366 {
367 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
368 };
369
370 std::vector<float> outputData(12);
371
372 std::vector<float> expectedOutput
373 {
374 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
375 };
376
377 InputTensors inputTensors
378 {
379 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
380 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
381 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
382 };
383 OutputTensors outputTensors
384 {
385 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
386 };
387
388 runtime->GetProfiler(netId)->EnableProfiling(true);
389
390 // Do the inference
391 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
392
393 // Retrieve the Profiler.Print() output to get the workload execution
394 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
395 std::stringstream ss;
396 profilerManager.GetProfiler()->Print(ss);;
397 std::string dump = ss.str();
398
399 // Contains ImportMemGeneric
400 std::size_t found = dump.find("ImportMemGeneric");
401 BOOST_TEST(found != std::string::npos);
402
403 // Contains SyncMemGeneric
404 found = dump.find("SyncMemGeneric");
405 BOOST_TEST(found != std::string::npos);
406
407 // Does not contain CopyMemGeneric
408 found = dump.find("CopyMemGeneric");
409 BOOST_TEST(found == std::string::npos);
410
411 // Use memory import between backends
412 BOOST_TEST((layer4->GetType() == LayerType::MemImport));
413
414 // Check output is as expected
415 BOOST_TEST(outputData == expectedOutput);
416}
417
418BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
419{
420 using namespace armnn;
421
422 // Create a mock backend object
423 MockImportBackendInitialiser initialiser; // Register the Mock Backend
424 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
425 BOOST_TEST((backendObjPtr != nullptr));
426
427 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
428 if (backendIds.find("MockRef") == backendIds.end())
429 {
430 std::string message = "Cannot load MockRef";
431 BOOST_FAIL(message);
432 }
433
434 // Create runtime in which test will run and allow fallback to CpuRef.
435 IRuntime::CreationOptions options;
436 IRuntimePtr runtime(IRuntime::Create(options));
437
438 // Builds up the structure of the network.
439 INetworkPtr net(INetwork::Create());
440
441 Pooling2dDescriptor desc;
442
443 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
444 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
445 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
446 IConnectableLayer* add = net->AddAdditionLayer("add");
447 IConnectableLayer* output = net->AddOutputLayer(0, "output");
448
449 input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
450 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
451 pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
452 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
453
454 TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
455 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
456
457 input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
458 input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
459 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
460 add->GetOutputSlot(0).SetTensorInfo(poolingInfo);
461
462 // optimize the network
463 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100464 OptimizerOptions optOptions;
465 optOptions.m_ImportEnabled = true;
466 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100467
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000468 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100469
470 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
471 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
472 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
473 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
474 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
475 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
476
477 // Checks order is valid.
478 BOOST_TEST(CheckOrder(graph, layer0, layer1));
479 BOOST_TEST(CheckOrder(graph, layer1, layer2));
480 BOOST_TEST(CheckOrder(graph, layer2, layer3));
481 BOOST_TEST(CheckOrder(graph, layer3, layer4));
482 BOOST_TEST(CheckOrder(graph, layer4, layer5));
483
484 // Load it into the runtime. It should pass.
485 NetworkId netId;
486 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100487 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100488
489 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
490
491 // Creates structures for input & output
492 std::vector<float> inputData0
493 {
494 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
495 };
496 std::vector<float> inputData1
497 {
498 -1.0f, 3.0f
499 };
500
501 std::vector<float> outputData(2);
502
503 std::vector<float> expectedOutput
504 {
505 5.0f, 15.0f
506 };
507
508 InputTensors inputTensors
509 {
510 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
511 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
512 };
513 OutputTensors outputTensors
514 {
515 { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
516 };
517
518 runtime->GetProfiler(netId)->EnableProfiling(true);
519
520 // Do the inference
521 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
522
523 // Retrieve the Profiler.Print() output to get the workload execution
524 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
525 std::stringstream ss;
526 profilerManager.GetProfiler()->Print(ss);;
527 std::string dump = ss.str();
528
529 // Contains CopyMemGeneric between the backends
530 std::size_t found = dump.find("CopyMemGeneric");
531 BOOST_TEST(found != std::string::npos);
532
533 // Contains SyncMemGeneric for the output
534 found = dump.find("SyncMemGeneric");
535 BOOST_TEST(found != std::string::npos);
536
537 // Does not contain ImportMemGeneric
538 found = dump.find("ImportMemGeneric");
539 BOOST_TEST(found == std::string::npos);
540
541 // Use memory import between backends
542 BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
543
544 // Check output is as expected
545 BOOST_TEST(outputData == expectedOutput);
546}
547
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100548BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
549{
550 using namespace armnn;
551
552 // Create a mock backend object
553 MockImportBackendInitialiser initialiser; // Register the Mock Backend
554 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
555 BOOST_TEST((backendObjPtr != nullptr));
556
557 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
558 if (backendIds.find("MockRef") == backendIds.end())
559 {
560 std::string message = "Cannot load MockRef";
561 BOOST_FAIL(message);
562 }
563
564 // Create runtime in which test will run and allow fallback to CpuRef.
565 IRuntime::CreationOptions options;
566 IRuntimePtr runtime(IRuntime::Create(options));
567
568 // Builds up the structure of the network.
569 INetworkPtr net(INetwork::Create());
570
571 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
572 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
573 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
574 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
575 IConnectableLayer* add = net->AddAdditionLayer("add");
576 IConnectableLayer* output = net->AddOutputLayer(0, "output");
577
578 input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
579 input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
580 input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
581 sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
582 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
583
584 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
585
586 input0->GetOutputSlot(0).SetTensorInfo(info);
587 input1->GetOutputSlot(0).SetTensorInfo(info);
588 input2->GetOutputSlot(0).SetTensorInfo(info);
589 sub->GetOutputSlot(0).SetTensorInfo(info);
590 add->GetOutputSlot(0).SetTensorInfo(info);
591
592 // optimize the network
593 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
594 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
595
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000596 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100597
598 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
599 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
600 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
601 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
602 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
603 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
604 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
605
606 // Checks order is valid.
607 BOOST_TEST(CheckOrder(graph, layer0, layer1));
608 BOOST_TEST(CheckOrder(graph, layer1, layer2));
609 BOOST_TEST(CheckOrder(graph, layer2, layer3));
610 BOOST_TEST(CheckOrder(graph, layer3, layer4));
611 BOOST_TEST(CheckOrder(graph, layer4, layer5));
612 BOOST_TEST(CheckOrder(graph, layer5, layer6));
613
614 // Load it into the runtime. It should pass.
615 NetworkId netId;
616 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100617 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100618
619 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
620
621 // Creates structures for input & output
622 std::vector<float> inputData0
623 {
624 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
625 };
626 std::vector<float> inputData1
627 {
628 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
629 };
630 std::vector<float> inputData2
631 {
632 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
633 };
634
635 std::vector<float> outputData(12);
636
637 std::vector<float> expectedOutput
638 {
639 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
640 };
641
642 InputTensors inputTensors
643 {
644 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
645 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
646 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
647 };
648 OutputTensors outputTensors
649 {
650 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
651 };
652
653 runtime->GetProfiler(netId)->EnableProfiling(true);
654
655 // Do the inference
656 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
657
658 // Retrieve the Profiler.Print() output to get the workload execution
659 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
660 std::stringstream ss;
661 profilerManager.GetProfiler()->Print(ss);;
662 std::string dump = ss.str();
663
664 // Contains CopyMemGeneric between the backends
665 std::size_t found = dump.find("CopyMemGeneric");
666 BOOST_TEST(found != std::string::npos);
667
668 // Does not contain ImportMemGeneric
669 found = dump.find("ImportMemGeneric");
670 BOOST_TEST(found == std::string::npos);
671
672 // Use memory import between backends
673 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
674
675 // Check output is as expected
676 BOOST_TEST(outputData == expectedOutput);
677}
678
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000679#if defined(ARMCOMPUTECL_ENABLED)
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100680BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000681{
682 using namespace armnn;
683
684 IRuntime::CreationOptions options;
685 IRuntimePtr runtime(IRuntime::Create(options));
686
687 // Builds up the structure of the network.
688 INetworkPtr net(INetwork::Create());
689
690 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
691 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
692 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
693 IConnectableLayer* add = net->AddAdditionLayer("add");
694 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
695 IConnectableLayer* output = net->AddOutputLayer(0, "output");
696
697 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
698 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
699 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
700 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
701 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
702
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100703 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000704
705 input0->GetOutputSlot(0).SetTensorInfo(info);
706 input1->GetOutputSlot(0).SetTensorInfo(info);
707 input2->GetOutputSlot(0).SetTensorInfo(info);
708 add->GetOutputSlot(0).SetTensorInfo(info);
709 sub->GetOutputSlot(0).SetTensorInfo(info);
710
711 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
712 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
713 sub->BackendSelectionHint(backends[1]);
714
715 // optimize the network
716 OptimizerOptions optOptions;
717 optOptions.m_ImportEnabled = true;
718 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
719
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000720 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000721
722 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
723 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
724 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
725 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
726 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
727 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
728 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
729
730 // Checks order is valid.
731 BOOST_TEST(CheckOrder(graph, layer0, layer1));
732 BOOST_TEST(CheckOrder(graph, layer1, layer2));
733 BOOST_TEST(CheckOrder(graph, layer2, layer3));
734 BOOST_TEST(CheckOrder(graph, layer3, layer4));
735 BOOST_TEST(CheckOrder(graph, layer4, layer5));
736 BOOST_TEST(CheckOrder(graph, layer5, layer6));
737
738 // Use memory import between backends
739 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
740
741 // Correctly use backend hint
742 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
743
744 // Load it into the runtime. It should pass.
745 NetworkId netId;
746 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100747
748 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000749
750 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
751
752 // Creates structures for input & output
753 std::vector<float> inputData0
754 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100755 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000756 };
757 std::vector<float> inputData1
758 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100759 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000760 };
761 std::vector<float> inputData2
762 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100763 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000764 };
765
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100766 std::vector<float> outputData(16);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000767
768 std::vector<float> expectedOutput
769 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100770 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000771 };
772
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100773 // Creates structures for input & output
774 unsigned int numElements = info.GetNumElements();
775 size_t totalBytes = numElements * sizeof(float);
776
777 // Prepare aligned data
778 const size_t alignment = 64;
779 size_t space = totalBytes + alignment + alignment;
780 auto inputData = std::make_unique<uint8_t[]>(space);
781 void* alignedInputPtr = inputData.get();
782 BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
783
784 auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
785 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
786
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000787 InputTensors inputTensors
788 {
789 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
790 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100791 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000792 };
793 OutputTensors outputTensors
794 {
795 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
796 };
797
798 runtime->GetProfiler(netId)->EnableProfiling(true);
799
800 // Do the inference
801 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
802
803 // Retrieve the Profiler.Print() output to get the workload execution
804 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
805 std::stringstream ss;
806 profilerManager.GetProfiler()->Print(ss);;
807 std::string dump = ss.str();
808
809 // Executed Subtraction using GpuAcc
810 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
811 BOOST_TEST(found != std::string::npos);
812
813 // Contain CopyMemGeneric
814 found = dump.find("CopyMemGeneric");
815 BOOST_TEST(found != std::string::npos);
816
817 // Check output is as expected
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100818 for(unsigned int i = 0; i < numElements; ++i)
819 {
820 BOOST_TEST(outputData[i] == expectedOutput[i]);
821 }
822 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000823}
824
825BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
826{
827 using namespace armnn;
828
829 IRuntime::CreationOptions options;
830 IRuntimePtr runtime(IRuntime::Create(options));
831
832 // Builds up the structure of the network.
833 INetworkPtr net(INetwork::Create());
834
835 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
836 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
837 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
838 IConnectableLayer* add = net->AddAdditionLayer("add");
839 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
840 IConnectableLayer* output = net->AddOutputLayer(0, "output");
841
842 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
843 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
844 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
845 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
846 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
847
848 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
849
850 input0->GetOutputSlot(0).SetTensorInfo(info);
851 input1->GetOutputSlot(0).SetTensorInfo(info);
852 input2->GetOutputSlot(0).SetTensorInfo(info);
853 add->GetOutputSlot(0).SetTensorInfo(info);
854 sub->GetOutputSlot(0).SetTensorInfo(info);
855
856 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
857 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
858 sub->BackendSelectionHint(backends[1]);
859
860 // optimize the network
861 OptimizerOptions optOptions;
862 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
863
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000864 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000865
866 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
867 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
868 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
869 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
870 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
871 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
872 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
873
874 // Checks order is valid.
875 BOOST_TEST(CheckOrder(graph, layer0, layer1));
876 BOOST_TEST(CheckOrder(graph, layer1, layer2));
877 BOOST_TEST(CheckOrder(graph, layer2, layer3));
878 BOOST_TEST(CheckOrder(graph, layer3, layer4));
879 BOOST_TEST(CheckOrder(graph, layer4, layer5));
880 BOOST_TEST(CheckOrder(graph, layer5, layer6));
881
882 // Use memory import between backends
883 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
884
885 // Correctly use backend hint
886 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
887
888 // Load it into the runtime. It should pass.
889 NetworkId netId;
890 runtime->LoadNetwork(netId, std::move(optNet));
891
892 // Creates structures for input & output
893 std::vector<float> inputData0
894 {
895 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
896 };
897 std::vector<float> inputData1
898 {
899 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
900 };
901 std::vector<float> inputData2
902 {
903 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
904 };
905
906 std::vector<float> outputData(12);
907
908 std::vector<float> expectedOutput
909 {
910 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
911 };
912
913 InputTensors inputTensors
914 {
915 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
916 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
917 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
918 };
919 OutputTensors outputTensors
920 {
921 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
922 };
923
924 runtime->GetProfiler(netId)->EnableProfiling(true);
925
926 // Do the inference
927 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
928
929 // Retrieve the Profiler.Print() output to get the workload execution
930 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
931 std::stringstream ss;
932 profilerManager.GetProfiler()->Print(ss);;
933 std::string dump = ss.str();
934
935 // Executed Subtraction using GpuAcc
936 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
937 BOOST_TEST(found != std::string::npos);
938
939 // Contain CopyMemGeneric
940 found = dump.find("CopyMemGeneric");
941 BOOST_TEST(found != std::string::npos);
942
943 // Check output is as expected
944 BOOST_TEST(outputData == expectedOutput);
945}
946
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100947BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000948{
949 using namespace armnn;
950
951 IRuntime::CreationOptions options;
952 IRuntimePtr runtime(IRuntime::Create(options));
953
954 // Builds up the structure of the network.
955 INetworkPtr net(INetwork::Create());
956
957 Pooling2dDescriptor desc;
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100958 desc.m_PoolWidth = 2;
959 desc.m_PoolHeight = 2;
960 desc.m_StrideX = 2;
961 desc.m_StrideY = 2;
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000962
963 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
964 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
965 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
966 IConnectableLayer* add = net->AddAdditionLayer("add");
967 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
968 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
969 IConnectableLayer* output = net->AddOutputLayer(0, "output");
970
971 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
972 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
973 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
974 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
975 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
976 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
977
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100978 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
979 TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000980
981 input0->GetOutputSlot(0).SetTensorInfo(info);
982 input1->GetOutputSlot(0).SetTensorInfo(info);
983 input2->GetOutputSlot(0).SetTensorInfo(info);
984 add->GetOutputSlot(0).SetTensorInfo(info);
985 sub->GetOutputSlot(0).SetTensorInfo(info);
986 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
987
988 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
989 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
990 sub->BackendSelectionHint(backends[1]);
991
992 // optimize the network
993 OptimizerOptions optOptions;
994 optOptions.m_ImportEnabled = true;
995 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
996
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000997 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000998
999 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
1000 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
1001 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
1002 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
1003 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
1004 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
1005 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
1006 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
1007 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
1008
1009 // Checks order is valid.
1010 BOOST_TEST(CheckOrder(graph, layer0, layer1));
1011 BOOST_TEST(CheckOrder(graph, layer1, layer2));
1012 BOOST_TEST(CheckOrder(graph, layer2, layer3));
1013 BOOST_TEST(CheckOrder(graph, layer3, layer4));
1014 BOOST_TEST(CheckOrder(graph, layer4, layer5));
1015 BOOST_TEST(CheckOrder(graph, layer5, layer6));
1016 BOOST_TEST(CheckOrder(graph, layer6, layer7));
1017 BOOST_TEST(CheckOrder(graph, layer7, layer8));
1018
1019 // Use memory import between backends
1020 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
1021 BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
1022
1023 // Correctly use backend hint
1024 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
1025
1026 // Load it into the runtime. It should pass.
1027 NetworkId netId;
1028 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +01001029
1030 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001031
1032 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1033
1034 // Creates structures for input & output
1035 std::vector<float> inputData0
1036 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001037 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001038 };
1039 std::vector<float> inputData1
1040 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001041 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001042 };
1043 std::vector<float> inputData2
1044 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001045 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001046 };
1047
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001048 std::vector<float> outputData(4);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001049
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001050 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
1051
1052 // Prepare aligned data
1053 unsigned int numElements = info.GetNumElements();
1054 size_t totalBytes = numElements * sizeof(float);
1055 const size_t alignment = 64;
1056 size_t space = totalBytes + alignment + alignment;
1057 auto inputData = std::make_unique<uint8_t[]>(space);
1058 void* alignedInputPtr = inputData.get();
1059 BOOST_CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
1060
1061 auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
1062 std::copy(inputData2.begin(), inputData2.end(), intputPtr);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001063
1064 InputTensors inputTensors
1065 {
1066 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1067 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001068 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), alignedInputPtr) }
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001069 };
1070 OutputTensors outputTensors
1071 {
1072 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1073 };
1074
1075 runtime->GetProfiler(netId)->EnableProfiling(true);
1076
1077 // Do the inference
1078 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1079
1080 // Retrieve the Profiler.Print() output to get the workload execution
1081 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1082 std::stringstream ss;
1083 profilerManager.GetProfiler()->Print(ss);;
1084 std::string dump = ss.str();
1085
1086 // Executed Subtraction using GpuAcc
1087 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1088 BOOST_TEST(found != std::string::npos);
1089
1090 // Correctly switch back to CpuAcc
1091 found = dump.find("NeonPooling2dWorkload_Execute");
1092 BOOST_TEST(found != std::string::npos);
1093
1094 // Contain CopyMemGeneric
1095 found = dump.find("CopyMemGeneric");
1096 BOOST_TEST(found != std::string::npos);
1097
1098 // Contains SyncMemGeneric for output
1099 found = dump.find("SyncMemGeneric");
1100 BOOST_TEST(found != std::string::npos);
1101
1102 // Check output is as expected
1103 BOOST_TEST(outputData == expectedOutput);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +01001104 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001105}
1106
1107BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
1108{
1109 using namespace armnn;
1110
1111 IRuntime::CreationOptions options;
1112 IRuntimePtr runtime(IRuntime::Create(options));
1113
1114 // Builds up the structure of the network.
1115 INetworkPtr net(INetwork::Create());
1116
1117 Pooling2dDescriptor desc;
1118
1119 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
1120 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
1121 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
1122 IConnectableLayer* add = net->AddAdditionLayer("add");
1123 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
1124 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
1125 IConnectableLayer* output = net->AddOutputLayer(0, "output");
1126
1127 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
1128 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
1129 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
1130 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
1131 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
1132 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1133
1134 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
1135 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
1136
1137 input0->GetOutputSlot(0).SetTensorInfo(info);
1138 input1->GetOutputSlot(0).SetTensorInfo(info);
1139 input2->GetOutputSlot(0).SetTensorInfo(info);
1140 add->GetOutputSlot(0).SetTensorInfo(info);
1141 sub->GetOutputSlot(0).SetTensorInfo(info);
1142 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
1143
1144 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
1145 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
1146 sub->BackendSelectionHint(backends[1]);
1147
1148 // optimize the network
1149 OptimizerOptions optOptions;
1150 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
1151
Francis Murtagh3d2b4b22021-02-15 18:23:17 +00001152 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +00001153
1154 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
1155 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
1156 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
1157 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
1158 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
1159 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
1160 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
1161 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
1162 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
1163
1164 // Checks order is valid.
1165 BOOST_TEST(CheckOrder(graph, layer0, layer1));
1166 BOOST_TEST(CheckOrder(graph, layer1, layer2));
1167 BOOST_TEST(CheckOrder(graph, layer2, layer3));
1168 BOOST_TEST(CheckOrder(graph, layer3, layer4));
1169 BOOST_TEST(CheckOrder(graph, layer4, layer5));
1170 BOOST_TEST(CheckOrder(graph, layer5, layer6));
1171 BOOST_TEST(CheckOrder(graph, layer6, layer7));
1172 BOOST_TEST(CheckOrder(graph, layer7, layer8));
1173
1174 // Use memory import between backends
1175 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
1176 BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
1177
1178 // Correctly use backend hint
1179 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
1180
1181 // Load it into the runtime. It should pass.
1182 NetworkId netId;
1183 runtime->LoadNetwork(netId, std::move(optNet));
1184
1185 // Creates structures for input & output
1186 std::vector<float> inputData0
1187 {
1188 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1189 };
1190 std::vector<float> inputData1
1191 {
1192 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1193 };
1194 std::vector<float> inputData2
1195 {
1196 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1197 };
1198
1199 std::vector<float> outputData(2);
1200
1201 std::vector<float> expectedOutput{ 11.0f, -1.0f };
1202
1203 InputTensors inputTensors
1204 {
1205 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1206 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
1207 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
1208 };
1209 OutputTensors outputTensors
1210 {
1211 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1212 };
1213
1214 runtime->GetProfiler(netId)->EnableProfiling(true);
1215
1216 // Do the inference
1217 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1218
1219 // Retrieve the Profiler.Print() output to get the workload execution
1220 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1221 std::stringstream ss;
1222 profilerManager.GetProfiler()->Print(ss);;
1223 std::string dump = ss.str();
1224
1225 // Executed Subtraction using GpuAcc
1226 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1227 BOOST_TEST(found != std::string::npos);
1228
1229 // Correctly switch back to CpuAcc
1230 found = dump.find("NeonPooling2dWorkload_Execute");
1231 BOOST_TEST(found != std::string::npos);
1232
1233 // Contain CopyMemGeneric
1234 found = dump.find("CopyMemGeneric");
1235 BOOST_TEST(found != std::string::npos);
1236
1237 // Check output is as expected
1238 BOOST_TEST(outputData == expectedOutput);
1239}
1240#endif
1241
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +01001242BOOST_AUTO_TEST_SUITE_END()