//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/test/CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>

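// These tests exercise fallback between backends: with memory import enabled the
// optimizer should bridge backend boundaries with MemImport layers, otherwise
// with MemCopy layers. Each test inspects both the optimized graph and the
// profiler dump to confirm which strategy actually ran.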
BOOST_AUTO_TEST_SUITE(NeonFallback)

BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
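    // The mock import backend is registered as "MockRef" and is only expected to
    // support the addition layer here, so "sub" should fall back to CpuAcc.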
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

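    // Boundary layers inserted by the optimizer are named after the connection
    // they replace, e.g. "[ add (0) -> sub (1) ]" for add's output slot 0
    // feeding sub's input slot 1.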
    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
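    // INetworkProperties(importEnabled, exportEnabled): allow the runtime to
    // import input buffers and export output buffers rather than copying them.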
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();
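    // The profiler dump names every workload that ran, so searching it shows
    // whether the boundary executed as an import, a sync or a copy.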

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;
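    // A default-constructed Pooling2dDescriptor gives a Max pooling layer. The
    // resulting NEON workload needs padded buffers, so the mock backend's output
    // presumably cannot be imported directly; a copy is expected instead, hence
    // the "PaddingCopy" in the test name.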

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };

    std::vector<float> outputData(2);

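    // Max pooling reduces each 3x2 channel of the addition result to its
    // maximum: 6 for channel 0 and 12 for channel 1.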
    std::vector<float> expectedOutput
    {
        6.0f, 12.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

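    // This time the subtraction comes first and falls back to CpuAcc, so the
    // boundary imports CpuAcc's output into the mock backend: the reverse
    // direction of FallbackImportToCpuAcc.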
    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ImportMemGeneric
    std::size_t found = dump.find("ImportMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Use memory import between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemImport));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
    input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
    add->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
    };
    std::vector<float> inputData1
    {
        -1.0f, 3.0f
    };

    std::vector<float> outputData(2);

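    // Max pooling input0 gives { 6, 12 } per channel; adding input1 { -1, 3 }
    // yields { 5, 15 }.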
    std::vector<float> expectedOutput
    {
        5.0f, 15.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for the output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer3->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
{
    using namespace armnn;

    // Create a mock backend object
    MockImportBackendInitialiser initialiser; // Register the Mock Backend
    auto backendObjPtr = CreateBackendObject(MockImportBackendId());
    BOOST_TEST((backendObjPtr != nullptr));

    BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
    if (backendIds.find("MockRef") == backendIds.end())
    {
        std::string message = "Cannot load MockRef";
        BOOST_FAIL(message);
    }

    // Create runtime in which test will run and allow fallback to CpuAcc.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);

    // optimize the network
    std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
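    // No OptimizerOptions are passed, so m_ImportEnabled keeps its default of
    // false and the backend boundary is lowered to a copy rather than an import.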

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, false);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains CopyMemGeneric between the backends
    std::size_t found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain ImportMemGeneric
    found = dump.find("ImportMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

#if defined(ARMCOMPUTECL_ENABLED)
BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);
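    // The hint steers assignment for this single layer only; the rest of the
    // graph is expected to stay on CpuAcc, the first backend in the list.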

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

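    // CpuAcc (NEON) memory cannot be handed straight to a GpuAcc (CL) workload,
    // so even with import enabled the boundary is expected to lower to a copy.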
    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // Uses memory copy between backends
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
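    // This LoadNetwork overload takes no INetworkProperties, so import and
    // export stay at their disabled defaults.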
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Uses memory copy boundaries into and out of the GpuAcc subgraph
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

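    // sub = input2 - (input0 + input1) = { 11, 9, ..., -11 }; max pooling each
    // 3x2 channel then gives { 11, -1 }.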
    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric for output
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
    // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // optimize the network
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    Graph& graph = GetGraphForTesting(optNet.get());

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // Uses memory copy boundaries into and out of the GpuAcc subgraph
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // Correctly use backend hint
    BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Executed Subtraction using GpuAcc
    std::size_t found = dump.find("ClSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Correctly switch back to CpuAcc
    found = dump.find("NeonPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}
#endif

BOOST_AUTO_TEST_SUITE_END()