//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
#include <backendsCommon/test/CommonTestUtils.hpp>
#include <backendsCommon/test/mockBackend/MockImportBackend.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>

#include <sstream>
#include <string>
#include <vector>
12
13BOOST_AUTO_TEST_SUITE(NeonFallback)
14
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010015BOOST_AUTO_TEST_CASE(FallbackImportToCpuAcc)
16{
17 using namespace armnn;
18
19 // Create a mock backend object
20 MockImportBackendInitialiser initialiser; // Register the Mock Backend
21 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
22 BOOST_TEST((backendObjPtr != nullptr));
23
24 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
25 if (backendIds.find("MockRef") == backendIds.end())
26 {
27 std::string message = "Cannot load MockRef";
28 BOOST_FAIL(message);
29 }
30
31 // Create runtime in which test will run and allow fallback to CpuRef.
32 IRuntime::CreationOptions options;
33 IRuntimePtr runtime(IRuntime::Create(options));
34
35 // Builds up the structure of the network.
36 INetworkPtr net(INetwork::Create());
37
38 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
39 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
40 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
41 IConnectableLayer* add = net->AddAdditionLayer("add");
42 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
43 IConnectableLayer* output = net->AddOutputLayer(0, "output");
44
45 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
48 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
49 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
50
51 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
52
53 input0->GetOutputSlot(0).SetTensorInfo(info);
54 input1->GetOutputSlot(0).SetTensorInfo(info);
55 input2->GetOutputSlot(0).SetTensorInfo(info);
56 add->GetOutputSlot(0).SetTensorInfo(info);
57 sub->GetOutputSlot(0).SetTensorInfo(info);
58
59 // optimize the network
60 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +010061 OptimizerOptions optOptions;
62 optOptions.m_ImportEnabled = true;
63 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +010064
65 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
66 Graph& graph = optNetObjPtr->GetGraph();
67
68 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
69 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
70 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
71 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
72 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
73 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
74 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
75
76 // Checks order is valid.
77 BOOST_TEST(CheckOrder(graph, layer0, layer1));
78 BOOST_TEST(CheckOrder(graph, layer1, layer2));
79 BOOST_TEST(CheckOrder(graph, layer2, layer3));
80 BOOST_TEST(CheckOrder(graph, layer3, layer4));
81 BOOST_TEST(CheckOrder(graph, layer4, layer5));
82 BOOST_TEST(CheckOrder(graph, layer5, layer6));
83
84 // Load it into the runtime. It should pass.
85 NetworkId netId;
86 std::string ignoredErrorMessage;
87 INetworkProperties networkProperties(true, true);
88
89 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
90
91 // Creates structures for input & output
92 std::vector<float> inputData0
93 {
94 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
95 };
96 std::vector<float> inputData1
97 {
98 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
99 };
100 std::vector<float> inputData2
101 {
102 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
103 };
104
105 std::vector<float> outputData(12);
106
107 std::vector<float> expectedOutput
108 {
109 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
110 };
111
112 InputTensors inputTensors
113 {
114 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
115 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
116 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
117 };
118 OutputTensors outputTensors
119 {
120 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
121 };
122
123 runtime->GetProfiler(netId)->EnableProfiling(true);
124
125 // Do the inference
126 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
127
128 // Retrieve the Profiler.Print() output to get the workload execution
129 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
130 std::stringstream ss;
131 profilerManager.GetProfiler()->Print(ss);;
132 std::string dump = ss.str();
133
134 // Contains ImportMemGeneric
135 std::size_t found = dump.find("ImportMemGeneric");
136 BOOST_TEST(found != std::string::npos);
137
138 // Contains SyncMemGeneric
139 found = dump.find("SyncMemGeneric");
140 BOOST_TEST(found != std::string::npos);
141
142 // Does not contain CopyMemGeneric
143 found = dump.find("CopyMemGeneric");
144 BOOST_TEST(found == std::string::npos);
145
146 // Use memory import between backends
147 BOOST_TEST((layer4->GetType() == LayerType::MemImport));
148
149 // Check output is as expected
150 BOOST_TEST(outputData == expectedOutput);
151}
152
153BOOST_AUTO_TEST_CASE(FallbackPaddingCopyToCpuAcc)
154{
155 using namespace armnn;
156
157 // Create a mock backend object
158 MockImportBackendInitialiser initialiser; // Register the Mock Backend
159 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
160 BOOST_TEST((backendObjPtr != nullptr));
161
162 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
163 if (backendIds.find("MockRef") == backendIds.end())
164 {
165 std::string message = "Cannot load MockRef";
166 BOOST_FAIL(message);
167 }
168
169 // Create runtime in which test will run and allow fallback to CpuRef.
170 IRuntime::CreationOptions options;
171 IRuntimePtr runtime(IRuntime::Create(options));
172
173 // Builds up the structure of the network.
174 INetworkPtr net(INetwork::Create());
175
176 Pooling2dDescriptor desc;
177
178 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
179 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
180 IConnectableLayer* add = net->AddAdditionLayer("add");
181 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
182 IConnectableLayer* output = net->AddOutputLayer(0, "output");
183
184 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
185 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
186 add->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
187 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
188
189 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
190 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
191
192 input0->GetOutputSlot(0).SetTensorInfo(info);
193 input1->GetOutputSlot(0).SetTensorInfo(info);
194 add->GetOutputSlot(0).SetTensorInfo(info);
195 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
196
197 // optimize the network
198 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100199 OptimizerOptions optOptions;
200 optOptions.m_ImportEnabled = true;
201 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100202
203 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
204 Graph& graph = optNetObjPtr->GetGraph();
205
206 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
207 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
208 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "add");
209 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ add (0) -> pooling (0) ]");
210 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "pooling");
211 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
212
213 // Checks order is valid.
214 BOOST_TEST(CheckOrder(graph, layer0, layer1));
215 BOOST_TEST(CheckOrder(graph, layer1, layer2));
216 BOOST_TEST(CheckOrder(graph, layer2, layer3));
217 BOOST_TEST(CheckOrder(graph, layer3, layer4));
218 BOOST_TEST(CheckOrder(graph, layer4, layer5));
219
220 // Load it into the runtime. It should pass.
221 NetworkId netId;
222 std::string ignoredErrorMessage;
223 INetworkProperties networkProperties(true, true);
224
225 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
226
227 // Creates structures for input & output
228 std::vector<float> inputData0
229 {
230 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
231 };
232 std::vector<float> inputData1
233 {
234 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
235 };
236
237 std::vector<float> outputData(2);
238
239 std::vector<float> expectedOutput
240 {
241 6.0f, 12.0f
242 };
243
244 InputTensors inputTensors
245 {
246 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
247 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
248 };
249 OutputTensors outputTensors
250 {
251 { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
252 };
253
254 runtime->GetProfiler(netId)->EnableProfiling(true);
255
256 // Do the inference
257 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
258
259 // Retrieve the Profiler.Print() output to get the workload execution
260 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
261 std::stringstream ss;
262 profilerManager.GetProfiler()->Print(ss);;
263 std::string dump = ss.str();
264
265 // Contains CopyMemGeneric between the backends
266 std::size_t found = dump.find("CopyMemGeneric");
267 BOOST_TEST(found != std::string::npos);
268
269 // Contains SyncMemGeneric for the output
270 found = dump.find("SyncMemGeneric");
271 BOOST_TEST(found != std::string::npos);
272
273 // Does not contain ImportMemGeneric
274 found = dump.find("ImportMemGeneric");
275 BOOST_TEST(found == std::string::npos);
276
277 // Use memory import between backends
278 BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
279
280 // Check output is as expected
281 BOOST_TEST(outputData == expectedOutput);
282}
283
284BOOST_AUTO_TEST_CASE(FallbackImportFromCpuAcc)
285{
286 using namespace armnn;
287
288 // Create a mock backend object
289 MockImportBackendInitialiser initialiser; // Register the Mock Backend
290 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
291 BOOST_TEST((backendObjPtr != nullptr));
292
293 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
294 if (backendIds.find("MockRef") == backendIds.end())
295 {
296 std::string message = "Cannot load MockRef";
297 BOOST_FAIL(message);
298 }
299
300 // Create runtime in which test will run and allow fallback to CpuRef.
301 IRuntime::CreationOptions options;
302 IRuntimePtr runtime(IRuntime::Create(options));
303
304 // Builds up the structure of the network.
305 INetworkPtr net(INetwork::Create());
306
307 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
308 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
309 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
310 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
311 IConnectableLayer* add = net->AddAdditionLayer("add");
312 IConnectableLayer* output = net->AddOutputLayer(0, "output");
313
314 input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
315 input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
316 input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
317 sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
318 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
319
320 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
321
322 input0->GetOutputSlot(0).SetTensorInfo(info);
323 input1->GetOutputSlot(0).SetTensorInfo(info);
324 input2->GetOutputSlot(0).SetTensorInfo(info);
325 sub->GetOutputSlot(0).SetTensorInfo(info);
326 add->GetOutputSlot(0).SetTensorInfo(info);
327
328 // optimize the network
329 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100330 OptimizerOptions optOptions;
331 optOptions.m_ImportEnabled = true;
332 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100333
334 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
335 Graph& graph = optNetObjPtr->GetGraph();
336
337 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
338 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
339 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
340 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
341 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
342 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
343 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
344
345 // Checks order is valid.
346 BOOST_TEST(CheckOrder(graph, layer0, layer1));
347 BOOST_TEST(CheckOrder(graph, layer1, layer2));
348 BOOST_TEST(CheckOrder(graph, layer2, layer3));
349 BOOST_TEST(CheckOrder(graph, layer3, layer4));
350 BOOST_TEST(CheckOrder(graph, layer4, layer5));
351 BOOST_TEST(CheckOrder(graph, layer5, layer6));
352
353 // Load it into the runtime. It should pass.
354 NetworkId netId;
355 std::string ignoredErrorMessage;
356 INetworkProperties networkProperties(true, true);
357
358 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
359
360 // Creates structures for input & output
361 std::vector<float> inputData0
362 {
363 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
364 };
365 std::vector<float> inputData1
366 {
367 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
368 };
369 std::vector<float> inputData2
370 {
371 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
372 };
373
374 std::vector<float> outputData(12);
375
376 std::vector<float> expectedOutput
377 {
378 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
379 };
380
381 InputTensors inputTensors
382 {
383 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
384 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
385 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
386 };
387 OutputTensors outputTensors
388 {
389 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
390 };
391
392 runtime->GetProfiler(netId)->EnableProfiling(true);
393
394 // Do the inference
395 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
396
397 // Retrieve the Profiler.Print() output to get the workload execution
398 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
399 std::stringstream ss;
400 profilerManager.GetProfiler()->Print(ss);;
401 std::string dump = ss.str();
402
403 // Contains ImportMemGeneric
404 std::size_t found = dump.find("ImportMemGeneric");
405 BOOST_TEST(found != std::string::npos);
406
407 // Contains SyncMemGeneric
408 found = dump.find("SyncMemGeneric");
409 BOOST_TEST(found != std::string::npos);
410
411 // Does not contain CopyMemGeneric
412 found = dump.find("CopyMemGeneric");
413 BOOST_TEST(found == std::string::npos);
414
415 // Use memory import between backends
416 BOOST_TEST((layer4->GetType() == LayerType::MemImport));
417
418 // Check output is as expected
419 BOOST_TEST(outputData == expectedOutput);
420}
421
422BOOST_AUTO_TEST_CASE(FallbackPaddingCopyFromCpuAcc)
423{
424 using namespace armnn;
425
426 // Create a mock backend object
427 MockImportBackendInitialiser initialiser; // Register the Mock Backend
428 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
429 BOOST_TEST((backendObjPtr != nullptr));
430
431 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
432 if (backendIds.find("MockRef") == backendIds.end())
433 {
434 std::string message = "Cannot load MockRef";
435 BOOST_FAIL(message);
436 }
437
438 // Create runtime in which test will run and allow fallback to CpuRef.
439 IRuntime::CreationOptions options;
440 IRuntimePtr runtime(IRuntime::Create(options));
441
442 // Builds up the structure of the network.
443 INetworkPtr net(INetwork::Create());
444
445 Pooling2dDescriptor desc;
446
447 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
448 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
449 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
450 IConnectableLayer* add = net->AddAdditionLayer("add");
451 IConnectableLayer* output = net->AddOutputLayer(0, "output");
452
453 input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
454 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
455 pooling->GetOutputSlot(0).Connect(add->GetInputSlot(0));
456 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
457
458 TensorInfo inputInfo = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
459 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
460
461 input0->GetOutputSlot(0).SetTensorInfo(inputInfo);
462 input1->GetOutputSlot(0).SetTensorInfo(poolingInfo);
463 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
464 add->GetOutputSlot(0).SetTensorInfo(poolingInfo);
465
466 // optimize the network
467 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100468 OptimizerOptions optOptions;
469 optOptions.m_ImportEnabled = true;
470 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +0100471
472 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
473 Graph& graph = optNetObjPtr->GetGraph();
474
475 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
476 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
477 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "pooling");
478 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "[ pooling (0) -> add (0) ]");
479 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "add");
480 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "output");
481
482 // Checks order is valid.
483 BOOST_TEST(CheckOrder(graph, layer0, layer1));
484 BOOST_TEST(CheckOrder(graph, layer1, layer2));
485 BOOST_TEST(CheckOrder(graph, layer2, layer3));
486 BOOST_TEST(CheckOrder(graph, layer3, layer4));
487 BOOST_TEST(CheckOrder(graph, layer4, layer5));
488
489 // Load it into the runtime. It should pass.
490 NetworkId netId;
491 std::string ignoredErrorMessage;
492 INetworkProperties networkProperties(true, true);
493
494 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
495
496 // Creates structures for input & output
497 std::vector<float> inputData0
498 {
499 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f
500 };
501 std::vector<float> inputData1
502 {
503 -1.0f, 3.0f
504 };
505
506 std::vector<float> outputData(2);
507
508 std::vector<float> expectedOutput
509 {
510 5.0f, 15.0f
511 };
512
513 InputTensors inputTensors
514 {
515 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
516 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) }
517 };
518 OutputTensors outputTensors
519 {
520 { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
521 };
522
523 runtime->GetProfiler(netId)->EnableProfiling(true);
524
525 // Do the inference
526 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
527
528 // Retrieve the Profiler.Print() output to get the workload execution
529 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
530 std::stringstream ss;
531 profilerManager.GetProfiler()->Print(ss);;
532 std::string dump = ss.str();
533
534 // Contains CopyMemGeneric between the backends
535 std::size_t found = dump.find("CopyMemGeneric");
536 BOOST_TEST(found != std::string::npos);
537
538 // Contains SyncMemGeneric for the output
539 found = dump.find("SyncMemGeneric");
540 BOOST_TEST(found != std::string::npos);
541
542 // Does not contain ImportMemGeneric
543 found = dump.find("ImportMemGeneric");
544 BOOST_TEST(found == std::string::npos);
545
546 // Use memory import between backends
547 BOOST_TEST((layer3->GetType() == LayerType::MemCopy));
548
549 // Check output is as expected
550 BOOST_TEST(outputData == expectedOutput);
551}
552
Narumol Prangnawarata2493a02020-08-19 14:39:07 +0100553BOOST_AUTO_TEST_CASE(FallbackDisableImportFromCpuAcc)
554{
555 using namespace armnn;
556
557 // Create a mock backend object
558 MockImportBackendInitialiser initialiser; // Register the Mock Backend
559 auto backendObjPtr = CreateBackendObject(MockImportBackendId());
560 BOOST_TEST((backendObjPtr != nullptr));
561
562 BackendIdSet backendIds = BackendRegistryInstance().GetBackendIds();
563 if (backendIds.find("MockRef") == backendIds.end())
564 {
565 std::string message = "Cannot load MockRef";
566 BOOST_FAIL(message);
567 }
568
569 // Create runtime in which test will run and allow fallback to CpuRef.
570 IRuntime::CreationOptions options;
571 IRuntimePtr runtime(IRuntime::Create(options));
572
573 // Builds up the structure of the network.
574 INetworkPtr net(INetwork::Create());
575
576 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
577 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
578 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
579 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
580 IConnectableLayer* add = net->AddAdditionLayer("add");
581 IConnectableLayer* output = net->AddOutputLayer(0, "output");
582
583 input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
584 input1->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
585 input2->GetOutputSlot(0).Connect(add->GetInputSlot(0));
586 sub->GetOutputSlot(0).Connect(add->GetInputSlot(1));
587 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
588
589 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
590
591 input0->GetOutputSlot(0).SetTensorInfo(info);
592 input1->GetOutputSlot(0).SetTensorInfo(info);
593 input2->GetOutputSlot(0).SetTensorInfo(info);
594 sub->GetOutputSlot(0).SetTensorInfo(info);
595 add->GetOutputSlot(0).SetTensorInfo(info);
596
597 // optimize the network
598 std::vector<BackendId> backends = { "MockRef", Compute::CpuAcc };
599 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
600
601 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
602 Graph& graph = optNetObjPtr->GetGraph();
603
604 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
605 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
606 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
607 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "sub");
608 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ sub (0) -> add (1) ]");
609 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "add");
610 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
611
612 // Checks order is valid.
613 BOOST_TEST(CheckOrder(graph, layer0, layer1));
614 BOOST_TEST(CheckOrder(graph, layer1, layer2));
615 BOOST_TEST(CheckOrder(graph, layer2, layer3));
616 BOOST_TEST(CheckOrder(graph, layer3, layer4));
617 BOOST_TEST(CheckOrder(graph, layer4, layer5));
618 BOOST_TEST(CheckOrder(graph, layer5, layer6));
619
620 // Load it into the runtime. It should pass.
621 NetworkId netId;
622 std::string ignoredErrorMessage;
623 INetworkProperties networkProperties(false, false);
624
625 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
626
627 // Creates structures for input & output
628 std::vector<float> inputData0
629 {
630 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f
631 };
632 std::vector<float> inputData1
633 {
634 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
635 };
636 std::vector<float> inputData2
637 {
638 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
639 };
640
641 std::vector<float> outputData(12);
642
643 std::vector<float> expectedOutput
644 {
645 13.0f, 11.0f, 11.0f, 9.0f, 7.0f, 7.0f, 7.0f, 5.0f, 5.0f, 3.0f, 3.0f, -5.0f
646 };
647
648 InputTensors inputTensors
649 {
650 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
651 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
652 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
653 };
654 OutputTensors outputTensors
655 {
656 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
657 };
658
659 runtime->GetProfiler(netId)->EnableProfiling(true);
660
661 // Do the inference
662 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
663
664 // Retrieve the Profiler.Print() output to get the workload execution
665 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
666 std::stringstream ss;
667 profilerManager.GetProfiler()->Print(ss);;
668 std::string dump = ss.str();
669
670 // Contains CopyMemGeneric between the backends
671 std::size_t found = dump.find("CopyMemGeneric");
672 BOOST_TEST(found != std::string::npos);
673
674 // Does not contain ImportMemGeneric
675 found = dump.find("ImportMemGeneric");
676 BOOST_TEST(found == std::string::npos);
677
678 // Use memory import between backends
679 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
680
681 // Check output is as expected
682 BOOST_TEST(outputData == expectedOutput);
683}
684
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000685#if defined(ARMCOMPUTECL_ENABLED)
686BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackToCl)
687{
688 using namespace armnn;
689
690 IRuntime::CreationOptions options;
691 IRuntimePtr runtime(IRuntime::Create(options));
692
693 // Builds up the structure of the network.
694 INetworkPtr net(INetwork::Create());
695
696 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
697 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
698 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
699 IConnectableLayer* add = net->AddAdditionLayer("add");
700 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
701 IConnectableLayer* output = net->AddOutputLayer(0, "output");
702
703 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
704 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
705 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
706 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
707 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
708
709 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
710
711 input0->GetOutputSlot(0).SetTensorInfo(info);
712 input1->GetOutputSlot(0).SetTensorInfo(info);
713 input2->GetOutputSlot(0).SetTensorInfo(info);
714 add->GetOutputSlot(0).SetTensorInfo(info);
715 sub->GetOutputSlot(0).SetTensorInfo(info);
716
717 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
718 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
719 sub->BackendSelectionHint(backends[1]);
720
721 // optimize the network
722 OptimizerOptions optOptions;
723 optOptions.m_ImportEnabled = true;
724 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
725
726 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
727 Graph& graph = optNetObjPtr->GetGraph();
728
729 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
730 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
731 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
732 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
733 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
734 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
735 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
736
737 // Checks order is valid.
738 BOOST_TEST(CheckOrder(graph, layer0, layer1));
739 BOOST_TEST(CheckOrder(graph, layer1, layer2));
740 BOOST_TEST(CheckOrder(graph, layer2, layer3));
741 BOOST_TEST(CheckOrder(graph, layer3, layer4));
742 BOOST_TEST(CheckOrder(graph, layer4, layer5));
743 BOOST_TEST(CheckOrder(graph, layer5, layer6));
744
745 // Use memory import between backends
746 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
747
748 // Correctly use backend hint
749 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
750
751 // Load it into the runtime. It should pass.
752 NetworkId netId;
753 std::string ignoredErrorMessage;
754 INetworkProperties networkProperties(true, true);
755
756 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
757
758 // Creates structures for input & output
759 std::vector<float> inputData0
760 {
761 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
762 };
763 std::vector<float> inputData1
764 {
765 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
766 };
767 std::vector<float> inputData2
768 {
769 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
770 };
771
772 std::vector<float> outputData(12);
773
774 std::vector<float> expectedOutput
775 {
776 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
777 };
778
779 InputTensors inputTensors
780 {
781 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
782 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
783 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
784 };
785 OutputTensors outputTensors
786 {
787 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
788 };
789
790 runtime->GetProfiler(netId)->EnableProfiling(true);
791
792 // Do the inference
793 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
794
795 // Retrieve the Profiler.Print() output to get the workload execution
796 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
797 std::stringstream ss;
798 profilerManager.GetProfiler()->Print(ss);;
799 std::string dump = ss.str();
800
801 // Executed Subtraction using GpuAcc
802 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
803 BOOST_TEST(found != std::string::npos);
804
805 // Contain CopyMemGeneric
806 found = dump.find("CopyMemGeneric");
807 BOOST_TEST(found != std::string::npos);
808
809 // Check output is as expected
810 BOOST_TEST(outputData == expectedOutput);
811}
812
813BOOST_AUTO_TEST_CASE(NeonImportDisabledFallbackToCl)
814{
815 using namespace armnn;
816
817 IRuntime::CreationOptions options;
818 IRuntimePtr runtime(IRuntime::Create(options));
819
820 // Builds up the structure of the network.
821 INetworkPtr net(INetwork::Create());
822
823 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
824 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
825 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
826 IConnectableLayer* add = net->AddAdditionLayer("add");
827 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
828 IConnectableLayer* output = net->AddOutputLayer(0, "output");
829
830 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
831 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
832 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
833 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
834 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
835
836 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
837
838 input0->GetOutputSlot(0).SetTensorInfo(info);
839 input1->GetOutputSlot(0).SetTensorInfo(info);
840 input2->GetOutputSlot(0).SetTensorInfo(info);
841 add->GetOutputSlot(0).SetTensorInfo(info);
842 sub->GetOutputSlot(0).SetTensorInfo(info);
843
844 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
845 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
846 sub->BackendSelectionHint(backends[1]);
847
848 // optimize the network
849 OptimizerOptions optOptions;
850 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
851
852 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
853 Graph& graph = optNetObjPtr->GetGraph();
854
855 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
856 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
857 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
858 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
859 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
860 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
861 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
862
863 // Checks order is valid.
864 BOOST_TEST(CheckOrder(graph, layer0, layer1));
865 BOOST_TEST(CheckOrder(graph, layer1, layer2));
866 BOOST_TEST(CheckOrder(graph, layer2, layer3));
867 BOOST_TEST(CheckOrder(graph, layer3, layer4));
868 BOOST_TEST(CheckOrder(graph, layer4, layer5));
869 BOOST_TEST(CheckOrder(graph, layer5, layer6));
870
871 // Use memory import between backends
872 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
873
874 // Correctly use backend hint
875 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
876
877 // Load it into the runtime. It should pass.
878 NetworkId netId;
879 runtime->LoadNetwork(netId, std::move(optNet));
880
881 // Creates structures for input & output
882 std::vector<float> inputData0
883 {
884 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
885 };
886 std::vector<float> inputData1
887 {
888 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
889 };
890 std::vector<float> inputData2
891 {
892 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
893 };
894
895 std::vector<float> outputData(12);
896
897 std::vector<float> expectedOutput
898 {
899 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
900 };
901
902 InputTensors inputTensors
903 {
904 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
905 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
906 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
907 };
908 OutputTensors outputTensors
909 {
910 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
911 };
912
913 runtime->GetProfiler(netId)->EnableProfiling(true);
914
915 // Do the inference
916 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
917
918 // Retrieve the Profiler.Print() output to get the workload execution
919 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
920 std::stringstream ss;
921 profilerManager.GetProfiler()->Print(ss);;
922 std::string dump = ss.str();
923
924 // Executed Subtraction using GpuAcc
925 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
926 BOOST_TEST(found != std::string::npos);
927
928 // Contain CopyMemGeneric
929 found = dump.find("CopyMemGeneric");
930 BOOST_TEST(found != std::string::npos);
931
932 // Check output is as expected
933 BOOST_TEST(outputData == expectedOutput);
934}
935
936BOOST_AUTO_TEST_CASE(NeonImportEnabledFallbackSubgraphToCl)
937{
938 using namespace armnn;
939
940 IRuntime::CreationOptions options;
941 IRuntimePtr runtime(IRuntime::Create(options));
942
943 // Builds up the structure of the network.
944 INetworkPtr net(INetwork::Create());
945
946 Pooling2dDescriptor desc;
947
948 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
949 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
950 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
951 IConnectableLayer* add = net->AddAdditionLayer("add");
952 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
953 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
954 IConnectableLayer* output = net->AddOutputLayer(0, "output");
955
956 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
957 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
958 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
959 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
960 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
961 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
962
963 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
964 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
965
966 input0->GetOutputSlot(0).SetTensorInfo(info);
967 input1->GetOutputSlot(0).SetTensorInfo(info);
968 input2->GetOutputSlot(0).SetTensorInfo(info);
969 add->GetOutputSlot(0).SetTensorInfo(info);
970 sub->GetOutputSlot(0).SetTensorInfo(info);
971 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
972
973 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
974 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
975 sub->BackendSelectionHint(backends[1]);
976
977 // optimize the network
978 OptimizerOptions optOptions;
979 optOptions.m_ImportEnabled = true;
980 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
981
982 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
983 Graph& graph = optNetObjPtr->GetGraph();
984
985 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
986 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
987 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
988 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
989 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
990 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
991 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
992 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
993 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
994
995 // Checks order is valid.
996 BOOST_TEST(CheckOrder(graph, layer0, layer1));
997 BOOST_TEST(CheckOrder(graph, layer1, layer2));
998 BOOST_TEST(CheckOrder(graph, layer2, layer3));
999 BOOST_TEST(CheckOrder(graph, layer3, layer4));
1000 BOOST_TEST(CheckOrder(graph, layer4, layer5));
1001 BOOST_TEST(CheckOrder(graph, layer5, layer6));
1002 BOOST_TEST(CheckOrder(graph, layer6, layer7));
1003 BOOST_TEST(CheckOrder(graph, layer7, layer8));
1004
1005 // Use memory import between backends
1006 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
1007 BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
1008
1009 // Correctly use backend hint
1010 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
1011
1012 // Load it into the runtime. It should pass.
1013 NetworkId netId;
1014 std::string ignoredErrorMessage;
1015 INetworkProperties networkProperties(true, true);
1016
1017 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1018
1019 // Creates structures for input & output
1020 std::vector<float> inputData0
1021 {
1022 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1023 };
1024 std::vector<float> inputData1
1025 {
1026 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1027 };
1028 std::vector<float> inputData2
1029 {
1030 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1031 };
1032
1033 std::vector<float> outputData(2);
1034
1035 std::vector<float> expectedOutput{ 11.0f, -1.0f };
1036
1037 InputTensors inputTensors
1038 {
1039 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1040 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
1041 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
1042 };
1043 OutputTensors outputTensors
1044 {
1045 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1046 };
1047
1048 runtime->GetProfiler(netId)->EnableProfiling(true);
1049
1050 // Do the inference
1051 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1052
1053 // Retrieve the Profiler.Print() output to get the workload execution
1054 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1055 std::stringstream ss;
1056 profilerManager.GetProfiler()->Print(ss);;
1057 std::string dump = ss.str();
1058
1059 // Executed Subtraction using GpuAcc
1060 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1061 BOOST_TEST(found != std::string::npos);
1062
1063 // Correctly switch back to CpuAcc
1064 found = dump.find("NeonPooling2dWorkload_Execute");
1065 BOOST_TEST(found != std::string::npos);
1066
1067 // Contain CopyMemGeneric
1068 found = dump.find("CopyMemGeneric");
1069 BOOST_TEST(found != std::string::npos);
1070
1071 // Contains SyncMemGeneric for output
1072 found = dump.find("SyncMemGeneric");
1073 BOOST_TEST(found != std::string::npos);
1074
1075 // Check output is as expected
1076 BOOST_TEST(outputData == expectedOutput);
1077}
1078
1079BOOST_AUTO_TEST_CASE(NeonImportDisableFallbackSubgraphToCl)
1080{
1081 using namespace armnn;
1082
1083 IRuntime::CreationOptions options;
1084 IRuntimePtr runtime(IRuntime::Create(options));
1085
1086 // Builds up the structure of the network.
1087 INetworkPtr net(INetwork::Create());
1088
1089 Pooling2dDescriptor desc;
1090
1091 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
1092 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
1093 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
1094 IConnectableLayer* add = net->AddAdditionLayer("add");
1095 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
1096 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
1097 IConnectableLayer* output = net->AddOutputLayer(0, "output");
1098
1099 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
1100 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
1101 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
1102 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
1103 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
1104 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1105
1106 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
1107 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
1108
1109 input0->GetOutputSlot(0).SetTensorInfo(info);
1110 input1->GetOutputSlot(0).SetTensorInfo(info);
1111 input2->GetOutputSlot(0).SetTensorInfo(info);
1112 add->GetOutputSlot(0).SetTensorInfo(info);
1113 sub->GetOutputSlot(0).SetTensorInfo(info);
1114 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
1115
1116 std::vector<BackendId> backends = { Compute::CpuAcc, Compute::GpuAcc };
1117 // Use BackendSelectionHint to specify GpuAcc for Subtraction layer
1118 sub->BackendSelectionHint(backends[1]);
1119
1120 // optimize the network
1121 OptimizerOptions optOptions;
1122 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
1123
1124 OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
1125 Graph& graph = optNetObjPtr->GetGraph();
1126
1127 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
1128 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
1129 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
1130 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
1131 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
1132 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
1133 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
1134 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
1135 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
1136
1137 // Checks order is valid.
1138 BOOST_TEST(CheckOrder(graph, layer0, layer1));
1139 BOOST_TEST(CheckOrder(graph, layer1, layer2));
1140 BOOST_TEST(CheckOrder(graph, layer2, layer3));
1141 BOOST_TEST(CheckOrder(graph, layer3, layer4));
1142 BOOST_TEST(CheckOrder(graph, layer4, layer5));
1143 BOOST_TEST(CheckOrder(graph, layer5, layer6));
1144 BOOST_TEST(CheckOrder(graph, layer6, layer7));
1145 BOOST_TEST(CheckOrder(graph, layer7, layer8));
1146
1147 // Use memory import between backends
1148 BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
1149 BOOST_TEST((layer6->GetType() == LayerType::MemCopy));
1150
1151 // Correctly use backend hint
1152 BOOST_TEST((layer5->GetBackendId() == Compute::GpuAcc ));
1153
1154 // Load it into the runtime. It should pass.
1155 NetworkId netId;
1156 runtime->LoadNetwork(netId, std::move(optNet));
1157
1158 // Creates structures for input & output
1159 std::vector<float> inputData0
1160 {
1161 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
1162 };
1163 std::vector<float> inputData1
1164 {
1165 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
1166 };
1167 std::vector<float> inputData2
1168 {
1169 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
1170 };
1171
1172 std::vector<float> outputData(2);
1173
1174 std::vector<float> expectedOutput{ 11.0f, -1.0f };
1175
1176 InputTensors inputTensors
1177 {
1178 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
1179 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
1180 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
1181 };
1182 OutputTensors outputTensors
1183 {
1184 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
1185 };
1186
1187 runtime->GetProfiler(netId)->EnableProfiling(true);
1188
1189 // Do the inference
1190 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1191
1192 // Retrieve the Profiler.Print() output to get the workload execution
1193 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1194 std::stringstream ss;
1195 profilerManager.GetProfiler()->Print(ss);;
1196 std::string dump = ss.str();
1197
1198 // Executed Subtraction using GpuAcc
1199 std::size_t found = dump.find("ClSubtractionWorkload_Execute");
1200 BOOST_TEST(found != std::string::npos);
1201
1202 // Correctly switch back to CpuAcc
1203 found = dump.find("NeonPooling2dWorkload_Execute");
1204 BOOST_TEST(found != std::string::npos);
1205
1206 // Contain CopyMemGeneric
1207 found = dump.find("CopyMemGeneric");
1208 BOOST_TEST(found != std::string::npos);
1209
1210 // Check output is as expected
1211 BOOST_TEST(outputData == expectedOutput);
1212}
1213#endif
1214
Narumol Prangnawaratb8d771a2020-08-14 11:51:12 +01001215BOOST_AUTO_TEST_SUITE_END()