//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <backendsCommon/test/CommonTestUtils.hpp>

#include <test/GraphUtils.hpp>

#include <boost/test/unit_test.hpp>

BOOST_AUTO_TEST_SUITE(ClFallback)
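
// These tests build small networks on GpuAcc with a BackendSelectionHint forcing the
// Subtraction layer onto CpuAcc, then check that the optimizer inserts MemCopy layers
// at the backend boundaries and that execution falls back to Neon as expected.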

BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackToNeon)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

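    // The backends are listed in order of preference: GpuAcc first, CpuAcc as fallback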
    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
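    // The optimizer names the inserted MemCopy layer after the connection it replaces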
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // A MemCopy layer is inserted between the GpuAcc and CpuAcc subgraphs
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // The backend hint was correctly applied to the Subtraction layer
    BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
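    // INetworkProperties(importEnabled, exportEnabled): request import of input memory
    // and export of output memory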
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Subtraction was executed on CpuAcc (Neon)
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // The dump should contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackToNeon)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
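    // m_ImportEnabled is left at its default of false, so memory import stays disabled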
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));

    // A MemCopy layer is inserted between the GpuAcc and CpuAcc subgraphs
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));

    // The backend hint was correctly applied to the Subtraction layer
    BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
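    // Load without INetworkProperties; the defaults leave import/export disabled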
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(12);

    std::vector<float> expectedOutput
    {
        11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Subtraction was executed on CpuAcc (Neon)
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // The dump should contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(ClImportEnabledFallbackSubgraphToNeon)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

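    // Default-constructed descriptor: max pooling (the expected output below is the
    // per-channel maximum of the subtraction result)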
    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // MemCopy layers are inserted at each GpuAcc/CpuAcc boundary
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // The backend hint was correctly applied to the Subtraction layer
    BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);

    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Subtraction was executed on CpuAcc (Neon)
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Execution correctly switched back to GpuAcc for the pooling layer
    found = dump.find("ClPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // The dump should contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_CASE(ClImportDisabledFallbackSubgraphToNeon)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    Pooling2dDescriptor desc;

    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
    IConnectableLayer* add = net->AddAdditionLayer("add");
    IConnectableLayer* sub = net->AddSubtractionLayer("sub");
    IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");

    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
    add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
    sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
    TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    add->GetOutputSlot(0).SetTensorInfo(info);
    sub->GetOutputSlot(0).SetTensorInfo(info);
    pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);

    std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
    // Use BackendSelectionHint to specify CpuAcc for the Subtraction layer
    sub->BackendSelectionHint(backends[1]);

    // Optimize the network; m_ImportEnabled is left at its default of false
    OptimizerOptions optOptions;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);

    OptimizedNetwork* optNetObjPtr = PolymorphicDowncast<OptimizedNetwork*>(optNet.get());
    Graph& graph = optNetObjPtr->GetGraph();

    armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
    armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
    armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
    armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
    armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
    armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
    armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
    armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
    armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");

    // Checks order is valid.
    BOOST_TEST(CheckOrder(graph, layer0, layer1));
    BOOST_TEST(CheckOrder(graph, layer1, layer2));
    BOOST_TEST(CheckOrder(graph, layer2, layer3));
    BOOST_TEST(CheckOrder(graph, layer3, layer4));
    BOOST_TEST(CheckOrder(graph, layer4, layer5));
    BOOST_TEST(CheckOrder(graph, layer5, layer6));
    BOOST_TEST(CheckOrder(graph, layer6, layer7));
    BOOST_TEST(CheckOrder(graph, layer7, layer8));

    // MemCopy layers are inserted at each GpuAcc/CpuAcc boundary
    BOOST_TEST((layer4->GetType() == LayerType::MemCopy));
    BOOST_TEST((layer6->GetType() == LayerType::MemCopy));

    // The backend hint was correctly applied to the Subtraction layer
    BOOST_TEST((layer5->GetBackendId() == Compute::CpuAcc));

    // Load it into the runtime. It should pass.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output
    std::vector<float> inputData0
    {
        1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
    };
    std::vector<float> inputData1
    {
        0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
    };
    std::vector<float> inputData2
    {
        12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
    };

    std::vector<float> outputData(2);

    std::vector<float> expectedOutput{ 11.0f, -1.0f };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
        { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
        { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Subtraction was executed on CpuAcc (Neon)
    std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // Execution correctly switched back to GpuAcc for the pooling layer
    found = dump.find("ClPooling2dWorkload_Execute");
    BOOST_TEST(found != std::string::npos);

    // The dump should contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

BOOST_AUTO_TEST_SUITE_END()