//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <CommonTestUtils.hpp>

#include <GraphUtils.hpp>

#include <doctest/doctest.h>

#include <algorithm>
#include <cstddef>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
Sadik Armagan1625efc2021-06-10 18:24:34 +010012TEST_SUITE("ClFallback")
13{
14TEST_CASE("ClImportEnabledFallbackToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000015{
16 using namespace armnn;
17
18 IRuntime::CreationOptions options;
19 IRuntimePtr runtime(IRuntime::Create(options));
20
21 // Builds up the structure of the network.
22 INetworkPtr net(INetwork::Create());
23
24 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
25 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
26 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
27 IConnectableLayer* add = net->AddAdditionLayer("add");
28 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
29 IConnectableLayer* output = net->AddOutputLayer(0, "output");
30
31 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
32 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
33 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
34 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
35 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
36
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010037 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +010038 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000039
40 input0->GetOutputSlot(0).SetTensorInfo(info);
41 input1->GetOutputSlot(0).SetTensorInfo(info);
42 input2->GetOutputSlot(0).SetTensorInfo(info);
43 add->GetOutputSlot(0).SetTensorInfo(info);
44 sub->GetOutputSlot(0).SetTensorInfo(info);
45
46 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
47 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
48 sub->BackendSelectionHint(backends[1]);
49
50 // optimize the network
51 OptimizerOptions optOptions;
52 optOptions.m_ImportEnabled = true;
Colm Donelan03bf98a2022-05-30 15:20:36 +010053 optOptions.m_ExportEnabled = true;
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000054 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
55
Francis Murtagh3d2b4b22021-02-15 18:23:17 +000056 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000057
58 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
59 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
60 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
61 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
62 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
63 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
64 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
65
66 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +010067 CHECK(CheckOrder(graph, layer0, layer1));
68 CHECK(CheckOrder(graph, layer1, layer2));
69 CHECK(CheckOrder(graph, layer2, layer3));
70 CHECK(CheckOrder(graph, layer3, layer4));
71 CHECK(CheckOrder(graph, layer4, layer5));
72 CHECK(CheckOrder(graph, layer5, layer6));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000073
74 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +010075 CHECK((layer4->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000076
77 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +010078 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000079
80 // Load it into the runtime. It should pass.
81 NetworkId netId;
82 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +010083 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000084 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
85
86 // Creates structures for input & output
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010087 std::vector<float> inputValue0
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000088 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010089 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000090 };
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010091 std::vector<float> inputValue1
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000092 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010093 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000094 };
95 std::vector<float> inputData2
96 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010097 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +000098 };
99
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100100 std::vector<float> outputData(16);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000101
102 std::vector<float> expectedOutput
103 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100104 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f, 11.0f, 9.0f, 7.0f, 5.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000105 };
106
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100107 // Prepare aligned data
108 unsigned int numElements = info.GetNumElements();
109 size_t totalBytes = numElements * sizeof(float);
110 const size_t alignment = 64;
111 size_t space = totalBytes + alignment + alignment;
112 auto inputData0 = std::make_unique<uint8_t[]>(space);
113 void* alignedInputPtr0 = inputData0.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100114 CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100115
116 auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
117 std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
118
119 auto inputData1 = std::make_unique<uint8_t[]>(space);
120 void* alignedInputPtr1 = inputData1.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100121 CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100122
123 auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
124 std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
125
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000126 InputTensors inputTensors
127 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100128 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
129 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000130 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
131 };
132 OutputTensors outputTensors
133 {
134 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
135 };
136
137 runtime->GetProfiler(netId)->EnableProfiling(true);
138
139 // Do the inference
140 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
141
142 // Retrieve the Profiler.Print() output to get the workload execution
143 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
144 std::stringstream ss;
145 profilerManager.GetProfiler()->Print(ss);;
146 std::string dump = ss.str();
147
148 // Executed Subtraction using CpuAcc
149 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100150 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000151
152 // Contain CopyMemGeneric
153 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100154 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000155
156 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100157 CHECK(outputData == expectedOutput);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100158
159 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000160}
161
Sadik Armagan1625efc2021-06-10 18:24:34 +0100162TEST_CASE("ClImportDisabledFallbackToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000163{
164 using namespace armnn;
165
166 IRuntime::CreationOptions options;
167 IRuntimePtr runtime(IRuntime::Create(options));
168
169 // Builds up the structure of the network.
170 INetworkPtr net(INetwork::Create());
171
172 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
173 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
174 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
175 IConnectableLayer* add = net->AddAdditionLayer("add");
176 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
177 IConnectableLayer* output = net->AddOutputLayer(0, "output");
178
179 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
180 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
181 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
182 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
183 sub->GetOutputSlot(0).Connect(output->GetInputSlot(0));
184
185 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100186 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000187
188 input0->GetOutputSlot(0).SetTensorInfo(info);
189 input1->GetOutputSlot(0).SetTensorInfo(info);
190 input2->GetOutputSlot(0).SetTensorInfo(info);
191 add->GetOutputSlot(0).SetTensorInfo(info);
192 sub->GetOutputSlot(0).SetTensorInfo(info);
193
194 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
195 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
196 sub->BackendSelectionHint(backends[1]);
197
198 // optimize the network
199 OptimizerOptions optOptions;
200 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
201
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000202 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000203
204 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
205 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
206 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
207 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
208 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
209 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
210 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "output");
211
212 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100213 CHECK(CheckOrder(graph, layer0, layer1));
214 CHECK(CheckOrder(graph, layer1, layer2));
215 CHECK(CheckOrder(graph, layer2, layer3));
216 CHECK(CheckOrder(graph, layer3, layer4));
217 CHECK(CheckOrder(graph, layer4, layer5));
218 CHECK(CheckOrder(graph, layer5, layer6));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000219
220 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100221 CHECK((layer4->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000222
223 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100224 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000225
226 // Load it into the runtime. It should pass.
227 NetworkId netId;
228 runtime->LoadNetwork(netId, std::move(optNet));
229
230 // Creates structures for input & output
231 std::vector<float> inputData0
232 {
233 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
234 };
235 std::vector<float> inputData1
236 {
237 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
238 };
239 std::vector<float> inputData2
240 {
241 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
242 };
243
244 std::vector<float> outputData(12);
245
246 std::vector<float> expectedOutput
247 {
248 11.0f, 9.0f, 7.0f, 5.0f, 3.0f, 1.0f, -1.0f, -3.0f, -5.0f, -7.0f, -9.0f, -11.0f
249 };
250
251 InputTensors inputTensors
252 {
253 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
254 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
255 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
256 };
257 OutputTensors outputTensors
258 {
259 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
260 };
261
262 runtime->GetProfiler(netId)->EnableProfiling(true);
263
264 // Do the inference
265 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
266
267 // Retrieve the Profiler.Print() output to get the workload execution
268 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
269 std::stringstream ss;
270 profilerManager.GetProfiler()->Print(ss);;
271 std::string dump = ss.str();
272
273 // Executed Subtraction using CpuAcc
274 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100275 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000276
277 // Contain CopyMemGeneric
278 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100279 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000280
281 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100282 CHECK(outputData == expectedOutput);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000283}
284
Sadik Armagan1625efc2021-06-10 18:24:34 +0100285TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000286{
287 using namespace armnn;
288
289 IRuntime::CreationOptions options;
290 IRuntimePtr runtime(IRuntime::Create(options));
291
292 // Builds up the structure of the network.
293 INetworkPtr net(INetwork::Create());
294
295 Pooling2dDescriptor desc;
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100296 desc.m_PoolWidth = 2;
297 desc.m_PoolHeight = 2;
298 desc.m_StrideX = 2;
299 desc.m_StrideY = 2;
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000300
301 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
302 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
303 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
304 IConnectableLayer* add = net->AddAdditionLayer("add");
305 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
306 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
307 IConnectableLayer* output = net->AddOutputLayer(0, "output");
308
309 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
310 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
311 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
312 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
313 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
314 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
315
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100316 TensorInfo info = TensorInfo({ 1, 2, 4, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100317 info.SetConstant(true);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100318 TensorInfo poolingInfo = TensorInfo({ 1, 2, 2, 1 }, DataType::Float32);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000319
320 input0->GetOutputSlot(0).SetTensorInfo(info);
321 input1->GetOutputSlot(0).SetTensorInfo(info);
322 input2->GetOutputSlot(0).SetTensorInfo(info);
323 add->GetOutputSlot(0).SetTensorInfo(info);
324 sub->GetOutputSlot(0).SetTensorInfo(info);
325 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
326
327 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
328 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
329 sub->BackendSelectionHint(backends[1]);
330
331 // optimize the network
332 OptimizerOptions optOptions;
333 optOptions.m_ImportEnabled = true;
Colm Donelan03bf98a2022-05-30 15:20:36 +0100334 optOptions.m_ExportEnabled = true;
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000335 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
336
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000337 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000338
339 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
340 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
341 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
342 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
343 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
344 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
345 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
346 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
347 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
348
349 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100350 CHECK(CheckOrder(graph, layer0, layer1));
351 CHECK(CheckOrder(graph, layer1, layer2));
352 CHECK(CheckOrder(graph, layer2, layer3));
353 CHECK(CheckOrder(graph, layer3, layer4));
354 CHECK(CheckOrder(graph, layer4, layer5));
355 CHECK(CheckOrder(graph, layer5, layer6));
356 CHECK(CheckOrder(graph, layer6, layer7));
357 CHECK(CheckOrder(graph, layer7, layer8));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000358
359 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100360 CHECK((layer4->GetType() == LayerType::MemCopy));
361 CHECK((layer6->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000362
363 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100364 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000365
366 // Load it into the runtime. It should pass.
367 NetworkId netId;
368 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100369 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000370 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
371
372 // Creates structures for input & output
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100373 std::vector<float> inputValue0
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000374 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100375 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f, 1.0f, 1.0f, 2.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000376 };
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100377 std::vector<float> inputValue1
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000378 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100379 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 0.0f, 1.0f, 1.0f, 2.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000380 };
381 std::vector<float> inputData2
382 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100383 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 12.0f, 11.0f, 10.0f, 9.0f
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000384 };
385
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100386 std::vector<float> outputData(4);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000387
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100388 std::vector<float> expectedOutput{ 11.0f, 3.0f, -5.0f, 11.0f };
389
390 unsigned int numElements = info.GetNumElements();
391 size_t totalBytes = numElements * sizeof(float);
392 const size_t alignment = 64;
393 size_t space = totalBytes + alignment + alignment;
394 auto inputData0 = std::make_unique<uint8_t[]>(space);
395 void* alignedInputPtr0 = inputData0.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(std::align(alignment, totalBytes, alignedInputPtr0, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100397
398 auto* intputPtr0 = reinterpret_cast<float*>(alignedInputPtr0);
399 std::copy(inputValue0.begin(), inputValue0.end(), intputPtr0);
400
401 auto inputData1 = std::make_unique<uint8_t[]>(space);
402 void* alignedInputPtr1 = inputData1.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100403 CHECK(std::align(alignment, totalBytes, alignedInputPtr1, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100404
405 auto* intputPtr1 = reinterpret_cast<float*>(alignedInputPtr1);
406 std::copy(inputValue1.begin(), inputValue1.end(), intputPtr1);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000407
408 InputTensors inputTensors
409 {
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100410 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputPtr0) },
411 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), alignedInputPtr1) },
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000412 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
413 };
414 OutputTensors outputTensors
415 {
416 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
417 };
418
419 runtime->GetProfiler(netId)->EnableProfiling(true);
420
421 // Do the inference
422 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
423
424 // Retrieve the Profiler.Print() output to get the workload execution
425 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
426 std::stringstream ss;
427 profilerManager.GetProfiler()->Print(ss);;
428 std::string dump = ss.str();
429
430 // Executed Subtraction using CpuAcc
431 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100432 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000433
434 // Correctly switch back to GpuAcc
435 found = dump.find("ClPooling2dWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100436 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000437
438 // Contain CopyMemGeneric
439 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100440 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000441
442 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100443 CHECK(outputData == expectedOutput);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100444
445 runtime->UnloadNetwork(netId);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000446}
447
Sadik Armagan1625efc2021-06-10 18:24:34 +0100448TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000449{
450 using namespace armnn;
451
452 IRuntime::CreationOptions options;
453 IRuntimePtr runtime(IRuntime::Create(options));
454
455 // Builds up the structure of the network.
456 INetworkPtr net(INetwork::Create());
457
458 Pooling2dDescriptor desc;
459
460 IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
461 IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
462 IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
463 IConnectableLayer* add = net->AddAdditionLayer("add");
464 IConnectableLayer* sub = net->AddSubtractionLayer("sub");
465 IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
466 IConnectableLayer* output = net->AddOutputLayer(0, "output");
467
468 input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
469 input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
470 input2->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
471 add->GetOutputSlot(0).Connect(sub->GetInputSlot(1));
472 sub->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
473 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
474
475 TensorInfo info = TensorInfo({ 1, 2, 3, 2 }, DataType::Float32);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100476 info.SetConstant(true);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000477 TensorInfo poolingInfo = TensorInfo({ 1, 2, 1, 1 }, DataType::Float32);
478
479 input0->GetOutputSlot(0).SetTensorInfo(info);
480 input1->GetOutputSlot(0).SetTensorInfo(info);
481 input2->GetOutputSlot(0).SetTensorInfo(info);
482 add->GetOutputSlot(0).SetTensorInfo(info);
483 sub->GetOutputSlot(0).SetTensorInfo(info);
484 pooling->GetOutputSlot(0).SetTensorInfo(poolingInfo);
485
486 std::vector<BackendId> backends = { Compute::GpuAcc, Compute::CpuAcc };
487 // Use BackendSelectionHint to specify CpuAcc for Subtraction layer
488 sub->BackendSelectionHint(backends[1]);
489
490 // optimize the network
491 OptimizerOptions optOptions;
492 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
493
Francis Murtagh3d2b4b22021-02-15 18:23:17 +0000494 Graph& graph = GetGraphForTesting(optNet.get());
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000495
496 armnn::Layer* const layer0 = GetFirstLayerWithName(graph, "input0");
497 armnn::Layer* const layer1 = GetFirstLayerWithName(graph, "input1");
498 armnn::Layer* const layer2 = GetFirstLayerWithName(graph, "input2");
499 armnn::Layer* const layer3 = GetFirstLayerWithName(graph, "add");
500 armnn::Layer* const layer4 = GetFirstLayerWithName(graph, "[ add (0) -> sub (1) ]");
501 armnn::Layer* const layer5 = GetFirstLayerWithName(graph, "sub");
502 armnn::Layer* const layer6 = GetFirstLayerWithName(graph, "[ sub (0) -> pooling (0) ]");
503 armnn::Layer* const layer7 = GetFirstLayerWithName(graph, "pooling");
504 armnn::Layer* const layer8 = GetFirstLayerWithName(graph, "output");
505
506 // Checks order is valid.
Sadik Armagan1625efc2021-06-10 18:24:34 +0100507 CHECK(CheckOrder(graph, layer0, layer1));
508 CHECK(CheckOrder(graph, layer1, layer2));
509 CHECK(CheckOrder(graph, layer2, layer3));
510 CHECK(CheckOrder(graph, layer3, layer4));
511 CHECK(CheckOrder(graph, layer4, layer5));
512 CHECK(CheckOrder(graph, layer5, layer6));
513 CHECK(CheckOrder(graph, layer6, layer7));
514 CHECK(CheckOrder(graph, layer7, layer8));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000515
516 // Use memory import between backends
Sadik Armagan1625efc2021-06-10 18:24:34 +0100517 CHECK((layer4->GetType() == LayerType::MemCopy));
518 CHECK((layer6->GetType() == LayerType::MemCopy));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000519
520 // Correctly use backend hint
Sadik Armagan1625efc2021-06-10 18:24:34 +0100521 CHECK((layer5->GetBackendId() == Compute::CpuAcc ));
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000522
523 // Load it into the runtime. It should pass.
524 NetworkId netId;
525 runtime->LoadNetwork(netId, std::move(optNet));
526
527 // Creates structures for input & output
528 std::vector<float> inputData0
529 {
530 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f, 6.0f
531 };
532 std::vector<float> inputData1
533 {
534 0.0f, 1.0f, 1.0f, 2.0f, 3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 5.0f, 5.0f, 6.0f
535 };
536 std::vector<float> inputData2
537 {
538 12.0f, 11.0f, 10.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f
539 };
540
541 std::vector<float> outputData(2);
542
543 std::vector<float> expectedOutput{ 11.0f, -1.0f };
544
545 InputTensors inputTensors
546 {
547 { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData0.data()) },
548 { 1, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 1), inputData1.data()) },
549 { 2, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 2), inputData2.data()) }
550 };
551 OutputTensors outputTensors
552 {
553 { 0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
554 };
555
556 runtime->GetProfiler(netId)->EnableProfiling(true);
557
558 // Do the inference
559 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
560
561 // Retrieve the Profiler.Print() output to get the workload execution
562 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
563 std::stringstream ss;
564 profilerManager.GetProfiler()->Print(ss);;
565 std::string dump = ss.str();
566
567 // Executed Subtraction using CpuAcc
568 std::size_t found = dump.find("NeonSubtractionWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100569 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000570
571 // Correctly switch back to GpuAcc
572 found = dump.find("ClPooling2dWorkload_Execute");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100573 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000574
575 // Contain CopyMemGeneric
576 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100577 CHECK(found != std::string::npos);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000578
579 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100580 CHECK(outputData == expectedOutput);
Narumol Prangnawarat265e53e2020-10-30 16:06:55 +0000581}
582
Sadik Armagan1625efc2021-06-10 18:24:34 +0100583}