blob: 44ae2beb768ab84427c06ad417198a4e4988de25 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
59 runtime->LoadNetwork(netId, std::move(optNet));
60
61 // Creates structures for input & output.
62 std::vector<T> outputData(inputData.size());
63
64 InputTensors inputTensors
65 {
66 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67 };
68 OutputTensors outputTensors
69 {
70 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71 };
72
73 // Does the inference.
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75
76 // Checks the results.
77 return outputData == expectedOutputData;
78}
79
80inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010082 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010084
85 return ConstantUsageTest(backends,
86 commonTensorInfo,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90 );
91}
92
93inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94{
Derek Lambertif90c56d2020-01-10 17:14:08 +000095 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010096
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
99
100 commonTensorInfo.SetQuantizationScale(scale);
101 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100102 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100103
104 return ConstantUsageTest(backends,
105 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100109 );
110}
111
// Utility function to find the number of non-overlapping instances of a
// substring within a string.
//
// Fixes over the previous version:
//  - Parameters are taken by const reference: the old (std::string&,
//    std::string&&) signature rejected const strings and named substrings
//    for no benefit. All existing call sites (lvalue haystack, literal
//    needle) still compile unchanged.
//  - An empty needle now returns 0. Previously find("") matched at every
//    position and `found += substring.length()` added 0, so the loop never
//    advanced — an infinite loop.
int SubStringCounter(const std::string& str, const std::string& substring)
{
    if (substring.empty())
    {
        return 0;
    }
    std::size_t found = 0;
    int count = 0;
    // Resume the search just past the previous match so the same occurrence
    // is not counted twice (overlapping matches are intentionally skipped).
    while ((found = str.find(substring, found)) != std::string::npos)
    {
        ++count;
        found += substring.length();
    }
    return count;
}
126
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000127template<DataType ArmnnIType, DataType ArmnnOType,
128 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000129void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000130 const std::map<int, std::vector<TInput>>& inputTensorData,
131 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000134{
135 // Create runtime in which test will run
136 IRuntime::CreationOptions options;
137 IRuntimePtr runtime(IRuntime::Create(options));
138
139 // optimize the network
140 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141
142 // Loads it into the runtime.
143 NetworkId netId;
144 runtime->LoadNetwork(netId, std::move(optNet));
145
146 InputTensors inputTensors;
147 inputTensors.reserve(inputTensorData.size());
148 for (auto&& it : inputTensorData)
149 {
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152 }
153 OutputTensors outputTensors;
154 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000155 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000156 for (auto&& it : expectedOutputData)
157 {
kevmay012b4d88e2019-01-24 14:05:09 +0000158 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
163 }
164
165 // Does the inference.
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167
168 // Checks the results.
169 for (auto&& it : expectedOutputData)
170 {
kevmay012b4d88e2019-01-24 14:05:09 +0000171 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100172 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000173 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100175 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 }
narpra01b9546cf2018-11-20 15:21:28 +0000178 }
179}
180
David Monahan4f1e8e42019-09-04 09:22:10 +0100181inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100182{
183 using namespace armnn;
184
185 // Create runtime in which test will run
186 IRuntime::CreationOptions options;
187 IRuntimePtr runtime(armnn::IRuntime::Create(options));
188
189 // build up the structure of the network
190 INetworkPtr net(INetwork::Create());
191
192 IConnectableLayer* input = net->AddInputLayer(0);
193
David Monahan3fb7e102019-08-20 11:25:29 +0100194 ActivationDescriptor descriptor;
195 descriptor.m_Function = ActivationFunction::Square;
196 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100197
198 IConnectableLayer* output = net->AddOutputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100202
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100204 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100205
206 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000207 OptimizerOptions optimizedOptions;
208 optimizedOptions.m_ImportEnabled = true;
209 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100210 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100211
212 // Loads it into the runtime.
213 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100214 std::string ignoredErrorMessage;
215 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100216 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan4f1e8e42019-09-04 09:22:10 +0100217 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100218
219 // Creates structures for input & output
220 std::vector<float> inputData
221 {
David Monahan3fb7e102019-08-20 11:25:29 +0100222 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100223 };
224
225 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100226 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100227
David Monahan3fb7e102019-08-20 11:25:29 +0100228 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100229
David Monahan4f1e8e42019-09-04 09:22:10 +0100230 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100231 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100232
233 InputTensors inputTensors
234 {
235 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
236 };
237 OutputTensors outputTensors
238 {
239 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
240 };
241
David Monahan4f1e8e42019-09-04 09:22:10 +0100242 runtime->GetProfiler(netId)->EnableProfiling(true);
243
244 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100245 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100246}
247
Ferran Balaguer83239f92019-09-19 11:49:25 +0100248inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100249{
250 using namespace armnn;
251
252 // Create runtime in which test will run
253 IRuntime::CreationOptions options;
254 IRuntimePtr runtime(armnn::IRuntime::Create(options));
255
256 // build up the structure of the network
257 INetworkPtr net(INetwork::Create());
258
259 IConnectableLayer* input = net->AddInputLayer(0);
260
David Monahan3fb7e102019-08-20 11:25:29 +0100261 ActivationDescriptor descriptor;
262 descriptor.m_Function = ActivationFunction::Square;
263 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100264
265 IConnectableLayer* output = net->AddOutputLayer(0);
266
David Monahan3fb7e102019-08-20 11:25:29 +0100267 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
268 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100269
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100270 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100271 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100272
273 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000274 OptimizerOptions optimizedOptions;
275 optimizedOptions.m_ImportEnabled = true;
276 optimizedOptions.m_ExportEnabled = true;
277 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100278 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100279
280 // Loads it into the runtime.
281 NetworkId netId;
282 std::string ignoredErrorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100283 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100284 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100285 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
286
287 // Creates structures for input & output
288 std::vector<float> inputData
289 {
290 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
291 };
292
293 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100294 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100295
296 std::vector<float> outputData(5);
297
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100298 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100299 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100300
301 InputTensors inputTensors
302 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100303 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100304 };
305 OutputTensors outputTensors
306 {
307 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
308 };
309
Ferran Balaguer83239f92019-09-19 11:49:25 +0100310 // Do the inference and expect it to fail with a ExportMemoryException
311 if (backends[0] == Compute::CpuAcc)
312 {
313 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100314 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100315 }
316 else
317 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100318 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100319 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100320}
321
322inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
323{
324 using namespace armnn;
325
326 // Create runtime in which test will run
327 IRuntime::CreationOptions options;
328 IRuntimePtr runtime(armnn::IRuntime::Create(options));
329
330 // build up the structure of the network
331 INetworkPtr net(INetwork::Create());
332
333 IConnectableLayer* input = net->AddInputLayer(0);
334
David Monahan3fb7e102019-08-20 11:25:29 +0100335 ActivationDescriptor descriptor;
336 descriptor.m_Function = ActivationFunction::Square;
337 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100338
339 IConnectableLayer* output = net->AddOutputLayer(0);
340
David Monahan3fb7e102019-08-20 11:25:29 +0100341 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
342 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100343
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100344 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100345 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100346
347 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000348 OptimizerOptions optimizedOptions;
349 optimizedOptions.m_ImportEnabled = true;
350 optimizedOptions.m_ExportEnabled = true;
351 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100352 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100353
354 // Loads it into the runtime.
355 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100356 std::string ignoredErrorMessage;
357 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100358 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100359 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100360
361 // Creates structures for input & output
362 std::vector<float> inputData
363 {
364 1.0f, 2.0f, 3.0f, 4.0f
365 };
366
367 std::vector<float> outputData(4);
368
James Conroy57d10b72019-10-25 09:44:14 +0100369 std::vector<float> expectedOutput
370 {
371 1.0f, 4.0f, 9.0f, 16.0f
372 };
373
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100374 InputTensors inputTensors
375 {
376 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
377 };
378 OutputTensors outputTensors
379 {
380 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
381 };
382
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100383 runtime->GetProfiler(netId)->EnableProfiling(true);
384
385 // Do the inference
386 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
387
388 // Retrieve the Profiler.Print() output to get the workload execution
389 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
390 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000391 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100392 std::string dump = ss.str();
393
David Monahan3fb7e102019-08-20 11:25:29 +0100394 // Contains ActivationWorkload
395 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100397
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100398 // Contains SyncMemGeneric
399 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100400 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100401
Ferran Balaguer83239f92019-09-19 11:49:25 +0100402 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100403 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100404 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100405
406 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100407 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100408}
409
Ferran Balaguer83239f92019-09-19 11:49:25 +0100410inline void ImportOnlyWorkload(std::vector<BackendId> backends)
411{
412 using namespace armnn;
413
414 IRuntime::CreationOptions options;
415 IRuntimePtr runtime(IRuntime::Create(options));
416
417 // Builds up the structure of the network.
418 INetworkPtr net(INetwork::Create());
419
420 IConnectableLayer* input = net->AddInputLayer(0);
421
422 ActivationDescriptor descriptor;
423 descriptor.m_Function = ActivationFunction::Square;
424 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
425
426 IConnectableLayer* output = net->AddOutputLayer(0);
427
428 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
429 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
430
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100431 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100432 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
433
434 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000435 OptimizerOptions optimizedOptions;
436 optimizedOptions.m_ImportEnabled = true;
437 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100438
Sadik Armagan1625efc2021-06-10 18:24:34 +0100439 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100440 // Load it into the runtime. It should pass.
441 NetworkId netId;
442 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100443
444 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
445
Sadik Armagan1625efc2021-06-10 18:24:34 +0100446 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100447 == Status::Success);
448
Sadik Armagan1625efc2021-06-10 18:24:34 +0100449 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100450 // Creates structures for input & output
451 std::vector<float> inputData
452 {
453 1.0f, 2.0f, 3.0f, 4.0f
454 };
455
456 std::vector<float> outputData(4);
457
458 std::vector<float> expectedOutput
459 {
460 1.0f, 4.0f, 9.0f, 16.0f
461 };
462
David Monahan646bc8a2022-01-31 14:29:14 +0000463 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100464
Ferran Balaguer83239f92019-09-19 11:49:25 +0100465 InputTensors inputTensors
466 {
467 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
468 };
469 OutputTensors outputTensors
470 {
471 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
472 };
473
Sadik Armagan1625efc2021-06-10 18:24:34 +0100474 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100475 runtime->GetProfiler(netId)->EnableProfiling(true);
476
Sadik Armagan1625efc2021-06-10 18:24:34 +0100477 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100478 // Do the inference
479 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
480
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482 // Retrieve the Profiler.Print() output to get the workload execution
483 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
484 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000485 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100486 std::string dump = ss.str();
487
488 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100489 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100490 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100491 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100492
493 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100494 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100495 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100496 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100497
498 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100499 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100500}
501
502inline void ExportOnlyWorkload(std::vector<BackendId> backends)
503{
504 using namespace armnn;
505
506 IRuntime::CreationOptions options;
507 IRuntimePtr runtime(IRuntime::Create(options));
508
509 // Builds up the structure of the network.
510 INetworkPtr net(INetwork::Create());
511
512 IConnectableLayer* input = net->AddInputLayer(0);
513
514 ActivationDescriptor descriptor;
515 descriptor.m_Function = ActivationFunction::Square;
516 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
517
518 IConnectableLayer* output = net->AddOutputLayer(0);
519
520 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
521 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
522
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100523 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100524 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
525
526 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000527 OptimizerOptions optimizedOptions;
528 optimizedOptions.m_ExportEnabled = true;
529 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100530
Sadik Armagan1625efc2021-06-10 18:24:34 +0100531 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100532 // Load it into the runtime. It should pass.
533 NetworkId netId;
534 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100535 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100536 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100537 == Status::Success);
538
Sadik Armagan1625efc2021-06-10 18:24:34 +0100539 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100540 // Creates structures for input & output
541 std::vector<float> inputData
542 {
543 1.0f, 2.0f, 3.0f, 4.0f
544 };
545
546 std::vector<float> outputData(4);
547
548 std::vector<float> expectedOutput
549 {
550 1.0f, 4.0f, 9.0f, 16.0f
551 };
552
David Monahan646bc8a2022-01-31 14:29:14 +0000553 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100554
Ferran Balaguer83239f92019-09-19 11:49:25 +0100555 InputTensors inputTensors
556 {
557 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
558 };
559 OutputTensors outputTensors
560 {
561 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
562 };
563
Sadik Armagan1625efc2021-06-10 18:24:34 +0100564 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100565 runtime->GetProfiler(netId)->EnableProfiling(true);
566
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100568 // Do the inference
569 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
570
Sadik Armagan1625efc2021-06-10 18:24:34 +0100571 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100572 // Retrieve the Profiler.Print() output to get the workload execution
573 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
574 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000575 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100576 std::string dump = ss.str();
577
578 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100579 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100580 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100581 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100582
583 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100584 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100585 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100586 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100587
588 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100589 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100590}
591
592inline void ImportAndExportWorkload(std::vector<BackendId> backends)
593{
594 using namespace armnn;
595
596 IRuntime::CreationOptions options;
597 IRuntimePtr runtime(IRuntime::Create(options));
598
599 // Builds up the structure of the network.
600 INetworkPtr net(INetwork::Create());
601
602 IConnectableLayer* input = net->AddInputLayer(0);
603
604 ActivationDescriptor descriptor;
605 descriptor.m_Function = ActivationFunction::Square;
606 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
607
608 IConnectableLayer* output = net->AddOutputLayer(0);
609
610 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
611 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
612
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100613 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100614 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
615
Francis Murtagh626bd902022-06-21 13:16:23 +0000616 OptimizerOptions optimizedOptions;
617 optimizedOptions.m_ImportEnabled = true;
618 optimizedOptions.m_ExportEnabled = true;
619 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100620
Sadik Armagan1625efc2021-06-10 18:24:34 +0100621 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100622 // Load it into the runtime. It should pass.
623 NetworkId netId;
624 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100625
626 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
627
Sadik Armagan1625efc2021-06-10 18:24:34 +0100628 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100629 == Status::Success);
630
Sadik Armagan1625efc2021-06-10 18:24:34 +0100631 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100632 // Creates structures for input & output
633 std::vector<float> inputData
634 {
635 1.0f, 2.0f, 3.0f, 4.0f
636 };
637
638 std::vector<float> outputData(4);
639
640 std::vector<float> expectedOutput
641 {
642 1.0f, 4.0f, 9.0f, 16.0f
643 };
644
David Monahan646bc8a2022-01-31 14:29:14 +0000645 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100646
Ferran Balaguer83239f92019-09-19 11:49:25 +0100647 InputTensors inputTensors
648 {
649 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
650 };
651 OutputTensors outputTensors
652 {
653 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
654 };
655
Sadik Armagan1625efc2021-06-10 18:24:34 +0100656 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100657 runtime->GetProfiler(netId)->EnableProfiling(true);
658
Sadik Armagan1625efc2021-06-10 18:24:34 +0100659 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100660 // Do the inference
661 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
662
Sadik Armagan1625efc2021-06-10 18:24:34 +0100663 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100664 // Retrieve the Profiler.Print() output to get the workload execution
665 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
666 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000667 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100668 std::string dump = ss.str();
669
670 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100671 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100672 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100673 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100674
675 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100676 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100677 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100678 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100679
680 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100681 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100682}
683
inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    // Verifies that when a single output slot feeds more than one output layer the
    // runtime cannot export the tensor: the profiling dump must contain a
    // CopyMemGeneric workload and no SyncMemGeneric workload, while both outputs
    // still receive the correct squared values.
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network: Input -> Activation(Square) -> two Outputs.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    // The activation's single output slot is connected to BOTH output layers.
    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network with import and export enabled.
    OptimizerOptions optimizedOptions;
    optimizedOptions.m_ImportEnabled = true;
    optimizedOptions.m_ExportEnabled = true;
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    // Both outputs should hold the element-wise square of the input.
    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // Profiling is enabled so the workload names can be inspected below. The result
    // of the inference is not the main point; the absence of export workloads is.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    // The backend-specific activation workload must appear in the dump.
    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    CHECK(found != std::string::npos);
    // Must NOT contain SyncMemGeneric: export is not possible with two consumers.
    found = dump.find("SyncMemGeneric");
    CHECK(found == std::string::npos);
    // Must contain CopyMemGeneric: the outputs were copied instead of exported.
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check that the outputs are correct
    CHECK(std::equal(outputData0.begin(), outputData0.end(),
                     expectedOutput.begin(), expectedOutput.end()));
    CHECK(std::equal(outputData1.begin(), outputData1.end(),
                     expectedOutput.begin(), expectedOutput.end()));
}
790
David Monahan0a99a142020-03-13 07:52:54 +0000791inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
792{
793 using namespace armnn;
794
795 // Create runtime in which test will run
796 IRuntime::CreationOptions options;
797 IRuntimePtr runtime(armnn::IRuntime::Create(options));
798
799 // build up the structure of the network
800 INetworkPtr net(INetwork::Create());
801
802 IConnectableLayer* input = net->AddInputLayer(0);
803
804 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
805 // dim of the output to make it too small to hold the specified slice.
806 StridedSliceDescriptor descriptor;
807 descriptor.m_Begin = {0, 0};
808 descriptor.m_End = {2, 3};
809 descriptor.m_Stride = {1, 1};
810 descriptor.m_BeginMask = 0;
811 descriptor.m_EndMask = 0;
812 descriptor.m_ShrinkAxisMask = 1;
813 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
814
815 IConnectableLayer* output0 = net->AddOutputLayer(0);
816
817 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
818 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
819
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100820 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000821 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
822
823 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100824 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000825}
826
inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
{
    /**
     * This test is similar to the Import tests above: we create a network with a square function, pass in a
     * vector with 4 floats, square them and validate the output. We then check the profiling logs to see if
     * input/output tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
     * In this case both buffers are correctly aligned, so all inputs and outputs should be imported
     * (except on CpuAcc, which currently always copies - see the branch below).
     */
    using namespace armnn;
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network: Input -> Activation(Square) -> Output.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);
    input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
    activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    INFO("Load Network");

    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // MemorySource is left Undefined at load time; the buffers are imported
    // explicitly per-inference below via ImportInputs/ImportOutputs.
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
          == Status::Success);
    INFO("Generate Data");

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };
    std::vector<float> outputData(4);
    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    // Check our input and output pointers are actually aligned
    uintptr_t alignment = GetDataTypeSize(DataType::Float32);
    CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
    CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));

    INFO("Create Inference");
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);
    // Explicitly import both aligned buffers; each import is expected to succeed.
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    CHECK(importedInputIds.size() == 1);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
    CHECK(importedOutputIds.size() == 1);
    // Do the inference and force the import as the memory is aligned.
    // Empty InputTensors()/OutputTensors(): every tensor is supplied via the imported ids.
    runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    if (backends[0] == Compute::CpuAcc)
    {
        // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
        // reconfigure is implemented
        int count = SubStringCounter(dump, "SyncMemGeneric");
        CHECK(count == 0);
        // Should be 2 CopyMemGeneric workloads
        count = SubStringCounter(dump, "CopyMemGeneric");
        CHECK(count == 2);
    }
    else
    {
        // Check there is a SyncMemGeneric workload as we exported
        int count = SubStringCounter(dump, "SyncMemGeneric");
        CHECK(count == 1);
        // Shouldn't be any CopyMemGeneric workloads
        count = SubStringCounter(dump, "CopyMemGeneric");
        CHECK(count == 0);
    }
    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
}
925
inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
{
    /**
     * This test is similar to the Import tests above: we create a network with a square function, pass in a
     * vector with 4 floats, square them and validate the output. We then check the profiling logs to see if
     * input/output tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
     * In this case only the output should be imported: the input buffer is deliberately misaligned, so its
     * import must fail and the input is copied instead.
     */
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network: Input -> Activation(Square) -> Output.
    INetworkPtr net(INetwork::Create());
    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
    activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    INFO("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
          == Status::Success);
    INFO("Generate Data");

    // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
    // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
    auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));

    float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);

    // Check if our pointer is truly misaligned
    uintptr_t alignment = GetDataTypeSize(DataType::Float32);
    CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);

    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Stage the input values into the misaligned buffer.
    std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));

    std::vector<float> outputData(4);
    // Check our output buffer is aligned
    CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    INFO("Create Inference");
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };
    runtime->GetProfiler(netId)->EnableProfiling(true);
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    // We expect the import to have failed.
    CHECK(importedInputIds.size() == 0);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
    CHECK(importedOutputIds.size() == 1);

    // Do the inference and force the import as the memory is misaligned.
    // The input is passed as a regular tensor (it was not imported); the output
    // goes through its imported id, hence the empty OutputTensors().
    runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
    // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
    // for imports/copies. Only that the output is correct.
    if (backends[0] != Compute::GpuAcc)
    {
        if (backends[0] == Compute::CpuAcc)
        {
            // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
            // reconfigure is implemented
            // We should get 0 SyncMemGeneric for the Output
            int count = SubStringCounter(dump, "SyncMemGeneric");
            CHECK(count == 0);
            // Should be 2 CopyMemGeneric as we copied the input
            count = SubStringCounter(dump, "CopyMemGeneric");
            CHECK(count == 2);
        }
        else
        {
            // We should get 1 SyncMemGeneric for the Output
            int count = SubStringCounter(dump, "SyncMemGeneric");
            CHECK(count == 1);
            // Should only be 1 CopyMemGeneric as we copied the input
            count = SubStringCounter(dump, "CopyMemGeneric");
            CHECK(count == 1);
        }
    }
    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
    std::free(memPtr);
}
1047
1048inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1049{
1050 /**
1051 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1052 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1053 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1054 * In this case all only the input should be imported
1055 */
1056 using namespace armnn;
1057
1058 IRuntime::CreationOptions options;
1059 IRuntimePtr runtime(IRuntime::Create(options));
1060
1061 // Builds up the structure of the network.
1062 INetworkPtr net(INetwork::Create());
1063 IConnectableLayer* input = net->AddInputLayer(0);
1064
1065 ActivationDescriptor descriptor;
1066 descriptor.m_Function = ActivationFunction::Square;
1067 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1068
1069 IConnectableLayer* output = net->AddOutputLayer(0);
1070
1071 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1072 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1073 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1074 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1075
1076 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1077 INFO("Load Network");
1078 // Load it into the runtime. It should pass.
1079 NetworkId netId;
1080 std::string ignoredErrorMessage;
1081 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1082 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1083 == Status::Success);
1084 INFO("Generate Data");
1085
1086 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1087 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1088 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1089
1090 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1091
1092 // Check if our pointer is truly misaligned
1093 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1094 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1095
1096 // Creates structures for input & output
1097 std::vector<float> inputData
1098 {
1099 1.0f, 2.0f, 3.0f, 4.0f
1100 };
1101
1102 // Check our input buffer is aligned
1103 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1104 std::vector<float> expectedOutput
1105 {
1106 1.0f, 4.0f, 9.0f, 16.0f
1107 };
1108
1109 INFO("Create Inference");
1110 InputTensors inputTensors
1111 {
1112 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1113 };
1114 OutputTensors outputTensors
1115 {
1116 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1117 };
1118 runtime->GetProfiler(netId)->EnableProfiling(true);
1119 std::vector<ImportedInputId> importedInputIds =
1120 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001121 CHECK(importedInputIds.size() == 1);
1122 // We expect this to fail.
David Monahan646bc8a2022-01-31 14:29:14 +00001123 std::vector<ImportedOutputId> importedOutputIds =
1124 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001125 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001126
Colm Doneland7ceec52022-07-06 12:09:05 +01001127 // Even if importing the output failed we still expect to be able to get it to work.
1128 runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001129
1130 // Retrieve the Profiler.Print() output to get the workload execution
1131 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1132 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001133 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001134 std::string dump = ss.str();
1135
1136 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1137 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1138 // for imports/copies. Only that the output is correct.
1139 if (backends[0] != Compute::GpuAcc)
1140 {
1141 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1142 int count = SubStringCounter(dump, "SyncMemGeneric");
1143 CHECK(count == 0);
1144 // Should only be 1 CopyMemGeneric as we copied the input
1145 count = SubStringCounter(dump, "CopyMemGeneric");
1146 if (backends[0] == Compute::CpuAcc)
1147 {
1148 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1149 // reconfigure is implemented
1150 CHECK(count == 2);
1151 }
1152 else
1153 {
1154 CHECK(count == 1);
1155 }
1156 // Check the output is correct
1157 }
1158 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001159 std::vector<float> outputData(expectedOutput.size(), 0);
1160 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001161 for (auto outputValue : expectedOutput)
1162 {
David Monahaneef6b762022-02-10 16:01:58 +00001163 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001164 ++index;
1165 }
1166 std::free(memPtr);
1167}
1168
1169inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1170{
1171 /**
1172 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1173 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1174 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1175 * In this case all inputs and outputs should be copied
1176 */
1177 using namespace armnn;
1178
1179 IRuntime::CreationOptions options;
1180 IRuntimePtr runtime(IRuntime::Create(options));
1181
1182 // Builds up the structure of the network.
1183 INetworkPtr net(INetwork::Create());
1184 IConnectableLayer* input = net->AddInputLayer(0);
1185
1186 ActivationDescriptor descriptor;
1187 descriptor.m_Function = ActivationFunction::Square;
1188 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1189
1190 IConnectableLayer* output = net->AddOutputLayer(0);
1191
1192 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1193 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1194 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1195 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1196
1197 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1198 INFO("Load Network");
1199 // Load it into the runtime. It should pass.
1200 NetworkId netId;
1201 std::string ignoredErrorMessage;
1202 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1203 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1204 == Status::Success);
1205 INFO("Generate Data");
1206
1207 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1208 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1209 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1210 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1211
1212 // Check if our pointer is truly misaligned
1213 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1214 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001215 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001216 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001217 1.0f, 2.0f, 3.0f, 4.0f
1218 };
1219 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001220
1221 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1222 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1223
1224 // Check if our pointer is truly misaligned
1225 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1226
1227 std::vector<float> expectedOutput
1228 {
1229 1.0f, 4.0f, 9.0f, 16.0f
1230 };
1231
1232 INFO("Create Inference");
1233 InputTensors inputTensors
1234 {
1235 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1236 };
1237 OutputTensors outputTensors
1238 {
1239 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1240 };
1241 runtime->GetProfiler(netId)->EnableProfiling(true);
1242 std::vector<ImportedInputId> importedInputIds =
1243 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001244 // Import should have failed.
1245 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001246 std::vector<ImportedOutputId> importedOutputIds =
1247 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001248 // Import should have failed.
1249 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001250
1251 // Do the inference and force the import as the memory is misaligned.
1252 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1253
1254 // Retrieve the Profiler.Print() output to get the workload execution
1255 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1256 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001257 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001258 std::string dump = ss.str();
1259
1260 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1261 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1262 // for imports/copies. Only that the output is correct.
1263 if (backends[0] != Compute::GpuAcc)
1264 {
1265 // We can only copy so there should be no SyncMemGeneric
1266 int count = SubStringCounter(dump, "SyncMemGeneric");
1267 CHECK(count == 0);
1268 // Should only be CopyMemGeneric workloads as we copied all buffers
1269 count = SubStringCounter(dump, "CopyMemGeneric");
1270 CHECK(count == 2);
1271 }
1272 // Check the output is correct
1273 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001274 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001275 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1276 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001277 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001278 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001279 ++index;
1280 }
1281 std::free(inputMemPtr);
1282 std::free(outputMemPtr);
1283}
1284
David Monahan16829712022-02-03 17:04:59 +00001285inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1286{
1287 /**
1288 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1289 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1290 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1291 * In this we create some aligned buffers, import them into a network and validate the output and number of
1292 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1293 * back to copying correctly.
1294 */
1295 using namespace armnn;
1296
1297 IRuntime::CreationOptions options;
1298 IRuntimePtr runtime(IRuntime::Create(options));
1299
1300 // Builds up the structure of the network.
1301 INetworkPtr net(INetwork::Create());
1302 IConnectableLayer* input = net->AddInputLayer(0);
1303
1304 ActivationDescriptor descriptor;
1305 descriptor.m_Function = ActivationFunction::Square;
1306 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1307
1308 IConnectableLayer* output = net->AddOutputLayer(0);
1309
1310 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1311 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1312 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1313 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1314
1315 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1316 INFO("Load Network");
1317 // Load it into the runtime. It should pass.
1318 NetworkId netId;
1319 std::string ignoredErrorMessage;
1320 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1321 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1322 == Status::Success);
1323 INFO("Generate Data");
1324
1325 // Creates structures for input & output
1326 std::vector<float> inputData
1327 {
1328 1.0f, 2.0f, 3.0f, 4.0f
1329 };
1330 std::vector<float> outputData(4);
1331 std::vector<float> expectedOutput
1332 {
1333 1.0f, 4.0f, 9.0f, 16.0f
1334 };
1335
1336 // Check our input and output pointers are actually aligned
1337 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1338 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1339 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1340
1341 INFO("Create Inference");
1342 InputTensors inputTensors
1343 {
1344 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1345 };
1346 OutputTensors outputTensors
1347 {
1348 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1349 };
1350
1351 runtime->GetProfiler(netId)->EnableProfiling(true);
1352 std::vector<ImportedInputId> importedInputIds =
1353 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001354 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001355 std::vector<ImportedOutputId> importedOutputIds =
1356 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001357 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001358 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001359 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001360
1361 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1362 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1363 std::stringstream ss;
1364 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1365 std::string dump = ss.str();
1366
1367 if (backends[0] == Compute::CpuAcc)
1368 {
1369 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1370 // reconfigure is implemented
1371 int count = SubStringCounter(dump, "SyncMemGeneric");
1372 CHECK(count == 0);
1373 // Should be 2 CopyMemGeneric workloads
1374 count = SubStringCounter(dump, "CopyMemGeneric");
1375 CHECK(count >= 1);
1376 }
1377 else
1378 {
1379 // Check there is at least 1 SyncMemGeneric workload as we exported
1380 int count = SubStringCounter(dump, "SyncMemGeneric");
1381 CHECK(count >= 1);
1382 // Shouldn't be any CopyMemGeneric workloads
1383 count = SubStringCounter(dump, "CopyMemGeneric");
1384 CHECK(count == 0);
1385 }
1386 // Check the output is correct
1387 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1388
1389 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1390 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1391 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1392 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1393
1394 // Check if our pointer is truly misaligned
1395 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001396
1397 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001398 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001399 2.0f, 3.0f, 4.0f, 5.0f
1400 };
1401
1402 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001403
1404 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1405 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1406
1407 // Check if our pointer is truly misaligned
1408 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1409
1410 std::vector<float> expectedMisalignedOutput
1411 {
1412 4.0f, 9.0f, 16.0f, 25.0f
1413 };
1414
1415 INFO("Create Second Inference");
1416 InputTensors inputTensorsMisaligned
1417 {
1418 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1419 };
1420 OutputTensors outputTensorsMisaligned
1421 {
1422 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1423 };
1424 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001425 // Import should fail.
1426 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001427 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001428 // Import should fail.
1429 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001430
1431 // Do the inference and force the import as the memory is misaligned.
1432 runtime->EnqueueWorkload(netId,
1433 inputTensorsMisaligned,
1434 outputTensorsMisaligned,
1435 importedInputIds,
1436 importedOutputIds);
1437
1438 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1439 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1440 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1441 dump = ss.str();
1442
1443 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1444 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1445 // for imports/copies. Only that the output is correct.
1446 if (backends[0] != Compute::GpuAcc)
1447 {
1448 // The SyncMemGeneric will still be in the profiling log from the first inference
1449 int count = SubStringCounter(dump, "SyncMemGeneric");
1450 CHECK(count >= 1);
1451 // We should now see CopyMemGeneric workloads as we copied all buffers
1452 count = SubStringCounter(dump, "CopyMemGeneric");
1453 CHECK(count >= 1);
1454 }
1455 // Check the output is correct
1456 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001457 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001458 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001459 for (auto outputValue : expectedMisalignedOutput)
1460 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001461 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001462 ++index;
1463 }
1464 // Clean up to avoid interfering with other tests
1465 runtime->UnloadNetwork(netId);
1466 std::free(inputMemPtr);
1467 std::free(outputMemPtr);
1468}
1469
1470
1471inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1472{
1473 /**
1474 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1475 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1476 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1477 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1478 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1479 * to importing correctly.
1480 */
1481 using namespace armnn;
1482
1483 IRuntime::CreationOptions options;
1484 IRuntimePtr runtime(IRuntime::Create(options));
1485
1486 // Builds up the structure of the network.
1487 INetworkPtr net(INetwork::Create());
1488 IConnectableLayer* input = net->AddInputLayer(0);
1489
1490 ActivationDescriptor descriptor;
1491 descriptor.m_Function = ActivationFunction::Square;
1492 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1493
1494 IConnectableLayer* output = net->AddOutputLayer(0);
1495
1496 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1497 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1498 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1499 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1500
1501 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1502 INFO("Load Network");
1503 // Load it into the runtime. It should pass.
1504 NetworkId netId;
1505 std::string ignoredErrorMessage;
1506 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1507 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1508 == Status::Success);
1509 INFO("Generate Data");
1510
1511 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1512 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1513 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1514 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1515
1516 // Check if our pointer is truly misaligned
1517 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1518 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001519 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001520 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001521 2.0f, 3.0f, 4.0f, 5.0f
1522 };
1523 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001524
1525 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1526 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1527
1528 // Check if our pointer is truly misaligned
1529 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1530
1531 std::vector<float> expectedMisalignedOutput
1532 {
1533 4.0f, 9.0f, 16.0f, 25.0f
1534 };
1535
1536 INFO("Create Second Inference");
1537 InputTensors inputTensorsMisaligned
1538 {
1539 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1540 };
1541 OutputTensors outputTensorsMisaligned
1542 {
1543 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1544 };
1545 runtime->GetProfiler(netId)->EnableProfiling(true);
1546 std::vector<ImportedInputId> importedInputIds =
1547 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001548 // Import should fail.
1549 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001550 std::vector<ImportedOutputId> importedOutputIds =
1551 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001552 // Import should fail.
1553 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001554
1555 // Do the inference and force the import as the memory is misaligned.
1556 runtime->EnqueueWorkload(netId,
1557 inputTensorsMisaligned,
1558 outputTensorsMisaligned,
1559 importedInputIds,
1560 importedOutputIds);
1561
1562 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1563 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1564 std::stringstream ss;
1565 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1566 std::string dump = ss.str();
1567
1568 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1569 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1570 // for imports/copies. Only that the output is correct.
1571 if (backends[0] != Compute::GpuAcc)
1572 {
1573 // We can only copy so there should be no SyncMemGeneric
1574 int count = SubStringCounter(dump, "SyncMemGeneric");
1575 CHECK(count == 0);
1576 // Should only be CopyMemGeneric workloads as we copied all buffers
1577 count = SubStringCounter(dump, "CopyMemGeneric");
1578 CHECK(count >= 1);
1579 }
1580 // Check the output is correct
1581 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001582 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1583 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001584 for (auto outputValue : expectedMisalignedOutput)
1585 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001586 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001587 ++index;
1588 }
1589 std::free(inputMemPtr);
1590 std::free(outputMemPtr);
1591
1592 // Creates structures for input & output
1593 std::vector<float> inputData
1594 {
1595 1.0f, 2.0f, 3.0f, 4.0f
1596 };
1597 std::vector<float> outputData(4);
1598 std::vector<float> expectedOutput
1599 {
1600 1.0f, 4.0f, 9.0f, 16.0f
1601 };
1602
1603 // Check our input and output pointers are actually aligned
1604 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1605 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1606
1607 INFO("Create Inference");
1608 InputTensors inputTensors
1609 {
1610 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1611 };
1612 OutputTensors outputTensors
1613 {
1614 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1615 };
1616
1617 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001618 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001619 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001620 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001621 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001622 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001623
1624 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1625 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1626 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1627 dump = ss.str();
1628
1629 if (backends[0] == Compute::CpuAcc)
1630 {
1631 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1632 // reconfigure is implemented
1633 int count = SubStringCounter(dump, "SyncMemGeneric");
1634 CHECK(count == 0);
1635 // Should be 2 CopyMemGeneric workloads
1636 count = SubStringCounter(dump, "CopyMemGeneric");
1637 CHECK(count >= 1);
1638 }
1639 else
1640 {
1641 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1642 // SyncMemGeneric Workload when we previously didn't
1643 int count = SubStringCounter(dump, "SyncMemGeneric");
1644 CHECK(count >= 1);
1645 // Should still be some CopyMemGeneric Workloads from the last inference
1646 count = SubStringCounter(dump, "CopyMemGeneric");
1647 CHECK(count >= 1);
1648 }
1649 // Check the output is correct
1650 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1651 // Clean up to avoid interfering with other tests
1652 runtime->UnloadNetwork(netId);
1653}
1654
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001655} // anonymous namespace