blob: cc5aa23ca35897fffc499c89f6ed5835cbb2ac49 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
59 runtime->LoadNetwork(netId, std::move(optNet));
60
61 // Creates structures for input & output.
62 std::vector<T> outputData(inputData.size());
63
64 InputTensors inputTensors
65 {
66 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67 };
68 OutputTensors outputTensors
69 {
70 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71 };
72
73 // Does the inference.
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75
76 // Checks the results.
77 return outputData == expectedOutputData;
78}
79
80inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010082 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010084
85 return ConstantUsageTest(backends,
86 commonTensorInfo,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90 );
91}
92
93inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94{
Derek Lambertif90c56d2020-01-10 17:14:08 +000095 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010096
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
99
100 commonTensorInfo.SetQuantizationScale(scale);
101 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100102 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100103
104 return ConstantUsageTest(backends,
105 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100109 );
110}
111
// Utility function to find the number of non-overlapping instances of a substring within a
// string, scanning from left to right.
//
// string     The text to search in.
// substring  The text to search for.
//
// Returns the number of matches. An empty substring yields 0: find() matches an empty needle
// at the current position without advancing, so searching for it would never terminate.
int SubStringCounter(std::string& string, std::string&& substring)
{
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search just past the previous match so the same occurrence is never counted twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
126
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000127template<DataType ArmnnIType, DataType ArmnnOType,
128 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000129void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000130 const std::map<int, std::vector<TInput>>& inputTensorData,
131 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000134{
135 // Create runtime in which test will run
136 IRuntime::CreationOptions options;
137 IRuntimePtr runtime(IRuntime::Create(options));
138
139 // optimize the network
140 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141
142 // Loads it into the runtime.
143 NetworkId netId;
144 runtime->LoadNetwork(netId, std::move(optNet));
145
146 InputTensors inputTensors;
147 inputTensors.reserve(inputTensorData.size());
148 for (auto&& it : inputTensorData)
149 {
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152 }
153 OutputTensors outputTensors;
154 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000155 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000156 for (auto&& it : expectedOutputData)
157 {
kevmay012b4d88e2019-01-24 14:05:09 +0000158 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
163 }
164
165 // Does the inference.
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167
168 // Checks the results.
169 for (auto&& it : expectedOutputData)
170 {
kevmay012b4d88e2019-01-24 14:05:09 +0000171 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100172 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000173 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100175 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 }
narpra01b9546cf2018-11-20 15:21:28 +0000178 }
179}
180
David Monahan4f1e8e42019-09-04 09:22:10 +0100181inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100182{
183 using namespace armnn;
184
185 // Create runtime in which test will run
186 IRuntime::CreationOptions options;
187 IRuntimePtr runtime(armnn::IRuntime::Create(options));
188
189 // build up the structure of the network
190 INetworkPtr net(INetwork::Create());
191
192 IConnectableLayer* input = net->AddInputLayer(0);
193
David Monahan3fb7e102019-08-20 11:25:29 +0100194 ActivationDescriptor descriptor;
195 descriptor.m_Function = ActivationFunction::Square;
196 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100197
198 IConnectableLayer* output = net->AddOutputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100202
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100204 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100205
206 // Optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100207 OptimizerOptions optimizedOptions;
208 optimizedOptions.m_ImportEnabled = true;
209 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100210 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100211
212 // Loads it into the runtime.
213 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100214 std::string ignoredErrorMessage;
215 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100216 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan4f1e8e42019-09-04 09:22:10 +0100217 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100218
219 // Creates structures for input & output
220 std::vector<float> inputData
221 {
David Monahan3fb7e102019-08-20 11:25:29 +0100222 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100223 };
224
225 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100226 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100227
David Monahan3fb7e102019-08-20 11:25:29 +0100228 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100229
David Monahan4f1e8e42019-09-04 09:22:10 +0100230 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100231 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100232
233 InputTensors inputTensors
234 {
235 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
236 };
237 OutputTensors outputTensors
238 {
239 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
240 };
241
David Monahan4f1e8e42019-09-04 09:22:10 +0100242 runtime->GetProfiler(netId)->EnableProfiling(true);
243
244 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100245 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100246}
247
Ferran Balaguer83239f92019-09-19 11:49:25 +0100248inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100249{
250 using namespace armnn;
251
252 // Create runtime in which test will run
253 IRuntime::CreationOptions options;
254 IRuntimePtr runtime(armnn::IRuntime::Create(options));
255
256 // build up the structure of the network
257 INetworkPtr net(INetwork::Create());
258
259 IConnectableLayer* input = net->AddInputLayer(0);
260
David Monahan3fb7e102019-08-20 11:25:29 +0100261 ActivationDescriptor descriptor;
262 descriptor.m_Function = ActivationFunction::Square;
263 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100264
265 IConnectableLayer* output = net->AddOutputLayer(0);
266
David Monahan3fb7e102019-08-20 11:25:29 +0100267 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
268 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100269
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100270 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100271 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100272
273 // Optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100274 OptimizerOptions optimizedOptions;
275 optimizedOptions.m_ImportEnabled = true;
276 optimizedOptions.m_ExportEnabled = true;
277 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100278 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100279
280 // Loads it into the runtime.
281 NetworkId netId;
282 std::string ignoredErrorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100283 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100284 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100285 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
286
287 // Creates structures for input & output
288 std::vector<float> inputData
289 {
290 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
291 };
292
293 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100294 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100295
296 std::vector<float> outputData(5);
297
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100298 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100299 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100300
301 InputTensors inputTensors
302 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100303 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100304 };
305 OutputTensors outputTensors
306 {
307 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
308 };
309
Ferran Balaguer83239f92019-09-19 11:49:25 +0100310 // Do the inference and expect it to fail with a ExportMemoryException
311 if (backends[0] == Compute::CpuAcc)
312 {
313 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100314 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100315 }
316 else
317 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100318 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100319 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100320}
321
322inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
323{
324 using namespace armnn;
325
326 // Create runtime in which test will run
327 IRuntime::CreationOptions options;
328 IRuntimePtr runtime(armnn::IRuntime::Create(options));
329
330 // build up the structure of the network
331 INetworkPtr net(INetwork::Create());
332
333 IConnectableLayer* input = net->AddInputLayer(0);
334
David Monahan3fb7e102019-08-20 11:25:29 +0100335 ActivationDescriptor descriptor;
336 descriptor.m_Function = ActivationFunction::Square;
337 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100338
339 IConnectableLayer* output = net->AddOutputLayer(0);
340
David Monahan3fb7e102019-08-20 11:25:29 +0100341 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
342 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100343
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100344 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100345 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100346
347 // Optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100348 OptimizerOptions optimizedOptions;
349 optimizedOptions.m_ImportEnabled = true;
350 optimizedOptions.m_ExportEnabled = true;
351 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100352 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100353
354 // Loads it into the runtime.
355 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100356 std::string ignoredErrorMessage;
357 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100358 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100359 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100360
361 // Creates structures for input & output
362 std::vector<float> inputData
363 {
364 1.0f, 2.0f, 3.0f, 4.0f
365 };
366
367 std::vector<float> outputData(4);
368
James Conroy57d10b72019-10-25 09:44:14 +0100369 std::vector<float> expectedOutput
370 {
371 1.0f, 4.0f, 9.0f, 16.0f
372 };
373
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100374 InputTensors inputTensors
375 {
376 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
377 };
378 OutputTensors outputTensors
379 {
380 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
381 };
382
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100383 runtime->GetProfiler(netId)->EnableProfiling(true);
384
385 // Do the inference
386 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
387
388 // Retrieve the Profiler.Print() output to get the workload execution
389 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
390 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000391 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100392 std::string dump = ss.str();
393
David Monahan3fb7e102019-08-20 11:25:29 +0100394 // Contains ActivationWorkload
395 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100397
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100398 // Contains SyncMemGeneric
399 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100400 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100401
Ferran Balaguer83239f92019-09-19 11:49:25 +0100402 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100403 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100404 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100405
406 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100407 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100408}
409
Ferran Balaguer83239f92019-09-19 11:49:25 +0100410inline void ImportOnlyWorkload(std::vector<BackendId> backends)
411{
412 using namespace armnn;
413
414 IRuntime::CreationOptions options;
415 IRuntimePtr runtime(IRuntime::Create(options));
416
417 // Builds up the structure of the network.
418 INetworkPtr net(INetwork::Create());
419
420 IConnectableLayer* input = net->AddInputLayer(0);
421
422 ActivationDescriptor descriptor;
423 descriptor.m_Function = ActivationFunction::Square;
424 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
425
426 IConnectableLayer* output = net->AddOutputLayer(0);
427
428 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
429 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
430
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100431 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100432 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
433
434 // optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100435 OptimizerOptions optimizedOptions;
436 optimizedOptions.m_ImportEnabled = true;
437 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100438
Sadik Armagan1625efc2021-06-10 18:24:34 +0100439 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100440 // Load it into the runtime. It should pass.
441 NetworkId netId;
442 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100443
444 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
445
Sadik Armagan1625efc2021-06-10 18:24:34 +0100446 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100447 == Status::Success);
448
Sadik Armagan1625efc2021-06-10 18:24:34 +0100449 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100450 // Creates structures for input & output
451 std::vector<float> inputData
452 {
453 1.0f, 2.0f, 3.0f, 4.0f
454 };
455
456 std::vector<float> outputData(4);
457
458 std::vector<float> expectedOutput
459 {
460 1.0f, 4.0f, 9.0f, 16.0f
461 };
462
David Monahan646bc8a2022-01-31 14:29:14 +0000463 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100464
Ferran Balaguer83239f92019-09-19 11:49:25 +0100465 InputTensors inputTensors
466 {
467 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
468 };
469 OutputTensors outputTensors
470 {
471 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
472 };
473
Sadik Armagan1625efc2021-06-10 18:24:34 +0100474 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100475 runtime->GetProfiler(netId)->EnableProfiling(true);
476
Sadik Armagan1625efc2021-06-10 18:24:34 +0100477 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100478 // Do the inference
479 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
480
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482 // Retrieve the Profiler.Print() output to get the workload execution
483 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
484 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000485 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100486 std::string dump = ss.str();
487
488 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100489 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100490 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100491 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100492
493 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100494 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100495 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100496 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100497
498 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100499 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100500}
501
502inline void ExportOnlyWorkload(std::vector<BackendId> backends)
503{
504 using namespace armnn;
505
506 IRuntime::CreationOptions options;
507 IRuntimePtr runtime(IRuntime::Create(options));
508
509 // Builds up the structure of the network.
510 INetworkPtr net(INetwork::Create());
511
512 IConnectableLayer* input = net->AddInputLayer(0);
513
514 ActivationDescriptor descriptor;
515 descriptor.m_Function = ActivationFunction::Square;
516 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
517
518 IConnectableLayer* output = net->AddOutputLayer(0);
519
520 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
521 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
522
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100523 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100524 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
525
526 // optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100527 OptimizerOptions optimizedOptions;
528 optimizedOptions.m_ExportEnabled = true;
529 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100530
Sadik Armagan1625efc2021-06-10 18:24:34 +0100531 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100532 // Load it into the runtime. It should pass.
533 NetworkId netId;
534 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100535 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100536 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100537 == Status::Success);
538
Sadik Armagan1625efc2021-06-10 18:24:34 +0100539 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100540 // Creates structures for input & output
541 std::vector<float> inputData
542 {
543 1.0f, 2.0f, 3.0f, 4.0f
544 };
545
546 std::vector<float> outputData(4);
547
548 std::vector<float> expectedOutput
549 {
550 1.0f, 4.0f, 9.0f, 16.0f
551 };
552
David Monahan646bc8a2022-01-31 14:29:14 +0000553 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100554
Ferran Balaguer83239f92019-09-19 11:49:25 +0100555 InputTensors inputTensors
556 {
557 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
558 };
559 OutputTensors outputTensors
560 {
561 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
562 };
563
Sadik Armagan1625efc2021-06-10 18:24:34 +0100564 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100565 runtime->GetProfiler(netId)->EnableProfiling(true);
566
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100568 // Do the inference
569 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
570
Sadik Armagan1625efc2021-06-10 18:24:34 +0100571 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100572 // Retrieve the Profiler.Print() output to get the workload execution
573 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
574 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000575 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100576 std::string dump = ss.str();
577
578 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100579 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100580 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100581 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100582
583 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100584 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100585 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100586 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100587
588 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100589 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100590}
591
592inline void ImportAndExportWorkload(std::vector<BackendId> backends)
593{
594 using namespace armnn;
595
596 IRuntime::CreationOptions options;
597 IRuntimePtr runtime(IRuntime::Create(options));
598
599 // Builds up the structure of the network.
600 INetworkPtr net(INetwork::Create());
601
602 IConnectableLayer* input = net->AddInputLayer(0);
603
604 ActivationDescriptor descriptor;
605 descriptor.m_Function = ActivationFunction::Square;
606 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
607
608 IConnectableLayer* output = net->AddOutputLayer(0);
609
610 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
611 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
612
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100613 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100614 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
615
Colm Donelan03bf98a2022-05-30 15:20:36 +0100616 OptimizerOptions optimizedOptions;
617 optimizedOptions.m_ImportEnabled = true;
618 optimizedOptions.m_ExportEnabled = true;
619 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100620
Sadik Armagan1625efc2021-06-10 18:24:34 +0100621 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100622 // Load it into the runtime. It should pass.
623 NetworkId netId;
624 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100625
626 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
627
Sadik Armagan1625efc2021-06-10 18:24:34 +0100628 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100629 == Status::Success);
630
Sadik Armagan1625efc2021-06-10 18:24:34 +0100631 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100632 // Creates structures for input & output
633 std::vector<float> inputData
634 {
635 1.0f, 2.0f, 3.0f, 4.0f
636 };
637
638 std::vector<float> outputData(4);
639
640 std::vector<float> expectedOutput
641 {
642 1.0f, 4.0f, 9.0f, 16.0f
643 };
644
David Monahan646bc8a2022-01-31 14:29:14 +0000645 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100646
Ferran Balaguer83239f92019-09-19 11:49:25 +0100647 InputTensors inputTensors
648 {
649 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
650 };
651 OutputTensors outputTensors
652 {
653 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
654 };
655
Sadik Armagan1625efc2021-06-10 18:24:34 +0100656 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100657 runtime->GetProfiler(netId)->EnableProfiling(true);
658
Sadik Armagan1625efc2021-06-10 18:24:34 +0100659 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100660 // Do the inference
661 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
662
Sadik Armagan1625efc2021-06-10 18:24:34 +0100663 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100664 // Retrieve the Profiler.Print() output to get the workload execution
665 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
666 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000667 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100668 std::string dump = ss.str();
669
670 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100671 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100672 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100673 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100674
675 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100676 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100677 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100678 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100679
680 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100681 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100682}
683
684inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
685{
686 using namespace armnn;
687
688 // Create runtime in which test will run
689 IRuntime::CreationOptions options;
690 IRuntimePtr runtime(armnn::IRuntime::Create(options));
691
692 // build up the structure of the network
693 INetworkPtr net(INetwork::Create());
694
695 IConnectableLayer* input = net->AddInputLayer(0);
696
697 ActivationDescriptor descriptor;
698 descriptor.m_Function = ActivationFunction::Square;
699 IConnectableLayer* activation = net->AddActivationLayer(descriptor);
700
701 IConnectableLayer* output0 = net->AddOutputLayer(0);
702 IConnectableLayer* output1 = net->AddOutputLayer(1);
703
704 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
705 activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
706 activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
707
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100708 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100709 activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
710
711 // Optimize the network
Colm Donelan03bf98a2022-05-30 15:20:36 +0100712 OptimizerOptions optimizedOptions;
713 optimizedOptions.m_ImportEnabled = true;
714 optimizedOptions.m_ExportEnabled = true;
715 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100716
717 // Loads it into the runtime.
718 NetworkId netId;
719 std::string ignoredErrorMessage;
720 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100721 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100722 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
723
724 // Creates structures for input & output
725 std::vector<float> inputData
726 {
727 1.0f, 2.0f, 3.0f, 4.0f
728 };
729
730 std::vector<float> outputData0(4);
731 std::vector<float> outputData1(4);
732
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100733 std::vector<float> expectedOutput
734 {
735 1.0f, 4.0f, 9.0f, 16.0f
736 };
737
Ferran Balaguer83239f92019-09-19 11:49:25 +0100738 InputTensors inputTensors
739 {
740 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
741 };
742 OutputTensors outputTensors
743 {
744 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
745 {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
746 };
747
748 // The result of the inference is not important, just the fact that there
749 // should not be CopyMemGeneric workloads.
750 runtime->GetProfiler(netId)->EnableProfiling(true);
751
752 // Do the inference
753 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
754
755 // Retrieve the Profiler.Print() output to get the workload execution
756 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
757 std::stringstream ss;
758 profilerManager.GetProfiler()->Print(ss);
759 std::string dump = ss.str();
760
761 std::size_t found = std::string::npos;
762
763 if (backends[0] == Compute::CpuRef)
764 {
765 found = dump.find("RefActivationWorkload");
766 }
767 else if (backends[0] == Compute::CpuAcc)
768 {
769 found = dump.find("NeonActivationWorkload");
770 }
771 else if (backends[0] == Compute::GpuAcc)
772 {
773 found = dump.find("ClActivationWorkload");
774 }
775
Sadik Armagan1625efc2021-06-10 18:24:34 +0100776 CHECK(found != std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100777 // No contains SyncMemGeneric
778 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100779 CHECK(found == std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100780 // Contains CopyMemGeneric
781 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100782 CHECK(found != std::string::npos);
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100783
784 // Check that the outputs are correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100785 CHECK(std::equal(outputData0.begin(), outputData0.end(),
786 expectedOutput.begin(), expectedOutput.end()));
787 CHECK(std::equal(outputData1.begin(), outputData1.end(),
788 expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100789}
790
David Monahan0a99a142020-03-13 07:52:54 +0000791inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
792{
793 using namespace armnn;
794
795 // Create runtime in which test will run
796 IRuntime::CreationOptions options;
797 IRuntimePtr runtime(armnn::IRuntime::Create(options));
798
799 // build up the structure of the network
800 INetworkPtr net(INetwork::Create());
801
802 IConnectableLayer* input = net->AddInputLayer(0);
803
804 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
805 // dim of the output to make it too small to hold the specified slice.
806 StridedSliceDescriptor descriptor;
807 descriptor.m_Begin = {0, 0};
808 descriptor.m_End = {2, 3};
809 descriptor.m_Stride = {1, 1};
810 descriptor.m_BeginMask = 0;
811 descriptor.m_EndMask = 0;
812 descriptor.m_ShrinkAxisMask = 1;
813 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
814
815 IConnectableLayer* output0 = net->AddOutputLayer(0);
816
817 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
818 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
819
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100820 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000821 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
822
823 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100824 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000825}
826
David Monahan646bc8a2022-01-31 14:29:14 +0000827inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
828{
829 /**
830 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
831 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
832 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
833 * In this case all inputs and outputs should be imported
834 */
835 using namespace armnn;
836 IRuntime::CreationOptions options;
837 IRuntimePtr runtime(IRuntime::Create(options));
838
839 // Builds up the structure of the network.
840 INetworkPtr net(INetwork::Create());
841 IConnectableLayer* input = net->AddInputLayer(0);
842 ActivationDescriptor descriptor;
843 descriptor.m_Function = ActivationFunction::Square;
844 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
845 IConnectableLayer* output = net->AddOutputLayer(0);
846 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
847 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
848 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
849 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
850 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
851 INFO("Load Network");
852
853 // Load it into the runtime. It should pass.
854 NetworkId netId;
855 std::string ignoredErrorMessage;
856 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
857 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
858 == Status::Success);
859 INFO("Generate Data");
860
861 // Creates structures for input & output
862 std::vector<float> inputData
863 {
864 1.0f, 2.0f, 3.0f, 4.0f
865 };
866 std::vector<float> outputData(4);
867 std::vector<float> expectedOutput
868 {
869 1.0f, 4.0f, 9.0f, 16.0f
870 };
871
872 // Check our input and output pointers are actually aligned
873 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
874 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
875 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
876
877 INFO("Create Inference");
878 InputTensors inputTensors
879 {
880 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
881 };
882 OutputTensors outputTensors
883 {
884 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
885 };
886
887 runtime->GetProfiler(netId)->EnableProfiling(true);
888 std::vector<ImportedInputId> importedInputIds =
889 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
890 std::vector<ImportedOutputId> importedOutputIds =
891 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
892 // Do the inference and force the import as the memory is aligned.
893 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
894
895 // Retrieve the Profiler.Print() output to get the workload execution
896 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
897 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000898 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000899 std::string dump = ss.str();
900
901 if (backends[0] == Compute::CpuAcc)
902 {
903 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
904 // reconfigure is implemented
905 int count = SubStringCounter(dump, "SyncMemGeneric");
906 CHECK(count == 0);
907 // Should be 2 CopyMemGeneric workloads
908 count = SubStringCounter(dump, "CopyMemGeneric");
909 CHECK(count == 2);
910 }
911 else
912 {
913 // Check there is a SyncMemGeneric workload as we exported
914 int count = SubStringCounter(dump, "SyncMemGeneric");
915 CHECK(count == 1);
916 // Shouldn't be any CopyMemGeneric workloads
917 count = SubStringCounter(dump, "CopyMemGeneric");
918 CHECK(count == 0);
919 }
920 // Check the output is correct
921 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
922}
923
924inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
925{
926 /**
927 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
928 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
929 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
930 * In this case all only the output should be imported
931 */
932 using namespace armnn;
933
934 IRuntime::CreationOptions options;
935 IRuntimePtr runtime(IRuntime::Create(options));
936
937 // Builds up the structure of the network.
938 INetworkPtr net(INetwork::Create());
939 IConnectableLayer* input = net->AddInputLayer(0);
940
941 ActivationDescriptor descriptor;
942 descriptor.m_Function = ActivationFunction::Square;
943 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
944
945 IConnectableLayer* output = net->AddOutputLayer(0);
946
947 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
948 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
949 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
950 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
951
952 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
953 INFO("Load Network");
954 // Load it into the runtime. It should pass.
955 NetworkId netId;
956 std::string ignoredErrorMessage;
957 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
958 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
959 == Status::Success);
960 INFO("Generate Data");
961
962 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
963 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
964 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
965
966 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
967
968 // Check if our pointer is truly misaligned
969 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
970 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
971
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000972 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +0000973 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000974 1.0f, 2.0f, 3.0f, 4.0f
975 };
976
977 std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +0000978
979 std::vector<float> outputData(4);
980 // Check our output buffer is aligned
981 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
982
983 std::vector<float> expectedOutput
984 {
985 1.0f, 4.0f, 9.0f, 16.0f
986 };
987
988 INFO("Create Inference");
989 InputTensors inputTensors
990 {
991 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
992 };
993 OutputTensors outputTensors
994 {
995 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
996 };
997 runtime->GetProfiler(netId)->EnableProfiling(true);
998 std::vector<ImportedInputId> importedInputIds =
999 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1000 std::vector<ImportedOutputId> importedOutputIds =
1001 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1002
1003 // Do the inference and force the import as the memory is misaligned.
1004 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1005
1006 // Retrieve the Profiler.Print() output to get the workload execution
1007 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1008 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001009 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001010 std::string dump = ss.str();
1011
1012 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1013 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1014 // for imports/copies. Only that the output is correct.
1015 if (backends[0] != Compute::GpuAcc)
1016 {
1017 if (backends[0] == Compute::CpuAcc)
1018 {
1019 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1020 // reconfigure is implemented
1021 // We should get 0 SyncMemGeneric for the Output
1022 int count = SubStringCounter(dump, "SyncMemGeneric");
1023 CHECK(count == 0);
1024 // Should be 2 CopyMemGeneric as we copied the input
1025 count = SubStringCounter(dump, "CopyMemGeneric");
1026 CHECK(count == 2);
1027 }
1028 else
1029 {
1030 // We should get 1 SyncMemGeneric for the Output
1031 int count = SubStringCounter(dump, "SyncMemGeneric");
1032 CHECK(count == 1);
1033 // Should only be 1 CopyMemGeneric as we copied the input
1034 count = SubStringCounter(dump, "CopyMemGeneric");
1035 CHECK(count == 1);
1036 }
1037 }
1038 // Check the output is correct
1039 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1040 std::free(memPtr);
1041}
1042
1043inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1044{
1045 /**
1046 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1047 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1048 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1049 * In this case all only the input should be imported
1050 */
1051 using namespace armnn;
1052
1053 IRuntime::CreationOptions options;
1054 IRuntimePtr runtime(IRuntime::Create(options));
1055
1056 // Builds up the structure of the network.
1057 INetworkPtr net(INetwork::Create());
1058 IConnectableLayer* input = net->AddInputLayer(0);
1059
1060 ActivationDescriptor descriptor;
1061 descriptor.m_Function = ActivationFunction::Square;
1062 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1063
1064 IConnectableLayer* output = net->AddOutputLayer(0);
1065
1066 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1067 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1068 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1069 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1070
1071 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1072 INFO("Load Network");
1073 // Load it into the runtime. It should pass.
1074 NetworkId netId;
1075 std::string ignoredErrorMessage;
1076 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1077 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1078 == Status::Success);
1079 INFO("Generate Data");
1080
1081 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1082 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1083 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1084
1085 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1086
1087 // Check if our pointer is truly misaligned
1088 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1089 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1090
1091 // Creates structures for input & output
1092 std::vector<float> inputData
1093 {
1094 1.0f, 2.0f, 3.0f, 4.0f
1095 };
1096
1097 // Check our input buffer is aligned
1098 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1099 std::vector<float> expectedOutput
1100 {
1101 1.0f, 4.0f, 9.0f, 16.0f
1102 };
1103
1104 INFO("Create Inference");
1105 InputTensors inputTensors
1106 {
1107 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1108 };
1109 OutputTensors outputTensors
1110 {
1111 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1112 };
1113 runtime->GetProfiler(netId)->EnableProfiling(true);
1114 std::vector<ImportedInputId> importedInputIds =
1115 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1116 std::vector<ImportedOutputId> importedOutputIds =
1117 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1118
1119 // Do the inference and force the import as the memory is misaligned.
1120 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1121
1122 // Retrieve the Profiler.Print() output to get the workload execution
1123 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1124 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001125 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001126 std::string dump = ss.str();
1127
1128 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1129 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1130 // for imports/copies. Only that the output is correct.
1131 if (backends[0] != Compute::GpuAcc)
1132 {
1133 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1134 int count = SubStringCounter(dump, "SyncMemGeneric");
1135 CHECK(count == 0);
1136 // Should only be 1 CopyMemGeneric as we copied the input
1137 count = SubStringCounter(dump, "CopyMemGeneric");
1138 if (backends[0] == Compute::CpuAcc)
1139 {
1140 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1141 // reconfigure is implemented
1142 CHECK(count == 2);
1143 }
1144 else
1145 {
1146 CHECK(count == 1);
1147 }
1148 // Check the output is correct
1149 }
1150 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001151 std::vector<float> outputData(expectedOutput.size(), 0);
1152 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001153 for (auto outputValue : expectedOutput)
1154 {
David Monahaneef6b762022-02-10 16:01:58 +00001155 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001156 ++index;
1157 }
1158 std::free(memPtr);
1159}
1160
1161inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1162{
1163 /**
1164 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1165 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1166 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1167 * In this case all inputs and outputs should be copied
1168 */
1169 using namespace armnn;
1170
1171 IRuntime::CreationOptions options;
1172 IRuntimePtr runtime(IRuntime::Create(options));
1173
1174 // Builds up the structure of the network.
1175 INetworkPtr net(INetwork::Create());
1176 IConnectableLayer* input = net->AddInputLayer(0);
1177
1178 ActivationDescriptor descriptor;
1179 descriptor.m_Function = ActivationFunction::Square;
1180 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1181
1182 IConnectableLayer* output = net->AddOutputLayer(0);
1183
1184 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1185 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1186 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1187 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1188
1189 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1190 INFO("Load Network");
1191 // Load it into the runtime. It should pass.
1192 NetworkId netId;
1193 std::string ignoredErrorMessage;
1194 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1195 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1196 == Status::Success);
1197 INFO("Generate Data");
1198
1199 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1200 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1201 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1202 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1203
1204 // Check if our pointer is truly misaligned
1205 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1206 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001207 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001208 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001209 1.0f, 2.0f, 3.0f, 4.0f
1210 };
1211 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001212
1213 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1214 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1215
1216 // Check if our pointer is truly misaligned
1217 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1218
1219 std::vector<float> expectedOutput
1220 {
1221 1.0f, 4.0f, 9.0f, 16.0f
1222 };
1223
1224 INFO("Create Inference");
1225 InputTensors inputTensors
1226 {
1227 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1228 };
1229 OutputTensors outputTensors
1230 {
1231 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1232 };
1233 runtime->GetProfiler(netId)->EnableProfiling(true);
1234 std::vector<ImportedInputId> importedInputIds =
1235 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1236 std::vector<ImportedOutputId> importedOutputIds =
1237 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1238
1239 // Do the inference and force the import as the memory is misaligned.
1240 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1241
1242 // Retrieve the Profiler.Print() output to get the workload execution
1243 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1244 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001245 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001246 std::string dump = ss.str();
1247
1248 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1249 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1250 // for imports/copies. Only that the output is correct.
1251 if (backends[0] != Compute::GpuAcc)
1252 {
1253 // We can only copy so there should be no SyncMemGeneric
1254 int count = SubStringCounter(dump, "SyncMemGeneric");
1255 CHECK(count == 0);
1256 // Should only be CopyMemGeneric workloads as we copied all buffers
1257 count = SubStringCounter(dump, "CopyMemGeneric");
1258 CHECK(count == 2);
1259 }
1260 // Check the output is correct
1261 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001262 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001263 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1264 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001265 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001266 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001267 ++index;
1268 }
1269 std::free(inputMemPtr);
1270 std::free(outputMemPtr);
1271}
1272
David Monahan16829712022-02-03 17:04:59 +00001273inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1274{
1275 /**
1276 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1277 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1278 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1279 * In this we create some aligned buffers, import them into a network and validate the output and number of
1280 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1281 * back to copying correctly.
1282 */
1283 using namespace armnn;
1284
1285 IRuntime::CreationOptions options;
1286 IRuntimePtr runtime(IRuntime::Create(options));
1287
1288 // Builds up the structure of the network.
1289 INetworkPtr net(INetwork::Create());
1290 IConnectableLayer* input = net->AddInputLayer(0);
1291
1292 ActivationDescriptor descriptor;
1293 descriptor.m_Function = ActivationFunction::Square;
1294 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1295
1296 IConnectableLayer* output = net->AddOutputLayer(0);
1297
1298 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1299 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1300 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1301 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1302
1303 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1304 INFO("Load Network");
1305 // Load it into the runtime. It should pass.
1306 NetworkId netId;
1307 std::string ignoredErrorMessage;
1308 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1309 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1310 == Status::Success);
1311 INFO("Generate Data");
1312
1313 // Creates structures for input & output
1314 std::vector<float> inputData
1315 {
1316 1.0f, 2.0f, 3.0f, 4.0f
1317 };
1318 std::vector<float> outputData(4);
1319 std::vector<float> expectedOutput
1320 {
1321 1.0f, 4.0f, 9.0f, 16.0f
1322 };
1323
1324 // Check our input and output pointers are actually aligned
1325 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1326 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1327 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1328
1329 INFO("Create Inference");
1330 InputTensors inputTensors
1331 {
1332 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1333 };
1334 OutputTensors outputTensors
1335 {
1336 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1337 };
1338
1339 runtime->GetProfiler(netId)->EnableProfiling(true);
1340 std::vector<ImportedInputId> importedInputIds =
1341 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1342 std::vector<ImportedOutputId> importedOutputIds =
1343 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1344 // Do the inference and force the import as the memory is aligned.
1345 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1346
1347 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1348 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1349 std::stringstream ss;
1350 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1351 std::string dump = ss.str();
1352
1353 if (backends[0] == Compute::CpuAcc)
1354 {
1355 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1356 // reconfigure is implemented
1357 int count = SubStringCounter(dump, "SyncMemGeneric");
1358 CHECK(count == 0);
1359 // Should be 2 CopyMemGeneric workloads
1360 count = SubStringCounter(dump, "CopyMemGeneric");
1361 CHECK(count >= 1);
1362 }
1363 else
1364 {
1365 // Check there is at least 1 SyncMemGeneric workload as we exported
1366 int count = SubStringCounter(dump, "SyncMemGeneric");
1367 CHECK(count >= 1);
1368 // Shouldn't be any CopyMemGeneric workloads
1369 count = SubStringCounter(dump, "CopyMemGeneric");
1370 CHECK(count == 0);
1371 }
1372 // Check the output is correct
1373 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1374
1375 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1376 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1377 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1378 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1379
1380 // Check if our pointer is truly misaligned
1381 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001382
1383 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001384 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001385 2.0f, 3.0f, 4.0f, 5.0f
1386 };
1387
1388 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001389
1390 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1391 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1392
1393 // Check if our pointer is truly misaligned
1394 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1395
1396 std::vector<float> expectedMisalignedOutput
1397 {
1398 4.0f, 9.0f, 16.0f, 25.0f
1399 };
1400
1401 INFO("Create Second Inference");
1402 InputTensors inputTensorsMisaligned
1403 {
1404 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1405 };
1406 OutputTensors outputTensorsMisaligned
1407 {
1408 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1409 };
1410 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1411 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1412
1413 // Do the inference and force the import as the memory is misaligned.
1414 runtime->EnqueueWorkload(netId,
1415 inputTensorsMisaligned,
1416 outputTensorsMisaligned,
1417 importedInputIds,
1418 importedOutputIds);
1419
1420 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1421 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1422 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1423 dump = ss.str();
1424
1425 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1426 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1427 // for imports/copies. Only that the output is correct.
1428 if (backends[0] != Compute::GpuAcc)
1429 {
1430 // The SyncMemGeneric will still be in the profiling log from the first inference
1431 int count = SubStringCounter(dump, "SyncMemGeneric");
1432 CHECK(count >= 1);
1433 // We should now see CopyMemGeneric workloads as we copied all buffers
1434 count = SubStringCounter(dump, "CopyMemGeneric");
1435 CHECK(count >= 1);
1436 }
1437 // Check the output is correct
1438 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001439 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001440 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001441 for (auto outputValue : expectedMisalignedOutput)
1442 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001443 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001444 ++index;
1445 }
1446 // Clean up to avoid interfering with other tests
1447 runtime->UnloadNetwork(netId);
1448 std::free(inputMemPtr);
1449 std::free(outputMemPtr);
1450}
1451
1452
1453inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1454{
1455 /**
1456 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1457 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1458 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1459 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1460 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1461 * to importing correctly.
1462 */
1463 using namespace armnn;
1464
1465 IRuntime::CreationOptions options;
1466 IRuntimePtr runtime(IRuntime::Create(options));
1467
1468 // Builds up the structure of the network.
1469 INetworkPtr net(INetwork::Create());
1470 IConnectableLayer* input = net->AddInputLayer(0);
1471
1472 ActivationDescriptor descriptor;
1473 descriptor.m_Function = ActivationFunction::Square;
1474 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1475
1476 IConnectableLayer* output = net->AddOutputLayer(0);
1477
1478 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1479 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1480 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1481 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1482
1483 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1484 INFO("Load Network");
1485 // Load it into the runtime. It should pass.
1486 NetworkId netId;
1487 std::string ignoredErrorMessage;
1488 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1489 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1490 == Status::Success);
1491 INFO("Generate Data");
1492
1493 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1494 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1495 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1496 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1497
1498 // Check if our pointer is truly misaligned
1499 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1500 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001501 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001502 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001503 2.0f, 3.0f, 4.0f, 5.0f
1504 };
1505 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001506
1507 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1508 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1509
1510 // Check if our pointer is truly misaligned
1511 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1512
1513 std::vector<float> expectedMisalignedOutput
1514 {
1515 4.0f, 9.0f, 16.0f, 25.0f
1516 };
1517
1518 INFO("Create Second Inference");
1519 InputTensors inputTensorsMisaligned
1520 {
1521 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1522 };
1523 OutputTensors outputTensorsMisaligned
1524 {
1525 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1526 };
1527 runtime->GetProfiler(netId)->EnableProfiling(true);
1528 std::vector<ImportedInputId> importedInputIds =
1529 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1530 std::vector<ImportedOutputId> importedOutputIds =
1531 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1532
1533 // Do the inference and force the import as the memory is misaligned.
1534 runtime->EnqueueWorkload(netId,
1535 inputTensorsMisaligned,
1536 outputTensorsMisaligned,
1537 importedInputIds,
1538 importedOutputIds);
1539
1540 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1541 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1542 std::stringstream ss;
1543 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1544 std::string dump = ss.str();
1545
1546 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1547 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1548 // for imports/copies. Only that the output is correct.
1549 if (backends[0] != Compute::GpuAcc)
1550 {
1551 // We can only copy so there should be no SyncMemGeneric
1552 int count = SubStringCounter(dump, "SyncMemGeneric");
1553 CHECK(count == 0);
1554 // Should only be CopyMemGeneric workloads as we copied all buffers
1555 count = SubStringCounter(dump, "CopyMemGeneric");
1556 CHECK(count >= 1);
1557 }
1558 // Check the output is correct
1559 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001560 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1561 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001562 for (auto outputValue : expectedMisalignedOutput)
1563 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001564 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001565 ++index;
1566 }
1567 std::free(inputMemPtr);
1568 std::free(outputMemPtr);
1569
1570 // Creates structures for input & output
1571 std::vector<float> inputData
1572 {
1573 1.0f, 2.0f, 3.0f, 4.0f
1574 };
1575 std::vector<float> outputData(4);
1576 std::vector<float> expectedOutput
1577 {
1578 1.0f, 4.0f, 9.0f, 16.0f
1579 };
1580
1581 // Check our input and output pointers are actually aligned
1582 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1583 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1584
1585 INFO("Create Inference");
1586 InputTensors inputTensors
1587 {
1588 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1589 };
1590 OutputTensors outputTensors
1591 {
1592 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1593 };
1594
1595 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1596 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1597 // Do the inference and force the import as the memory is aligned.
1598 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1599
1600 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1601 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1602 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1603 dump = ss.str();
1604
1605 if (backends[0] == Compute::CpuAcc)
1606 {
1607 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1608 // reconfigure is implemented
1609 int count = SubStringCounter(dump, "SyncMemGeneric");
1610 CHECK(count == 0);
1611 // Should be 2 CopyMemGeneric workloads
1612 count = SubStringCounter(dump, "CopyMemGeneric");
1613 CHECK(count >= 1);
1614 }
1615 else
1616 {
1617 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1618 // SyncMemGeneric Workload when we previously didn't
1619 int count = SubStringCounter(dump, "SyncMemGeneric");
1620 CHECK(count >= 1);
1621 // Should still be some CopyMemGeneric Workloads from the last inference
1622 count = SubStringCounter(dump, "CopyMemGeneric");
1623 CHECK(count >= 1);
1624 }
1625 // Check the output is correct
1626 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1627 // Clean up to avoid interfering with other tests
1628 runtime->UnloadNetwork(netId);
1629}
1630
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001631} // anonymous namespace