blob: 77901df44479bce38359d7ca524716adc3733707 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
59 runtime->LoadNetwork(netId, std::move(optNet));
60
61 // Creates structures for input & output.
62 std::vector<T> outputData(inputData.size());
63
64 InputTensors inputTensors
65 {
66 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67 };
68 OutputTensors outputTensors
69 {
70 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71 };
72
73 // Does the inference.
74 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75
76 // Checks the results.
77 return outputData == expectedOutputData;
78}
79
80inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010082 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010084
85 return ConstantUsageTest(backends,
86 commonTensorInfo,
87 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90 );
91}
92
93inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94{
Derek Lambertif90c56d2020-01-10 17:14:08 +000095 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010096
97 const float scale = 0.023529f;
98 const int8_t offset = -43;
99
100 commonTensorInfo.SetQuantizationScale(scale);
101 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100102 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100103
104 return ConstantUsageTest(backends,
105 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100106 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100109 );
110}
111
// Utility function to find the number of non-overlapping occurrences of a
// substring within a string.
//
// An empty substring is defined to occur zero times. (Without this guard,
// string::find("") matches at the current position and the search offset is
// advanced by substring.length() == 0, producing an infinite loop.)
int SubStringCounter(const std::string& string, const std::string& substring)
{
    if (substring.empty())
    {
        return 0;
    }
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found the substring
    while((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by substring length to avoid finding the same substring twice
        found += substring.length();
    }
    return count;
}
126
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000127template<DataType ArmnnIType, DataType ArmnnOType,
128 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000129void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000130 const std::map<int, std::vector<TInput>>& inputTensorData,
131 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000132 std::vector<BackendId> backends,
133 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000134{
135 // Create runtime in which test will run
136 IRuntime::CreationOptions options;
137 IRuntimePtr runtime(IRuntime::Create(options));
138
139 // optimize the network
140 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141
142 // Loads it into the runtime.
143 NetworkId netId;
144 runtime->LoadNetwork(netId, std::move(optNet));
145
146 InputTensors inputTensors;
147 inputTensors.reserve(inputTensorData.size());
148 for (auto&& it : inputTensorData)
149 {
150 inputTensors.push_back({it.first,
151 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152 }
153 OutputTensors outputTensors;
154 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000155 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000156 for (auto&& it : expectedOutputData)
157 {
kevmay012b4d88e2019-01-24 14:05:09 +0000158 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000159 outputStorage.emplace(it.first, out);
160 outputTensors.push_back({it.first,
161 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162 outputStorage.at(it.first).data())});
163 }
164
165 // Does the inference.
166 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167
168 // Checks the results.
169 for (auto&& it : expectedOutputData)
170 {
kevmay012b4d88e2019-01-24 14:05:09 +0000171 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100172 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000173 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100174 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100175 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 }
narpra01b9546cf2018-11-20 15:21:28 +0000178 }
179}
180
David Monahan4f1e8e42019-09-04 09:22:10 +0100181inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100182{
183 using namespace armnn;
184
185 // Create runtime in which test will run
186 IRuntime::CreationOptions options;
187 IRuntimePtr runtime(armnn::IRuntime::Create(options));
188
189 // build up the structure of the network
190 INetworkPtr net(INetwork::Create());
191
192 IConnectableLayer* input = net->AddInputLayer(0);
193
David Monahan3fb7e102019-08-20 11:25:29 +0100194 ActivationDescriptor descriptor;
195 descriptor.m_Function = ActivationFunction::Square;
196 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100197
198 IConnectableLayer* output = net->AddOutputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100202
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100204 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100205
206 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000207 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100208 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100209
210 // Loads it into the runtime.
211 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100212 std::string ignoredErrorMessage;
213 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100214 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
David Monahan4f1e8e42019-09-04 09:22:10 +0100215 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100216
217 // Creates structures for input & output
218 std::vector<float> inputData
219 {
David Monahan3fb7e102019-08-20 11:25:29 +0100220 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100221 };
222
223 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100224 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100225
David Monahan3fb7e102019-08-20 11:25:29 +0100226 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100227
David Monahan4f1e8e42019-09-04 09:22:10 +0100228 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100229 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100230
231 InputTensors inputTensors
232 {
233 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
234 };
235 OutputTensors outputTensors
236 {
237 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
238 };
239
David Monahan4f1e8e42019-09-04 09:22:10 +0100240 runtime->GetProfiler(netId)->EnableProfiling(true);
241
242 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100243 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100244}
245
Ferran Balaguer83239f92019-09-19 11:49:25 +0100246inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100247{
248 using namespace armnn;
249
250 // Create runtime in which test will run
251 IRuntime::CreationOptions options;
252 IRuntimePtr runtime(armnn::IRuntime::Create(options));
253
254 // build up the structure of the network
255 INetworkPtr net(INetwork::Create());
256
257 IConnectableLayer* input = net->AddInputLayer(0);
258
David Monahan3fb7e102019-08-20 11:25:29 +0100259 ActivationDescriptor descriptor;
260 descriptor.m_Function = ActivationFunction::Square;
261 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100262
263 IConnectableLayer* output = net->AddOutputLayer(0);
264
David Monahan3fb7e102019-08-20 11:25:29 +0100265 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
266 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100267
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100268 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100269 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100270
271 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000272 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100273 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100274
275 // Loads it into the runtime.
276 NetworkId netId;
277 std::string ignoredErrorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100278 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100279 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100280 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
281
282 // Creates structures for input & output
283 std::vector<float> inputData
284 {
285 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
286 };
287
288 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100289 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100290
291 std::vector<float> outputData(5);
292
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100293 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100294 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100295
296 InputTensors inputTensors
297 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100298 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100299 };
300 OutputTensors outputTensors
301 {
302 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
303 };
304
Ferran Balaguer83239f92019-09-19 11:49:25 +0100305 // Do the inference and expect it to fail with a ExportMemoryException
306 if (backends[0] == Compute::CpuAcc)
307 {
308 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100309 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100310 }
311 else
312 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100313 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100314 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100315}
316
317inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
318{
319 using namespace armnn;
320
321 // Create runtime in which test will run
322 IRuntime::CreationOptions options;
323 IRuntimePtr runtime(armnn::IRuntime::Create(options));
324
325 // build up the structure of the network
326 INetworkPtr net(INetwork::Create());
327
328 IConnectableLayer* input = net->AddInputLayer(0);
329
David Monahan3fb7e102019-08-20 11:25:29 +0100330 ActivationDescriptor descriptor;
331 descriptor.m_Function = ActivationFunction::Square;
332 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100333
334 IConnectableLayer* output = net->AddOutputLayer(0);
335
David Monahan3fb7e102019-08-20 11:25:29 +0100336 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
337 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100338
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100339 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100340 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100341
342 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000343 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Sadik Armagan1625efc2021-06-10 18:24:34 +0100344 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100345
346 // Loads it into the runtime.
347 NetworkId netId;
David Monahan4f1e8e42019-09-04 09:22:10 +0100348 std::string ignoredErrorMessage;
349 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100350 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
David Monahan4f1e8e42019-09-04 09:22:10 +0100351 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100352
353 // Creates structures for input & output
354 std::vector<float> inputData
355 {
356 1.0f, 2.0f, 3.0f, 4.0f
357 };
358
359 std::vector<float> outputData(4);
360
James Conroy57d10b72019-10-25 09:44:14 +0100361 std::vector<float> expectedOutput
362 {
363 1.0f, 4.0f, 9.0f, 16.0f
364 };
365
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100366 InputTensors inputTensors
367 {
368 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
369 };
370 OutputTensors outputTensors
371 {
372 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
373 };
374
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100375 runtime->GetProfiler(netId)->EnableProfiling(true);
376
377 // Do the inference
378 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
379
380 // Retrieve the Profiler.Print() output to get the workload execution
381 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
382 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000383 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100384 std::string dump = ss.str();
385
David Monahan3fb7e102019-08-20 11:25:29 +0100386 // Contains ActivationWorkload
387 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100388 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100389
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100390 // Contains SyncMemGeneric
391 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100392 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100393
Ferran Balaguer83239f92019-09-19 11:49:25 +0100394 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100395 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100396 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100397
398 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100399 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100400}
401
Ferran Balaguer83239f92019-09-19 11:49:25 +0100402inline void ImportOnlyWorkload(std::vector<BackendId> backends)
403{
404 using namespace armnn;
405
406 IRuntime::CreationOptions options;
407 IRuntimePtr runtime(IRuntime::Create(options));
408
409 // Builds up the structure of the network.
410 INetworkPtr net(INetwork::Create());
411
412 IConnectableLayer* input = net->AddInputLayer(0);
413
414 ActivationDescriptor descriptor;
415 descriptor.m_Function = ActivationFunction::Square;
416 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
417
418 IConnectableLayer* output = net->AddOutputLayer(0);
419
420 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
421 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
422
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100423 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100424 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
425
426 // optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000427 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100428
Sadik Armagan1625efc2021-06-10 18:24:34 +0100429 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100430 // Load it into the runtime. It should pass.
431 NetworkId netId;
432 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100433
434 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
435
Sadik Armagan1625efc2021-06-10 18:24:34 +0100436 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100437 == Status::Success);
438
Sadik Armagan1625efc2021-06-10 18:24:34 +0100439 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100440 // Creates structures for input & output
441 std::vector<float> inputData
442 {
443 1.0f, 2.0f, 3.0f, 4.0f
444 };
445
446 std::vector<float> outputData(4);
447
448 std::vector<float> expectedOutput
449 {
450 1.0f, 4.0f, 9.0f, 16.0f
451 };
452
David Monahan646bc8a2022-01-31 14:29:14 +0000453 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100454
Ferran Balaguer83239f92019-09-19 11:49:25 +0100455 InputTensors inputTensors
456 {
457 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
458 };
459 OutputTensors outputTensors
460 {
461 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
462 };
463
Sadik Armagan1625efc2021-06-10 18:24:34 +0100464 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100465 runtime->GetProfiler(netId)->EnableProfiling(true);
466
Sadik Armagan1625efc2021-06-10 18:24:34 +0100467 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100468 // Do the inference
469 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
470
Sadik Armagan1625efc2021-06-10 18:24:34 +0100471 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100472 // Retrieve the Profiler.Print() output to get the workload execution
473 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
474 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000475 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100476 std::string dump = ss.str();
477
478 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100479 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100480 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482
483 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100484 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100485 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100486 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100487
488 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100489 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100490}
491
492inline void ExportOnlyWorkload(std::vector<BackendId> backends)
493{
494 using namespace armnn;
495
496 IRuntime::CreationOptions options;
497 IRuntimePtr runtime(IRuntime::Create(options));
498
499 // Builds up the structure of the network.
500 INetworkPtr net(INetwork::Create());
501
502 IConnectableLayer* input = net->AddInputLayer(0);
503
504 ActivationDescriptor descriptor;
505 descriptor.m_Function = ActivationFunction::Square;
506 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
507
508 IConnectableLayer* output = net->AddOutputLayer(0);
509
510 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
511 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
512
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100513 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100514 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
515
516 // optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000517 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100518
Sadik Armagan1625efc2021-06-10 18:24:34 +0100519 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100520 // Load it into the runtime. It should pass.
521 NetworkId netId;
522 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100523 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100524 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100525 == Status::Success);
526
Sadik Armagan1625efc2021-06-10 18:24:34 +0100527 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100528 // Creates structures for input & output
529 std::vector<float> inputData
530 {
531 1.0f, 2.0f, 3.0f, 4.0f
532 };
533
534 std::vector<float> outputData(4);
535
536 std::vector<float> expectedOutput
537 {
538 1.0f, 4.0f, 9.0f, 16.0f
539 };
540
David Monahan646bc8a2022-01-31 14:29:14 +0000541 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100542
Ferran Balaguer83239f92019-09-19 11:49:25 +0100543 InputTensors inputTensors
544 {
545 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
546 };
547 OutputTensors outputTensors
548 {
549 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
550 };
551
Sadik Armagan1625efc2021-06-10 18:24:34 +0100552 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100553 runtime->GetProfiler(netId)->EnableProfiling(true);
554
Sadik Armagan1625efc2021-06-10 18:24:34 +0100555 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100556 // Do the inference
557 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
558
Sadik Armagan1625efc2021-06-10 18:24:34 +0100559 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100560 // Retrieve the Profiler.Print() output to get the workload execution
561 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
562 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000563 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100564 std::string dump = ss.str();
565
566 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100567 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100568 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100569 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100570
571 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100572 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100573 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100574 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100575
576 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100577 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100578}
579
580inline void ImportAndExportWorkload(std::vector<BackendId> backends)
581{
582 using namespace armnn;
583
584 IRuntime::CreationOptions options;
585 IRuntimePtr runtime(IRuntime::Create(options));
586
587 // Builds up the structure of the network.
588 INetworkPtr net(INetwork::Create());
589
590 IConnectableLayer* input = net->AddInputLayer(0);
591
592 ActivationDescriptor descriptor;
593 descriptor.m_Function = ActivationFunction::Square;
594 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
595
596 IConnectableLayer* output = net->AddOutputLayer(0);
597
598 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
599 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
600
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100601 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100602 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
603
James Conroya0f8b152022-06-21 11:31:47 +0000604 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100605
Sadik Armagan1625efc2021-06-10 18:24:34 +0100606 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100607 // Load it into the runtime. It should pass.
608 NetworkId netId;
609 std::string ignoredErrorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100610
611 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
612
Sadik Armagan1625efc2021-06-10 18:24:34 +0100613 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
Ferran Balaguer83239f92019-09-19 11:49:25 +0100614 == Status::Success);
615
Sadik Armagan1625efc2021-06-10 18:24:34 +0100616 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100617 // Creates structures for input & output
618 std::vector<float> inputData
619 {
620 1.0f, 2.0f, 3.0f, 4.0f
621 };
622
623 std::vector<float> outputData(4);
624
625 std::vector<float> expectedOutput
626 {
627 1.0f, 4.0f, 9.0f, 16.0f
628 };
629
David Monahan646bc8a2022-01-31 14:29:14 +0000630 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100631
Ferran Balaguer83239f92019-09-19 11:49:25 +0100632 InputTensors inputTensors
633 {
634 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
635 };
636 OutputTensors outputTensors
637 {
638 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
639 };
640
Sadik Armagan1625efc2021-06-10 18:24:34 +0100641 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100642 runtime->GetProfiler(netId)->EnableProfiling(true);
643
Sadik Armagan1625efc2021-06-10 18:24:34 +0100644 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100645 // Do the inference
646 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
647
Sadik Armagan1625efc2021-06-10 18:24:34 +0100648 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100649 // Retrieve the Profiler.Print() output to get the workload execution
650 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
651 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000652 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100653 std::string dump = ss.str();
654
655 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100656 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100657 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100658 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100659
660 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100661 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100662 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100663 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100664
665 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100666 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100667}
668
669inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
670{
671 using namespace armnn;
672
673 // Create runtime in which test will run
674 IRuntime::CreationOptions options;
675 IRuntimePtr runtime(armnn::IRuntime::Create(options));
676
677 // build up the structure of the network
678 INetworkPtr net(INetwork::Create());
679
680 IConnectableLayer* input = net->AddInputLayer(0);
681
682 ActivationDescriptor descriptor;
683 descriptor.m_Function = ActivationFunction::Square;
684 IConnectableLayer* activation = net->AddActivationLayer(descriptor);
685
686 IConnectableLayer* output0 = net->AddOutputLayer(0);
687 IConnectableLayer* output1 = net->AddOutputLayer(1);
688
689 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
690 activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
691 activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
692
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100693 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100694 activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
695
696 // Optimize the network
James Conroya0f8b152022-06-21 11:31:47 +0000697 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
Ferran Balaguer83239f92019-09-19 11:49:25 +0100698
699 // Loads it into the runtime.
700 NetworkId netId;
701 std::string ignoredErrorMessage;
702 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100703 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100704 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
705
706 // Creates structures for input & output
707 std::vector<float> inputData
708 {
709 1.0f, 2.0f, 3.0f, 4.0f
710 };
711
712 std::vector<float> outputData0(4);
713 std::vector<float> outputData1(4);
714
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100715 std::vector<float> expectedOutput
716 {
717 1.0f, 4.0f, 9.0f, 16.0f
718 };
719
Ferran Balaguer83239f92019-09-19 11:49:25 +0100720 InputTensors inputTensors
721 {
722 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
723 };
724 OutputTensors outputTensors
725 {
726 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
727 {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
728 };
729
730 // The result of the inference is not important, just the fact that there
731 // should not be CopyMemGeneric workloads.
732 runtime->GetProfiler(netId)->EnableProfiling(true);
733
734 // Do the inference
735 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
736
737 // Retrieve the Profiler.Print() output to get the workload execution
738 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
739 std::stringstream ss;
740 profilerManager.GetProfiler()->Print(ss);
741 std::string dump = ss.str();
742
743 std::size_t found = std::string::npos;
744
745 if (backends[0] == Compute::CpuRef)
746 {
747 found = dump.find("RefActivationWorkload");
748 }
749 else if (backends[0] == Compute::CpuAcc)
750 {
751 found = dump.find("NeonActivationWorkload");
752 }
753 else if (backends[0] == Compute::GpuAcc)
754 {
755 found = dump.find("ClActivationWorkload");
756 }
757
Sadik Armagan1625efc2021-06-10 18:24:34 +0100758 CHECK(found != std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100759 // No contains SyncMemGeneric
760 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100761 CHECK(found == std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100762 // Contains CopyMemGeneric
763 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100764 CHECK(found != std::string::npos);
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100765
766 // Check that the outputs are correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100767 CHECK(std::equal(outputData0.begin(), outputData0.end(),
768 expectedOutput.begin(), expectedOutput.end()));
769 CHECK(std::equal(outputData1.begin(), outputData1.end(),
770 expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100771}
772
David Monahan0a99a142020-03-13 07:52:54 +0000773inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
774{
775 using namespace armnn;
776
777 // Create runtime in which test will run
778 IRuntime::CreationOptions options;
779 IRuntimePtr runtime(armnn::IRuntime::Create(options));
780
781 // build up the structure of the network
782 INetworkPtr net(INetwork::Create());
783
784 IConnectableLayer* input = net->AddInputLayer(0);
785
786 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
787 // dim of the output to make it too small to hold the specified slice.
788 StridedSliceDescriptor descriptor;
789 descriptor.m_Begin = {0, 0};
790 descriptor.m_End = {2, 3};
791 descriptor.m_Stride = {1, 1};
792 descriptor.m_BeginMask = 0;
793 descriptor.m_EndMask = 0;
794 descriptor.m_ShrinkAxisMask = 1;
795 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
796
797 IConnectableLayer* output0 = net->AddOutputLayer(0);
798
799 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
800 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
801
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100802 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000803 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
804
805 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100806 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000807}
808
David Monahan646bc8a2022-01-31 14:29:14 +0000809inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
810{
811 /**
812 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
813 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
814 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
815 * In this case all inputs and outputs should be imported
816 */
817 using namespace armnn;
818 IRuntime::CreationOptions options;
819 IRuntimePtr runtime(IRuntime::Create(options));
820
821 // Builds up the structure of the network.
822 INetworkPtr net(INetwork::Create());
823 IConnectableLayer* input = net->AddInputLayer(0);
824 ActivationDescriptor descriptor;
825 descriptor.m_Function = ActivationFunction::Square;
826 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
827 IConnectableLayer* output = net->AddOutputLayer(0);
828 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
829 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
830 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
831 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
832 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
833 INFO("Load Network");
834
835 // Load it into the runtime. It should pass.
836 NetworkId netId;
837 std::string ignoredErrorMessage;
838 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
839 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
840 == Status::Success);
841 INFO("Generate Data");
842
843 // Creates structures for input & output
844 std::vector<float> inputData
845 {
846 1.0f, 2.0f, 3.0f, 4.0f
847 };
848 std::vector<float> outputData(4);
849 std::vector<float> expectedOutput
850 {
851 1.0f, 4.0f, 9.0f, 16.0f
852 };
853
854 // Check our input and output pointers are actually aligned
855 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
856 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
857 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
858
859 INFO("Create Inference");
860 InputTensors inputTensors
861 {
862 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
863 };
864 OutputTensors outputTensors
865 {
866 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
867 };
868
869 runtime->GetProfiler(netId)->EnableProfiling(true);
870 std::vector<ImportedInputId> importedInputIds =
871 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
872 std::vector<ImportedOutputId> importedOutputIds =
873 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
874 // Do the inference and force the import as the memory is aligned.
875 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
876
877 // Retrieve the Profiler.Print() output to get the workload execution
878 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
879 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000880 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000881 std::string dump = ss.str();
882
883 if (backends[0] == Compute::CpuAcc)
884 {
885 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
886 // reconfigure is implemented
887 int count = SubStringCounter(dump, "SyncMemGeneric");
888 CHECK(count == 0);
889 // Should be 2 CopyMemGeneric workloads
890 count = SubStringCounter(dump, "CopyMemGeneric");
891 CHECK(count == 2);
892 }
893 else
894 {
895 // Check there is a SyncMemGeneric workload as we exported
896 int count = SubStringCounter(dump, "SyncMemGeneric");
897 CHECK(count == 1);
898 // Shouldn't be any CopyMemGeneric workloads
899 count = SubStringCounter(dump, "CopyMemGeneric");
900 CHECK(count == 0);
901 }
902 // Check the output is correct
903 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
904}
905
906inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
907{
908 /**
909 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
910 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
911 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
912 * In this case all only the output should be imported
913 */
914 using namespace armnn;
915
916 IRuntime::CreationOptions options;
917 IRuntimePtr runtime(IRuntime::Create(options));
918
919 // Builds up the structure of the network.
920 INetworkPtr net(INetwork::Create());
921 IConnectableLayer* input = net->AddInputLayer(0);
922
923 ActivationDescriptor descriptor;
924 descriptor.m_Function = ActivationFunction::Square;
925 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
926
927 IConnectableLayer* output = net->AddOutputLayer(0);
928
929 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
930 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
931 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
932 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
933
934 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
935 INFO("Load Network");
936 // Load it into the runtime. It should pass.
937 NetworkId netId;
938 std::string ignoredErrorMessage;
939 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
940 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
941 == Status::Success);
942 INFO("Generate Data");
943
944 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
945 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
946 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
947
948 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
949
950 // Check if our pointer is truly misaligned
951 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
952 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
953
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000954 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +0000955 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000956 1.0f, 2.0f, 3.0f, 4.0f
957 };
958
959 std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +0000960
961 std::vector<float> outputData(4);
962 // Check our output buffer is aligned
963 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
964
965 std::vector<float> expectedOutput
966 {
967 1.0f, 4.0f, 9.0f, 16.0f
968 };
969
970 INFO("Create Inference");
971 InputTensors inputTensors
972 {
973 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
974 };
975 OutputTensors outputTensors
976 {
977 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
978 };
979 runtime->GetProfiler(netId)->EnableProfiling(true);
980 std::vector<ImportedInputId> importedInputIds =
981 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
982 std::vector<ImportedOutputId> importedOutputIds =
983 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
984
985 // Do the inference and force the import as the memory is misaligned.
986 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
987
988 // Retrieve the Profiler.Print() output to get the workload execution
989 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
990 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000991 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000992 std::string dump = ss.str();
993
994 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
995 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
996 // for imports/copies. Only that the output is correct.
997 if (backends[0] != Compute::GpuAcc)
998 {
999 if (backends[0] == Compute::CpuAcc)
1000 {
1001 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1002 // reconfigure is implemented
1003 // We should get 0 SyncMemGeneric for the Output
1004 int count = SubStringCounter(dump, "SyncMemGeneric");
1005 CHECK(count == 0);
1006 // Should be 2 CopyMemGeneric as we copied the input
1007 count = SubStringCounter(dump, "CopyMemGeneric");
1008 CHECK(count == 2);
1009 }
1010 else
1011 {
1012 // We should get 1 SyncMemGeneric for the Output
1013 int count = SubStringCounter(dump, "SyncMemGeneric");
1014 CHECK(count == 1);
1015 // Should only be 1 CopyMemGeneric as we copied the input
1016 count = SubStringCounter(dump, "CopyMemGeneric");
1017 CHECK(count == 1);
1018 }
1019 }
1020 // Check the output is correct
1021 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1022 std::free(memPtr);
1023}
1024
1025inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1026{
1027 /**
1028 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1029 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1030 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1031 * In this case all only the input should be imported
1032 */
1033 using namespace armnn;
1034
1035 IRuntime::CreationOptions options;
1036 IRuntimePtr runtime(IRuntime::Create(options));
1037
1038 // Builds up the structure of the network.
1039 INetworkPtr net(INetwork::Create());
1040 IConnectableLayer* input = net->AddInputLayer(0);
1041
1042 ActivationDescriptor descriptor;
1043 descriptor.m_Function = ActivationFunction::Square;
1044 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1045
1046 IConnectableLayer* output = net->AddOutputLayer(0);
1047
1048 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1049 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1050 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1051 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1052
1053 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1054 INFO("Load Network");
1055 // Load it into the runtime. It should pass.
1056 NetworkId netId;
1057 std::string ignoredErrorMessage;
1058 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1059 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1060 == Status::Success);
1061 INFO("Generate Data");
1062
1063 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1064 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1065 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1066
1067 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1068
1069 // Check if our pointer is truly misaligned
1070 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1071 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1072
1073 // Creates structures for input & output
1074 std::vector<float> inputData
1075 {
1076 1.0f, 2.0f, 3.0f, 4.0f
1077 };
1078
1079 // Check our input buffer is aligned
1080 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1081 std::vector<float> expectedOutput
1082 {
1083 1.0f, 4.0f, 9.0f, 16.0f
1084 };
1085
1086 INFO("Create Inference");
1087 InputTensors inputTensors
1088 {
1089 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1090 };
1091 OutputTensors outputTensors
1092 {
1093 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1094 };
1095 runtime->GetProfiler(netId)->EnableProfiling(true);
1096 std::vector<ImportedInputId> importedInputIds =
1097 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1098 std::vector<ImportedOutputId> importedOutputIds =
1099 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1100
1101 // Do the inference and force the import as the memory is misaligned.
1102 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1103
1104 // Retrieve the Profiler.Print() output to get the workload execution
1105 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1106 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001107 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001108 std::string dump = ss.str();
1109
1110 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1111 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1112 // for imports/copies. Only that the output is correct.
1113 if (backends[0] != Compute::GpuAcc)
1114 {
1115 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1116 int count = SubStringCounter(dump, "SyncMemGeneric");
1117 CHECK(count == 0);
1118 // Should only be 1 CopyMemGeneric as we copied the input
1119 count = SubStringCounter(dump, "CopyMemGeneric");
1120 if (backends[0] == Compute::CpuAcc)
1121 {
1122 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1123 // reconfigure is implemented
1124 CHECK(count == 2);
1125 }
1126 else
1127 {
1128 CHECK(count == 1);
1129 }
1130 // Check the output is correct
1131 }
1132 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001133 std::vector<float> outputData(expectedOutput.size(), 0);
1134 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001135 for (auto outputValue : expectedOutput)
1136 {
David Monahaneef6b762022-02-10 16:01:58 +00001137 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001138 ++index;
1139 }
1140 std::free(memPtr);
1141}
1142
1143inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1144{
1145 /**
1146 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1147 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1148 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1149 * In this case all inputs and outputs should be copied
1150 */
1151 using namespace armnn;
1152
1153 IRuntime::CreationOptions options;
1154 IRuntimePtr runtime(IRuntime::Create(options));
1155
1156 // Builds up the structure of the network.
1157 INetworkPtr net(INetwork::Create());
1158 IConnectableLayer* input = net->AddInputLayer(0);
1159
1160 ActivationDescriptor descriptor;
1161 descriptor.m_Function = ActivationFunction::Square;
1162 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1163
1164 IConnectableLayer* output = net->AddOutputLayer(0);
1165
1166 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1167 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1168 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1169 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1170
1171 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1172 INFO("Load Network");
1173 // Load it into the runtime. It should pass.
1174 NetworkId netId;
1175 std::string ignoredErrorMessage;
1176 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1177 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1178 == Status::Success);
1179 INFO("Generate Data");
1180
1181 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1182 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1183 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1184 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1185
1186 // Check if our pointer is truly misaligned
1187 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1188 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001189 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001190 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001191 1.0f, 2.0f, 3.0f, 4.0f
1192 };
1193 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001194
1195 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1196 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1197
1198 // Check if our pointer is truly misaligned
1199 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1200
1201 std::vector<float> expectedOutput
1202 {
1203 1.0f, 4.0f, 9.0f, 16.0f
1204 };
1205
1206 INFO("Create Inference");
1207 InputTensors inputTensors
1208 {
1209 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1210 };
1211 OutputTensors outputTensors
1212 {
1213 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1214 };
1215 runtime->GetProfiler(netId)->EnableProfiling(true);
1216 std::vector<ImportedInputId> importedInputIds =
1217 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1218 std::vector<ImportedOutputId> importedOutputIds =
1219 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1220
1221 // Do the inference and force the import as the memory is misaligned.
1222 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1223
1224 // Retrieve the Profiler.Print() output to get the workload execution
1225 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1226 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001227 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001228 std::string dump = ss.str();
1229
1230 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1231 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1232 // for imports/copies. Only that the output is correct.
1233 if (backends[0] != Compute::GpuAcc)
1234 {
1235 // We can only copy so there should be no SyncMemGeneric
1236 int count = SubStringCounter(dump, "SyncMemGeneric");
1237 CHECK(count == 0);
1238 // Should only be CopyMemGeneric workloads as we copied all buffers
1239 count = SubStringCounter(dump, "CopyMemGeneric");
1240 CHECK(count == 2);
1241 }
1242 // Check the output is correct
1243 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001244 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001245 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1246 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001247 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001248 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001249 ++index;
1250 }
1251 std::free(inputMemPtr);
1252 std::free(outputMemPtr);
1253}
1254
David Monahan16829712022-02-03 17:04:59 +00001255inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1256{
1257 /**
1258 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1259 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1260 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1261 * In this we create some aligned buffers, import them into a network and validate the output and number of
1262 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1263 * back to copying correctly.
1264 */
1265 using namespace armnn;
1266
1267 IRuntime::CreationOptions options;
1268 IRuntimePtr runtime(IRuntime::Create(options));
1269
1270 // Builds up the structure of the network.
1271 INetworkPtr net(INetwork::Create());
1272 IConnectableLayer* input = net->AddInputLayer(0);
1273
1274 ActivationDescriptor descriptor;
1275 descriptor.m_Function = ActivationFunction::Square;
1276 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1277
1278 IConnectableLayer* output = net->AddOutputLayer(0);
1279
1280 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1281 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1282 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1283 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1284
1285 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1286 INFO("Load Network");
1287 // Load it into the runtime. It should pass.
1288 NetworkId netId;
1289 std::string ignoredErrorMessage;
1290 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1291 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1292 == Status::Success);
1293 INFO("Generate Data");
1294
1295 // Creates structures for input & output
1296 std::vector<float> inputData
1297 {
1298 1.0f, 2.0f, 3.0f, 4.0f
1299 };
1300 std::vector<float> outputData(4);
1301 std::vector<float> expectedOutput
1302 {
1303 1.0f, 4.0f, 9.0f, 16.0f
1304 };
1305
1306 // Check our input and output pointers are actually aligned
1307 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1308 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1309 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1310
1311 INFO("Create Inference");
1312 InputTensors inputTensors
1313 {
1314 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1315 };
1316 OutputTensors outputTensors
1317 {
1318 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1319 };
1320
1321 runtime->GetProfiler(netId)->EnableProfiling(true);
1322 std::vector<ImportedInputId> importedInputIds =
1323 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1324 std::vector<ImportedOutputId> importedOutputIds =
1325 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1326 // Do the inference and force the import as the memory is aligned.
1327 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1328
1329 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1330 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1331 std::stringstream ss;
1332 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1333 std::string dump = ss.str();
1334
1335 if (backends[0] == Compute::CpuAcc)
1336 {
1337 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1338 // reconfigure is implemented
1339 int count = SubStringCounter(dump, "SyncMemGeneric");
1340 CHECK(count == 0);
1341 // Should be 2 CopyMemGeneric workloads
1342 count = SubStringCounter(dump, "CopyMemGeneric");
1343 CHECK(count >= 1);
1344 }
1345 else
1346 {
1347 // Check there is at least 1 SyncMemGeneric workload as we exported
1348 int count = SubStringCounter(dump, "SyncMemGeneric");
1349 CHECK(count >= 1);
1350 // Shouldn't be any CopyMemGeneric workloads
1351 count = SubStringCounter(dump, "CopyMemGeneric");
1352 CHECK(count == 0);
1353 }
1354 // Check the output is correct
1355 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1356
1357 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1358 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1359 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1360 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1361
1362 // Check if our pointer is truly misaligned
1363 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001364
1365 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001366 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001367 2.0f, 3.0f, 4.0f, 5.0f
1368 };
1369
1370 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001371
1372 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1373 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1374
1375 // Check if our pointer is truly misaligned
1376 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1377
1378 std::vector<float> expectedMisalignedOutput
1379 {
1380 4.0f, 9.0f, 16.0f, 25.0f
1381 };
1382
1383 INFO("Create Second Inference");
1384 InputTensors inputTensorsMisaligned
1385 {
1386 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1387 };
1388 OutputTensors outputTensorsMisaligned
1389 {
1390 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1391 };
1392 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1393 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1394
1395 // Do the inference and force the import as the memory is misaligned.
1396 runtime->EnqueueWorkload(netId,
1397 inputTensorsMisaligned,
1398 outputTensorsMisaligned,
1399 importedInputIds,
1400 importedOutputIds);
1401
1402 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1403 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1404 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1405 dump = ss.str();
1406
1407 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1408 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1409 // for imports/copies. Only that the output is correct.
1410 if (backends[0] != Compute::GpuAcc)
1411 {
1412 // The SyncMemGeneric will still be in the profiling log from the first inference
1413 int count = SubStringCounter(dump, "SyncMemGeneric");
1414 CHECK(count >= 1);
1415 // We should now see CopyMemGeneric workloads as we copied all buffers
1416 count = SubStringCounter(dump, "CopyMemGeneric");
1417 CHECK(count >= 1);
1418 }
1419 // Check the output is correct
1420 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001421 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001422 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001423 for (auto outputValue : expectedMisalignedOutput)
1424 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001425 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001426 ++index;
1427 }
1428 // Clean up to avoid interfering with other tests
1429 runtime->UnloadNetwork(netId);
1430 std::free(inputMemPtr);
1431 std::free(outputMemPtr);
1432}
1433
1434
1435inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1436{
1437 /**
1438 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1439 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1440 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1441 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1442 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1443 * to importing correctly.
1444 */
1445 using namespace armnn;
1446
1447 IRuntime::CreationOptions options;
1448 IRuntimePtr runtime(IRuntime::Create(options));
1449
1450 // Builds up the structure of the network.
1451 INetworkPtr net(INetwork::Create());
1452 IConnectableLayer* input = net->AddInputLayer(0);
1453
1454 ActivationDescriptor descriptor;
1455 descriptor.m_Function = ActivationFunction::Square;
1456 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1457
1458 IConnectableLayer* output = net->AddOutputLayer(0);
1459
1460 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1461 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1462 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1463 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1464
1465 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1466 INFO("Load Network");
1467 // Load it into the runtime. It should pass.
1468 NetworkId netId;
1469 std::string ignoredErrorMessage;
1470 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1471 CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1472 == Status::Success);
1473 INFO("Generate Data");
1474
1475 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1476 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1477 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1478 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1479
1480 // Check if our pointer is truly misaligned
1481 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1482 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001483 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001484 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001485 2.0f, 3.0f, 4.0f, 5.0f
1486 };
1487 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001488
1489 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1490 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1491
1492 // Check if our pointer is truly misaligned
1493 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1494
1495 std::vector<float> expectedMisalignedOutput
1496 {
1497 4.0f, 9.0f, 16.0f, 25.0f
1498 };
1499
1500 INFO("Create Second Inference");
1501 InputTensors inputTensorsMisaligned
1502 {
1503 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1504 };
1505 OutputTensors outputTensorsMisaligned
1506 {
1507 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1508 };
1509 runtime->GetProfiler(netId)->EnableProfiling(true);
1510 std::vector<ImportedInputId> importedInputIds =
1511 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1512 std::vector<ImportedOutputId> importedOutputIds =
1513 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1514
1515 // Do the inference and force the import as the memory is misaligned.
1516 runtime->EnqueueWorkload(netId,
1517 inputTensorsMisaligned,
1518 outputTensorsMisaligned,
1519 importedInputIds,
1520 importedOutputIds);
1521
1522 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1523 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1524 std::stringstream ss;
1525 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1526 std::string dump = ss.str();
1527
1528 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1529 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1530 // for imports/copies. Only that the output is correct.
1531 if (backends[0] != Compute::GpuAcc)
1532 {
1533 // We can only copy so there should be no SyncMemGeneric
1534 int count = SubStringCounter(dump, "SyncMemGeneric");
1535 CHECK(count == 0);
1536 // Should only be CopyMemGeneric workloads as we copied all buffers
1537 count = SubStringCounter(dump, "CopyMemGeneric");
1538 CHECK(count >= 1);
1539 }
1540 // Check the output is correct
1541 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001542 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1543 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001544 for (auto outputValue : expectedMisalignedOutput)
1545 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001546 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001547 ++index;
1548 }
1549 std::free(inputMemPtr);
1550 std::free(outputMemPtr);
1551
1552 // Creates structures for input & output
1553 std::vector<float> inputData
1554 {
1555 1.0f, 2.0f, 3.0f, 4.0f
1556 };
1557 std::vector<float> outputData(4);
1558 std::vector<float> expectedOutput
1559 {
1560 1.0f, 4.0f, 9.0f, 16.0f
1561 };
1562
1563 // Check our input and output pointers are actually aligned
1564 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1565 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1566
1567 INFO("Create Inference");
1568 InputTensors inputTensors
1569 {
1570 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1571 };
1572 OutputTensors outputTensors
1573 {
1574 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1575 };
1576
1577 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1578 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1579 // Do the inference and force the import as the memory is aligned.
1580 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1581
1582 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1583 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1584 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1585 dump = ss.str();
1586
1587 if (backends[0] == Compute::CpuAcc)
1588 {
1589 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1590 // reconfigure is implemented
1591 int count = SubStringCounter(dump, "SyncMemGeneric");
1592 CHECK(count == 0);
1593 // Should be 2 CopyMemGeneric workloads
1594 count = SubStringCounter(dump, "CopyMemGeneric");
1595 CHECK(count >= 1);
1596 }
1597 else
1598 {
1599 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1600 // SyncMemGeneric Workload when we previously didn't
1601 int count = SubStringCounter(dump, "SyncMemGeneric");
1602 CHECK(count >= 1);
1603 // Should still be some CopyMemGeneric Workloads from the last inference
1604 count = SubStringCounter(dump, "CopyMemGeneric");
1605 CHECK(count >= 1);
1606 }
1607 // Check the output is correct
1608 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1609 // Clean up to avoid interfering with other tests
1610 runtime->UnloadNetwork(netId);
1611}
1612
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001613} // anonymous namespace