blob: bd5466ac04b732889ef89070ea5db1e9c052a656 [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
Teresa Charlindf15c4e2023-02-21 15:16:09 +00002// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
Aron Virginas-Tar70104002018-10-24 15:33:28 +01003// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
Mike Kelly2c14db62023-03-15 15:06:23 +000042 ARMNN_NO_DEPRECATE_WARN_BEGIN
Mike Kelly1a05aad2023-03-31 18:00:00 +010043 IConnectableLayer* add = net->AddAdditionLayer();
Mike Kelly2c14db62023-03-15 15:06:23 +000044 ARMNN_NO_DEPRECATE_WARN_END
Aron Virginas-Tar70104002018-10-24 15:33:28 +010045 IConnectableLayer* output = net->AddOutputLayer(0);
46
47 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
48 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
49 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
50
51 // Sets the tensors in the network.
52 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
54 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
55
56 // optimize the network
57 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
58
59 // Loads it into the runtime.
60 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +000061 std::string errorMessage;
62 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
63 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010064
65 // Creates structures for input & output.
66 std::vector<T> outputData(inputData.size());
67
68 InputTensors inputTensors
69 {
70 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
71 };
72 OutputTensors outputTensors
73 {
74 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
75 };
76
77 // Does the inference.
78 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
79
80 // Checks the results.
81 return outputData == expectedOutputData;
82}
83
84inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
85{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010086 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
87 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010088
89 return ConstantUsageTest(backends,
90 commonTensorInfo,
91 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
92 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
93 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
94 );
95}
96
97inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
98{
Derek Lambertif90c56d2020-01-10 17:14:08 +000099 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100100
101 const float scale = 0.023529f;
102 const int8_t offset = -43;
103
104 commonTensorInfo.SetQuantizationScale(scale);
105 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100106 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100107
108 return ConstantUsageTest(backends,
109 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100110 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
111 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
112 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100113 );
114}
115
// Utility function to count the non-overlapping occurrences of a substring within a string.
//
// string     Text to search. It is not modified.
// substring  Text to look for.
//
// Returns the number of matches found while scanning left to right, where each new search starts
// right after the previous match (so "aa" is found twice in "aaaa"). An empty substring yields 0:
// it would otherwise match at the same position forever because the search position could never
// advance.
inline int SubStringCounter(std::string& string, std::string&& substring)
{
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search just past the previous match to avoid finding the same substring twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
130
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000131template<DataType ArmnnIType, DataType ArmnnOType,
132 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000133void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000134 const std::map<int, std::vector<TInput>>& inputTensorData,
135 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000136 std::vector<BackendId> backends,
137 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000138{
139 // Create runtime in which test will run
140 IRuntime::CreationOptions options;
141 IRuntimePtr runtime(IRuntime::Create(options));
142
143 // optimize the network
144 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
145
146 // Loads it into the runtime.
147 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000148 std::string errorMessage;
149 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
150 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
narpra01b9546cf2018-11-20 15:21:28 +0000151
152 InputTensors inputTensors;
153 inputTensors.reserve(inputTensorData.size());
154 for (auto&& it : inputTensorData)
155 {
156 inputTensors.push_back({it.first,
157 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
158 }
159 OutputTensors outputTensors;
160 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000161 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000162 for (auto&& it : expectedOutputData)
163 {
kevmay012b4d88e2019-01-24 14:05:09 +0000164 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000165 outputStorage.emplace(it.first, out);
166 outputTensors.push_back({it.first,
167 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
168 outputStorage.at(it.first).data())});
169 }
170
171 // Does the inference.
172 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
173
174 // Checks the results.
175 for (auto&& it : expectedOutputData)
176 {
kevmay012b4d88e2019-01-24 14:05:09 +0000177 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100178 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000179 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100180 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Mike Kelly1a05aad2023-03-31 18:00:00 +0100181 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100182
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000183 }
narpra01b9546cf2018-11-20 15:21:28 +0000184 }
185}
186
David Monahan4f1e8e42019-09-04 09:22:10 +0100187inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100188{
189 using namespace armnn;
190
191 // Create runtime in which test will run
192 IRuntime::CreationOptions options;
193 IRuntimePtr runtime(armnn::IRuntime::Create(options));
194
195 // build up the structure of the network
196 INetworkPtr net(INetwork::Create());
197
198 IConnectableLayer* input = net->AddInputLayer(0);
199
David Monahan3fb7e102019-08-20 11:25:29 +0100200 ActivationDescriptor descriptor;
201 descriptor.m_Function = ActivationFunction::Square;
202 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100203
204 IConnectableLayer* output = net->AddOutputLayer(0);
205
David Monahan3fb7e102019-08-20 11:25:29 +0100206 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
207 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100208
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100209 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100210 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100211
212 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000213 OptimizerOptionsOpaque optimizedOptions;
214 optimizedOptions.SetImportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000215 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100216 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100217
218 // Loads it into the runtime.
219 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000220 std::string errorMessage;
David Monahan4f1e8e42019-09-04 09:22:10 +0100221 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100222 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000223 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
224 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100225
226 // Creates structures for input & output
227 std::vector<float> inputData
228 {
David Monahan3fb7e102019-08-20 11:25:29 +0100229 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100230 };
231
232 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100233 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100234
David Monahan3fb7e102019-08-20 11:25:29 +0100235 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100236
David Monahan4f1e8e42019-09-04 09:22:10 +0100237 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100238 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100239
240 InputTensors inputTensors
241 {
242 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
243 };
244 OutputTensors outputTensors
245 {
246 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
247 };
248
David Monahan4f1e8e42019-09-04 09:22:10 +0100249 runtime->GetProfiler(netId)->EnableProfiling(true);
250
251 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100252 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100253}
254
Ferran Balaguer83239f92019-09-19 11:49:25 +0100255inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100256{
257 using namespace armnn;
258
259 // Create runtime in which test will run
260 IRuntime::CreationOptions options;
261 IRuntimePtr runtime(armnn::IRuntime::Create(options));
262
263 // build up the structure of the network
264 INetworkPtr net(INetwork::Create());
265
266 IConnectableLayer* input = net->AddInputLayer(0);
267
David Monahan3fb7e102019-08-20 11:25:29 +0100268 ActivationDescriptor descriptor;
269 descriptor.m_Function = ActivationFunction::Square;
270 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100271
272 IConnectableLayer* output = net->AddOutputLayer(0);
273
David Monahan3fb7e102019-08-20 11:25:29 +0100274 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
275 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100276
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100277 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100278 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100279
280 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000281 OptimizerOptionsOpaque optimizedOptions;
282 optimizedOptions.SetImportEnabled(true);
283 optimizedOptions.SetExportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000284 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100285 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100286
287 // Loads it into the runtime.
288 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000289 std::string errorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100290 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100291 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000292 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
293 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan4f1e8e42019-09-04 09:22:10 +0100294
295 // Creates structures for input & output
296 std::vector<float> inputData
297 {
298 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
299 };
300
301 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100302 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100303
304 std::vector<float> outputData(5);
305
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100306 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100307 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100308
309 InputTensors inputTensors
310 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100311 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100312 };
313 OutputTensors outputTensors
314 {
315 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
316 };
317
Ferran Balaguer83239f92019-09-19 11:49:25 +0100318 // Do the inference and expect it to fail with a ExportMemoryException
319 if (backends[0] == Compute::CpuAcc)
320 {
321 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100322 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100323 }
324 else
325 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100326 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100327 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100328}
329
330inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
331{
332 using namespace armnn;
333
334 // Create runtime in which test will run
335 IRuntime::CreationOptions options;
336 IRuntimePtr runtime(armnn::IRuntime::Create(options));
337
338 // build up the structure of the network
339 INetworkPtr net(INetwork::Create());
340
341 IConnectableLayer* input = net->AddInputLayer(0);
342
David Monahan3fb7e102019-08-20 11:25:29 +0100343 ActivationDescriptor descriptor;
344 descriptor.m_Function = ActivationFunction::Square;
345 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100346
347 IConnectableLayer* output = net->AddOutputLayer(0);
348
David Monahan3fb7e102019-08-20 11:25:29 +0100349 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
350 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100351
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100352 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100353 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100354
355 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000356 OptimizerOptionsOpaque optimizedOptions;
357 optimizedOptions.SetImportEnabled(true);
358 optimizedOptions.SetExportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000359 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100360 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100361
362 // Loads it into the runtime.
363 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000364 std::string errorMessage;
David Monahan4f1e8e42019-09-04 09:22:10 +0100365 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100366 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000367 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
368 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100369
370 // Creates structures for input & output
371 std::vector<float> inputData
372 {
373 1.0f, 2.0f, 3.0f, 4.0f
374 };
375
376 std::vector<float> outputData(4);
377
James Conroy57d10b72019-10-25 09:44:14 +0100378 std::vector<float> expectedOutput
379 {
380 1.0f, 4.0f, 9.0f, 16.0f
381 };
382
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100383 InputTensors inputTensors
384 {
385 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
386 };
387 OutputTensors outputTensors
388 {
389 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
390 };
391
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100392 runtime->GetProfiler(netId)->EnableProfiling(true);
393
394 // Do the inference
395 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
396
397 // Retrieve the Profiler.Print() output to get the workload execution
398 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
399 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000400 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100401 std::string dump = ss.str();
402
David Monahan3fb7e102019-08-20 11:25:29 +0100403 // Contains ActivationWorkload
404 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100405 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100406
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100407 // Contains SyncMemGeneric
408 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100409 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100410
Ferran Balaguer83239f92019-09-19 11:49:25 +0100411 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100412 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100413 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100414
415 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100416 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100417}
418
Ferran Balaguer83239f92019-09-19 11:49:25 +0100419inline void ImportOnlyWorkload(std::vector<BackendId> backends)
420{
421 using namespace armnn;
422
423 IRuntime::CreationOptions options;
424 IRuntimePtr runtime(IRuntime::Create(options));
425
426 // Builds up the structure of the network.
427 INetworkPtr net(INetwork::Create());
428
429 IConnectableLayer* input = net->AddInputLayer(0);
430
431 ActivationDescriptor descriptor;
432 descriptor.m_Function = ActivationFunction::Square;
433 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
434
435 IConnectableLayer* output = net->AddOutputLayer(0);
436
437 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
438 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
439
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100440 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100441 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
442
443 // optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000444 OptimizerOptionsOpaque optimizedOptions;
445 optimizedOptions.SetImportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000446 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100447
Sadik Armagan1625efc2021-06-10 18:24:34 +0100448 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100449 // Load it into the runtime. It should pass.
450 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000451 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100452 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000453 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
454 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100455
Sadik Armagan1625efc2021-06-10 18:24:34 +0100456 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100457 // Creates structures for input & output
458 std::vector<float> inputData
459 {
460 1.0f, 2.0f, 3.0f, 4.0f
461 };
462
463 std::vector<float> outputData(4);
464
465 std::vector<float> expectedOutput
466 {
467 1.0f, 4.0f, 9.0f, 16.0f
468 };
469
David Monahan646bc8a2022-01-31 14:29:14 +0000470 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100471
Ferran Balaguer83239f92019-09-19 11:49:25 +0100472 InputTensors inputTensors
473 {
474 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
475 };
476 OutputTensors outputTensors
477 {
478 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
479 };
480
Sadik Armagan1625efc2021-06-10 18:24:34 +0100481 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100482 runtime->GetProfiler(netId)->EnableProfiling(true);
483
Sadik Armagan1625efc2021-06-10 18:24:34 +0100484 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100485 // Do the inference
486 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
487
Sadik Armagan1625efc2021-06-10 18:24:34 +0100488 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100489 // Retrieve the Profiler.Print() output to get the workload execution
490 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
491 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000492 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100493 std::string dump = ss.str();
494
495 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100496 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100497 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100498 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100499
500 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100501 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100502 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100503 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100504
505 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100506 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100507}
508
509inline void ExportOnlyWorkload(std::vector<BackendId> backends)
510{
511 using namespace armnn;
512
513 IRuntime::CreationOptions options;
514 IRuntimePtr runtime(IRuntime::Create(options));
515
516 // Builds up the structure of the network.
517 INetworkPtr net(INetwork::Create());
518
519 IConnectableLayer* input = net->AddInputLayer(0);
520
521 ActivationDescriptor descriptor;
522 descriptor.m_Function = ActivationFunction::Square;
523 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
524
525 IConnectableLayer* output = net->AddOutputLayer(0);
526
527 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
528 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
529
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100530 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100531 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
532
533 // optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000534 OptimizerOptionsOpaque optimizedOptions;
535 optimizedOptions.SetExportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000536 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100537
Sadik Armagan1625efc2021-06-10 18:24:34 +0100538 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100539 // Load it into the runtime. It should pass.
540 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000541 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100542 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000543 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
544 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100545
Sadik Armagan1625efc2021-06-10 18:24:34 +0100546 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100547 // Creates structures for input & output
548 std::vector<float> inputData
549 {
550 1.0f, 2.0f, 3.0f, 4.0f
551 };
552
553 std::vector<float> outputData(4);
554
555 std::vector<float> expectedOutput
556 {
557 1.0f, 4.0f, 9.0f, 16.0f
558 };
559
David Monahan646bc8a2022-01-31 14:29:14 +0000560 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100561
Ferran Balaguer83239f92019-09-19 11:49:25 +0100562 InputTensors inputTensors
563 {
564 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
565 };
566 OutputTensors outputTensors
567 {
568 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
569 };
570
Sadik Armagan1625efc2021-06-10 18:24:34 +0100571 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100572 runtime->GetProfiler(netId)->EnableProfiling(true);
573
Sadik Armagan1625efc2021-06-10 18:24:34 +0100574 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100575 // Do the inference
576 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
577
Sadik Armagan1625efc2021-06-10 18:24:34 +0100578 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100579 // Retrieve the Profiler.Print() output to get the workload execution
580 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
581 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000582 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100583 std::string dump = ss.str();
584
585 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100586 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100587 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100588 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100589
590 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100591 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100592 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100593 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100594
595 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100596 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100597}
598
599inline void ImportAndExportWorkload(std::vector<BackendId> backends)
600{
601 using namespace armnn;
602
603 IRuntime::CreationOptions options;
604 IRuntimePtr runtime(IRuntime::Create(options));
605
606 // Builds up the structure of the network.
607 INetworkPtr net(INetwork::Create());
608
609 IConnectableLayer* input = net->AddInputLayer(0);
610
611 ActivationDescriptor descriptor;
612 descriptor.m_Function = ActivationFunction::Square;
613 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
614
615 IConnectableLayer* output = net->AddOutputLayer(0);
616
617 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
618 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
619
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100620 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100621 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
622
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000623 OptimizerOptionsOpaque optimizedOptions;
624 optimizedOptions.SetImportEnabled(true);
625 optimizedOptions.SetExportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000626 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100627
Sadik Armagan1625efc2021-06-10 18:24:34 +0100628 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100629 // Load it into the runtime. It should pass.
630 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000631 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100632 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000633 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
634 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100635
Sadik Armagan1625efc2021-06-10 18:24:34 +0100636 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100637 // Creates structures for input & output
638 std::vector<float> inputData
639 {
640 1.0f, 2.0f, 3.0f, 4.0f
641 };
642
643 std::vector<float> outputData(4);
644
645 std::vector<float> expectedOutput
646 {
647 1.0f, 4.0f, 9.0f, 16.0f
648 };
649
David Monahan646bc8a2022-01-31 14:29:14 +0000650 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100651
Ferran Balaguer83239f92019-09-19 11:49:25 +0100652 InputTensors inputTensors
653 {
654 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
655 };
656 OutputTensors outputTensors
657 {
658 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
659 };
660
Sadik Armagan1625efc2021-06-10 18:24:34 +0100661 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100662 runtime->GetProfiler(netId)->EnableProfiling(true);
663
Sadik Armagan1625efc2021-06-10 18:24:34 +0100664 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100665 // Do the inference
666 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
667
Sadik Armagan1625efc2021-06-10 18:24:34 +0100668 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100669 // Retrieve the Profiler.Print() output to get the workload execution
670 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
671 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000672 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100673 std::string dump = ss.str();
674
675 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100676 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100677 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100678 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100679
680 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100681 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100682 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100683 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100684
685 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100686 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100687}
688
689inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
690{
691 using namespace armnn;
692
693 // Create runtime in which test will run
694 IRuntime::CreationOptions options;
695 IRuntimePtr runtime(armnn::IRuntime::Create(options));
696
697 // build up the structure of the network
698 INetworkPtr net(INetwork::Create());
699
700 IConnectableLayer* input = net->AddInputLayer(0);
701
702 ActivationDescriptor descriptor;
703 descriptor.m_Function = ActivationFunction::Square;
704 IConnectableLayer* activation = net->AddActivationLayer(descriptor);
705
706 IConnectableLayer* output0 = net->AddOutputLayer(0);
707 IConnectableLayer* output1 = net->AddOutputLayer(1);
708
709 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
710 activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
711 activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
712
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100713 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100714 activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
715
716 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000717 OptimizerOptionsOpaque optimizedOptions;
718 optimizedOptions.SetImportEnabled(true);
719 optimizedOptions.SetExportEnabled(true);
Francis Murtagh626bd902022-06-21 13:16:23 +0000720 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100721
722 // Loads it into the runtime.
723 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000724 std::string errorMessage;
Ferran Balaguer83239f92019-09-19 11:49:25 +0100725 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100726 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000727 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
728 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100729
730 // Creates structures for input & output
731 std::vector<float> inputData
732 {
733 1.0f, 2.0f, 3.0f, 4.0f
734 };
735
736 std::vector<float> outputData0(4);
737 std::vector<float> outputData1(4);
738
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100739 std::vector<float> expectedOutput
740 {
741 1.0f, 4.0f, 9.0f, 16.0f
742 };
743
Ferran Balaguer83239f92019-09-19 11:49:25 +0100744 InputTensors inputTensors
745 {
746 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
747 };
748 OutputTensors outputTensors
749 {
750 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
751 {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
752 };
753
754 // The result of the inference is not important, just the fact that there
755 // should not be CopyMemGeneric workloads.
756 runtime->GetProfiler(netId)->EnableProfiling(true);
757
758 // Do the inference
759 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
760
761 // Retrieve the Profiler.Print() output to get the workload execution
762 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
763 std::stringstream ss;
764 profilerManager.GetProfiler()->Print(ss);
765 std::string dump = ss.str();
766
767 std::size_t found = std::string::npos;
768
769 if (backends[0] == Compute::CpuRef)
770 {
771 found = dump.find("RefActivationWorkload");
772 }
773 else if (backends[0] == Compute::CpuAcc)
774 {
775 found = dump.find("NeonActivationWorkload");
776 }
777 else if (backends[0] == Compute::GpuAcc)
778 {
779 found = dump.find("ClActivationWorkload");
780 }
781
Sadik Armagan1625efc2021-06-10 18:24:34 +0100782 CHECK(found != std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100783 // No contains SyncMemGeneric
784 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100785 CHECK(found == std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100786 // Contains CopyMemGeneric
787 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100788 CHECK(found != std::string::npos);
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100789
790 // Check that the outputs are correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100791 CHECK(std::equal(outputData0.begin(), outputData0.end(),
792 expectedOutput.begin(), expectedOutput.end()));
793 CHECK(std::equal(outputData1.begin(), outputData1.end(),
794 expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100795}
796
David Monahan0a99a142020-03-13 07:52:54 +0000797inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
798{
799 using namespace armnn;
800
801 // Create runtime in which test will run
802 IRuntime::CreationOptions options;
803 IRuntimePtr runtime(armnn::IRuntime::Create(options));
804
805 // build up the structure of the network
806 INetworkPtr net(INetwork::Create());
807
808 IConnectableLayer* input = net->AddInputLayer(0);
809
810 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
811 // dim of the output to make it too small to hold the specified slice.
812 StridedSliceDescriptor descriptor;
813 descriptor.m_Begin = {0, 0};
814 descriptor.m_End = {2, 3};
815 descriptor.m_Stride = {1, 1};
816 descriptor.m_BeginMask = 0;
817 descriptor.m_EndMask = 0;
818 descriptor.m_ShrinkAxisMask = 1;
819 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
820
821 IConnectableLayer* output0 = net->AddOutputLayer(0);
822
823 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
824 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
825
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100826 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000827 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
828
829 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100830 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000831}
832
David Monahan646bc8a2022-01-31 14:29:14 +0000833inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
834{
835 /**
836 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
837 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
838 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
839 * In this case all inputs and outputs should be imported
840 */
841 using namespace armnn;
842 IRuntime::CreationOptions options;
843 IRuntimePtr runtime(IRuntime::Create(options));
844
845 // Builds up the structure of the network.
846 INetworkPtr net(INetwork::Create());
847 IConnectableLayer* input = net->AddInputLayer(0);
848 ActivationDescriptor descriptor;
849 descriptor.m_Function = ActivationFunction::Square;
850 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
851 IConnectableLayer* output = net->AddOutputLayer(0);
852 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
853 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
854 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
855 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
856 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
857 INFO("Load Network");
858
859 // Load it into the runtime. It should pass.
860 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000861 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +0000862 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000863 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
864 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
865
David Monahan646bc8a2022-01-31 14:29:14 +0000866 INFO("Generate Data");
867
868 // Creates structures for input & output
869 std::vector<float> inputData
870 {
871 1.0f, 2.0f, 3.0f, 4.0f
872 };
873 std::vector<float> outputData(4);
874 std::vector<float> expectedOutput
875 {
876 1.0f, 4.0f, 9.0f, 16.0f
877 };
878
879 // Check our input and output pointers are actually aligned
880 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
881 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
882 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
883
884 INFO("Create Inference");
885 InputTensors inputTensors
886 {
887 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
888 };
889 OutputTensors outputTensors
890 {
891 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
892 };
893
894 runtime->GetProfiler(netId)->EnableProfiling(true);
895 std::vector<ImportedInputId> importedInputIds =
896 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100897 CHECK(importedInputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +0000898 std::vector<ImportedOutputId> importedOutputIds =
899 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100900 CHECK(importedOutputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +0000901 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +0100902 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +0000903
904 // Retrieve the Profiler.Print() output to get the workload execution
905 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
906 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000907 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000908 std::string dump = ss.str();
909
910 if (backends[0] == Compute::CpuAcc)
911 {
912 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
913 // reconfigure is implemented
914 int count = SubStringCounter(dump, "SyncMemGeneric");
915 CHECK(count == 0);
916 // Should be 2 CopyMemGeneric workloads
917 count = SubStringCounter(dump, "CopyMemGeneric");
918 CHECK(count == 2);
919 }
920 else
921 {
922 // Check there is a SyncMemGeneric workload as we exported
923 int count = SubStringCounter(dump, "SyncMemGeneric");
924 CHECK(count == 1);
925 // Shouldn't be any CopyMemGeneric workloads
926 count = SubStringCounter(dump, "CopyMemGeneric");
927 CHECK(count == 0);
928 }
929 // Check the output is correct
930 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
931}
932
933inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
934{
935 /**
936 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
937 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
938 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
939 * In this case all only the output should be imported
940 */
941 using namespace armnn;
942
943 IRuntime::CreationOptions options;
944 IRuntimePtr runtime(IRuntime::Create(options));
945
946 // Builds up the structure of the network.
947 INetworkPtr net(INetwork::Create());
948 IConnectableLayer* input = net->AddInputLayer(0);
949
950 ActivationDescriptor descriptor;
951 descriptor.m_Function = ActivationFunction::Square;
952 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
953
954 IConnectableLayer* output = net->AddOutputLayer(0);
955
956 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
957 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
958 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
959 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
960
961 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
962 INFO("Load Network");
963 // Load it into the runtime. It should pass.
964 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000965 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +0000966 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000967 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
968 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
969
David Monahan646bc8a2022-01-31 14:29:14 +0000970 INFO("Generate Data");
971
972 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
973 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
974 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
975
976 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
977
978 // Check if our pointer is truly misaligned
979 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
980 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
981
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000982 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +0000983 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000984 1.0f, 2.0f, 3.0f, 4.0f
985 };
986
987 std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +0000988
989 std::vector<float> outputData(4);
990 // Check our output buffer is aligned
991 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
992
993 std::vector<float> expectedOutput
994 {
995 1.0f, 4.0f, 9.0f, 16.0f
996 };
997
998 INFO("Create Inference");
999 InputTensors inputTensors
1000 {
1001 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
1002 };
1003 OutputTensors outputTensors
1004 {
1005 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1006 };
1007 runtime->GetProfiler(netId)->EnableProfiling(true);
1008 std::vector<ImportedInputId> importedInputIds =
1009 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001010 // We expect the import to have failed.
1011 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001012 std::vector<ImportedOutputId> importedOutputIds =
1013 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001014 CHECK(importedOutputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +00001015
1016 // Do the inference and force the import as the memory is misaligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001017 runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001018
1019 // Retrieve the Profiler.Print() output to get the workload execution
1020 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1021 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001022 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001023 std::string dump = ss.str();
1024
1025 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1026 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1027 // for imports/copies. Only that the output is correct.
1028 if (backends[0] != Compute::GpuAcc)
1029 {
1030 if (backends[0] == Compute::CpuAcc)
1031 {
1032 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1033 // reconfigure is implemented
1034 // We should get 0 SyncMemGeneric for the Output
1035 int count = SubStringCounter(dump, "SyncMemGeneric");
1036 CHECK(count == 0);
1037 // Should be 2 CopyMemGeneric as we copied the input
1038 count = SubStringCounter(dump, "CopyMemGeneric");
1039 CHECK(count == 2);
1040 }
1041 else
1042 {
1043 // We should get 1 SyncMemGeneric for the Output
1044 int count = SubStringCounter(dump, "SyncMemGeneric");
1045 CHECK(count == 1);
1046 // Should only be 1 CopyMemGeneric as we copied the input
1047 count = SubStringCounter(dump, "CopyMemGeneric");
1048 CHECK(count == 1);
1049 }
1050 }
1051 // Check the output is correct
1052 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1053 std::free(memPtr);
1054}
1055
1056inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1057{
1058 /**
1059 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1060 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1061 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1062 * In this case all only the input should be imported
1063 */
1064 using namespace armnn;
1065
1066 IRuntime::CreationOptions options;
1067 IRuntimePtr runtime(IRuntime::Create(options));
1068
1069 // Builds up the structure of the network.
1070 INetworkPtr net(INetwork::Create());
1071 IConnectableLayer* input = net->AddInputLayer(0);
1072
1073 ActivationDescriptor descriptor;
1074 descriptor.m_Function = ActivationFunction::Square;
1075 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1076
1077 IConnectableLayer* output = net->AddOutputLayer(0);
1078
1079 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1080 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1081 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1082 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1083
1084 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1085 INFO("Load Network");
1086 // Load it into the runtime. It should pass.
1087 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001088 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +00001089 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001090 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1091 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
1092
David Monahan646bc8a2022-01-31 14:29:14 +00001093 INFO("Generate Data");
1094
1095 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1096 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1097 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1098
1099 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1100
1101 // Check if our pointer is truly misaligned
1102 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1103 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1104
1105 // Creates structures for input & output
1106 std::vector<float> inputData
1107 {
1108 1.0f, 2.0f, 3.0f, 4.0f
1109 };
1110
1111 // Check our input buffer is aligned
1112 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1113 std::vector<float> expectedOutput
1114 {
1115 1.0f, 4.0f, 9.0f, 16.0f
1116 };
1117
1118 INFO("Create Inference");
1119 InputTensors inputTensors
1120 {
1121 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1122 };
1123 OutputTensors outputTensors
1124 {
1125 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1126 };
1127 runtime->GetProfiler(netId)->EnableProfiling(true);
1128 std::vector<ImportedInputId> importedInputIds =
1129 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001130 CHECK(importedInputIds.size() == 1);
1131 // We expect this to fail.
David Monahan646bc8a2022-01-31 14:29:14 +00001132 std::vector<ImportedOutputId> importedOutputIds =
1133 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001134 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001135
Colm Doneland7ceec52022-07-06 12:09:05 +01001136 // Even if importing the output failed we still expect to be able to get it to work.
1137 runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001138
1139 // Retrieve the Profiler.Print() output to get the workload execution
1140 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1141 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001142 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001143 std::string dump = ss.str();
1144
1145 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1146 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1147 // for imports/copies. Only that the output is correct.
1148 if (backends[0] != Compute::GpuAcc)
1149 {
1150 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1151 int count = SubStringCounter(dump, "SyncMemGeneric");
1152 CHECK(count == 0);
1153 // Should only be 1 CopyMemGeneric as we copied the input
1154 count = SubStringCounter(dump, "CopyMemGeneric");
1155 if (backends[0] == Compute::CpuAcc)
1156 {
1157 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1158 // reconfigure is implemented
1159 CHECK(count == 2);
1160 }
1161 else
1162 {
1163 CHECK(count == 1);
1164 }
1165 // Check the output is correct
1166 }
1167 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001168 std::vector<float> outputData(expectedOutput.size(), 0);
1169 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001170 for (auto outputValue : expectedOutput)
1171 {
David Monahaneef6b762022-02-10 16:01:58 +00001172 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001173 ++index;
1174 }
1175 std::free(memPtr);
1176}
1177
1178inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1179{
1180 /**
1181 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1182 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1183 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1184 * In this case all inputs and outputs should be copied
1185 */
1186 using namespace armnn;
1187
1188 IRuntime::CreationOptions options;
1189 IRuntimePtr runtime(IRuntime::Create(options));
1190
1191 // Builds up the structure of the network.
1192 INetworkPtr net(INetwork::Create());
1193 IConnectableLayer* input = net->AddInputLayer(0);
1194
1195 ActivationDescriptor descriptor;
1196 descriptor.m_Function = ActivationFunction::Square;
1197 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1198
1199 IConnectableLayer* output = net->AddOutputLayer(0);
1200
1201 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1202 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1203 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1204 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1205
1206 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1207 INFO("Load Network");
1208 // Load it into the runtime. It should pass.
1209 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001210 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +00001211 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001212 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1213 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan646bc8a2022-01-31 14:29:14 +00001214 INFO("Generate Data");
1215
1216 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1217 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1218 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1219 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1220
1221 // Check if our pointer is truly misaligned
1222 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1223 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001224 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001225 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001226 1.0f, 2.0f, 3.0f, 4.0f
1227 };
1228 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001229
1230 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1231 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1232
1233 // Check if our pointer is truly misaligned
1234 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1235
1236 std::vector<float> expectedOutput
1237 {
1238 1.0f, 4.0f, 9.0f, 16.0f
1239 };
1240
1241 INFO("Create Inference");
1242 InputTensors inputTensors
1243 {
1244 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1245 };
1246 OutputTensors outputTensors
1247 {
1248 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1249 };
1250 runtime->GetProfiler(netId)->EnableProfiling(true);
1251 std::vector<ImportedInputId> importedInputIds =
1252 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001253 // Import should have failed.
1254 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001255 std::vector<ImportedOutputId> importedOutputIds =
1256 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001257 // Import should have failed.
1258 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001259
1260 // Do the inference and force the import as the memory is misaligned.
1261 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1262
1263 // Retrieve the Profiler.Print() output to get the workload execution
1264 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1265 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001266 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001267 std::string dump = ss.str();
1268
1269 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1270 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1271 // for imports/copies. Only that the output is correct.
1272 if (backends[0] != Compute::GpuAcc)
1273 {
1274 // We can only copy so there should be no SyncMemGeneric
1275 int count = SubStringCounter(dump, "SyncMemGeneric");
1276 CHECK(count == 0);
1277 // Should only be CopyMemGeneric workloads as we copied all buffers
1278 count = SubStringCounter(dump, "CopyMemGeneric");
1279 CHECK(count == 2);
1280 }
1281 // Check the output is correct
1282 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001283 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001284 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1285 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001286 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001287 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001288 ++index;
1289 }
1290 std::free(inputMemPtr);
1291 std::free(outputMemPtr);
1292}
1293
David Monahan16829712022-02-03 17:04:59 +00001294inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1295{
1296 /**
1297 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1298 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1299 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1300 * In this we create some aligned buffers, import them into a network and validate the output and number of
1301 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1302 * back to copying correctly.
1303 */
1304 using namespace armnn;
1305
1306 IRuntime::CreationOptions options;
1307 IRuntimePtr runtime(IRuntime::Create(options));
1308
1309 // Builds up the structure of the network.
1310 INetworkPtr net(INetwork::Create());
1311 IConnectableLayer* input = net->AddInputLayer(0);
1312
1313 ActivationDescriptor descriptor;
1314 descriptor.m_Function = ActivationFunction::Square;
1315 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1316
1317 IConnectableLayer* output = net->AddOutputLayer(0);
1318
1319 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1320 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1321 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1322 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1323
1324 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1325 INFO("Load Network");
1326 // Load it into the runtime. It should pass.
1327 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001328 std::string errorMessage;
David Monahan16829712022-02-03 17:04:59 +00001329 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001330 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1331 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan16829712022-02-03 17:04:59 +00001332 INFO("Generate Data");
1333
1334 // Creates structures for input & output
1335 std::vector<float> inputData
1336 {
1337 1.0f, 2.0f, 3.0f, 4.0f
1338 };
1339 std::vector<float> outputData(4);
1340 std::vector<float> expectedOutput
1341 {
1342 1.0f, 4.0f, 9.0f, 16.0f
1343 };
1344
1345 // Check our input and output pointers are actually aligned
1346 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1347 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1348 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1349
1350 INFO("Create Inference");
1351 InputTensors inputTensors
1352 {
1353 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1354 };
1355 OutputTensors outputTensors
1356 {
1357 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1358 };
1359
1360 runtime->GetProfiler(netId)->EnableProfiling(true);
1361 std::vector<ImportedInputId> importedInputIds =
1362 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001363 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001364 std::vector<ImportedOutputId> importedOutputIds =
1365 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001366 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001367 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001368 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001369
1370 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1371 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1372 std::stringstream ss;
1373 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1374 std::string dump = ss.str();
1375
1376 if (backends[0] == Compute::CpuAcc)
1377 {
1378 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1379 // reconfigure is implemented
1380 int count = SubStringCounter(dump, "SyncMemGeneric");
1381 CHECK(count == 0);
1382 // Should be 2 CopyMemGeneric workloads
1383 count = SubStringCounter(dump, "CopyMemGeneric");
1384 CHECK(count >= 1);
1385 }
1386 else
1387 {
1388 // Check there is at least 1 SyncMemGeneric workload as we exported
1389 int count = SubStringCounter(dump, "SyncMemGeneric");
1390 CHECK(count >= 1);
1391 // Shouldn't be any CopyMemGeneric workloads
1392 count = SubStringCounter(dump, "CopyMemGeneric");
1393 CHECK(count == 0);
1394 }
1395 // Check the output is correct
1396 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1397
1398 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1399 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1400 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1401 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1402
1403 // Check if our pointer is truly misaligned
1404 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001405
1406 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001407 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001408 2.0f, 3.0f, 4.0f, 5.0f
1409 };
1410
1411 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001412
1413 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1414 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1415
1416 // Check if our pointer is truly misaligned
1417 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1418
1419 std::vector<float> expectedMisalignedOutput
1420 {
1421 4.0f, 9.0f, 16.0f, 25.0f
1422 };
1423
1424 INFO("Create Second Inference");
1425 InputTensors inputTensorsMisaligned
1426 {
1427 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1428 };
1429 OutputTensors outputTensorsMisaligned
1430 {
1431 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1432 };
1433 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001434 // Import should fail.
1435 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001436 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001437 // Import should fail.
1438 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001439
1440 // Do the inference and force the import as the memory is misaligned.
1441 runtime->EnqueueWorkload(netId,
1442 inputTensorsMisaligned,
1443 outputTensorsMisaligned,
1444 importedInputIds,
1445 importedOutputIds);
1446
1447 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1448 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1449 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1450 dump = ss.str();
1451
1452 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1453 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1454 // for imports/copies. Only that the output is correct.
1455 if (backends[0] != Compute::GpuAcc)
1456 {
1457 // The SyncMemGeneric will still be in the profiling log from the first inference
1458 int count = SubStringCounter(dump, "SyncMemGeneric");
1459 CHECK(count >= 1);
1460 // We should now see CopyMemGeneric workloads as we copied all buffers
1461 count = SubStringCounter(dump, "CopyMemGeneric");
1462 CHECK(count >= 1);
1463 }
1464 // Check the output is correct
1465 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001466 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001467 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001468 for (auto outputValue : expectedMisalignedOutput)
1469 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001470 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001471 ++index;
1472 }
1473 // Clean up to avoid interfering with other tests
1474 runtime->UnloadNetwork(netId);
1475 std::free(inputMemPtr);
1476 std::free(outputMemPtr);
1477}
1478
1479
1480inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1481{
1482 /**
1483 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1484 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1485 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1486 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1487 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1488 * to importing correctly.
1489 */
1490 using namespace armnn;
1491
1492 IRuntime::CreationOptions options;
1493 IRuntimePtr runtime(IRuntime::Create(options));
1494
1495 // Builds up the structure of the network.
1496 INetworkPtr net(INetwork::Create());
1497 IConnectableLayer* input = net->AddInputLayer(0);
1498
1499 ActivationDescriptor descriptor;
1500 descriptor.m_Function = ActivationFunction::Square;
1501 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1502
1503 IConnectableLayer* output = net->AddOutputLayer(0);
1504
1505 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1506 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1507 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1508 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1509
1510 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1511 INFO("Load Network");
1512 // Load it into the runtime. It should pass.
1513 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001514 std::string errorMessage;
David Monahan16829712022-02-03 17:04:59 +00001515 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001516 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1517 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan16829712022-02-03 17:04:59 +00001518 INFO("Generate Data");
1519
1520 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1521 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1522 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1523 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1524
1525 // Check if our pointer is truly misaligned
1526 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1527 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001528 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001529 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001530 2.0f, 3.0f, 4.0f, 5.0f
1531 };
1532 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001533
1534 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1535 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1536
1537 // Check if our pointer is truly misaligned
1538 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1539
1540 std::vector<float> expectedMisalignedOutput
1541 {
1542 4.0f, 9.0f, 16.0f, 25.0f
1543 };
1544
1545 INFO("Create Second Inference");
1546 InputTensors inputTensorsMisaligned
1547 {
1548 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1549 };
1550 OutputTensors outputTensorsMisaligned
1551 {
1552 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1553 };
1554 runtime->GetProfiler(netId)->EnableProfiling(true);
1555 std::vector<ImportedInputId> importedInputIds =
1556 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001557 // Import should fail.
1558 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001559 std::vector<ImportedOutputId> importedOutputIds =
1560 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001561 // Import should fail.
1562 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001563
1564 // Do the inference and force the import as the memory is misaligned.
1565 runtime->EnqueueWorkload(netId,
1566 inputTensorsMisaligned,
1567 outputTensorsMisaligned,
1568 importedInputIds,
1569 importedOutputIds);
1570
1571 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1572 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1573 std::stringstream ss;
1574 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1575 std::string dump = ss.str();
1576
1577 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1578 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1579 // for imports/copies. Only that the output is correct.
1580 if (backends[0] != Compute::GpuAcc)
1581 {
1582 // We can only copy so there should be no SyncMemGeneric
1583 int count = SubStringCounter(dump, "SyncMemGeneric");
1584 CHECK(count == 0);
1585 // Should only be CopyMemGeneric workloads as we copied all buffers
1586 count = SubStringCounter(dump, "CopyMemGeneric");
1587 CHECK(count >= 1);
1588 }
1589 // Check the output is correct
1590 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001591 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1592 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001593 for (auto outputValue : expectedMisalignedOutput)
1594 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001595 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001596 ++index;
1597 }
1598 std::free(inputMemPtr);
1599 std::free(outputMemPtr);
1600
1601 // Creates structures for input & output
1602 std::vector<float> inputData
1603 {
1604 1.0f, 2.0f, 3.0f, 4.0f
1605 };
1606 std::vector<float> outputData(4);
1607 std::vector<float> expectedOutput
1608 {
1609 1.0f, 4.0f, 9.0f, 16.0f
1610 };
1611
1612 // Check our input and output pointers are actually aligned
1613 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1614 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1615
1616 INFO("Create Inference");
1617 InputTensors inputTensors
1618 {
1619 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1620 };
1621 OutputTensors outputTensors
1622 {
1623 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1624 };
1625
1626 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001627 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001628 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001629 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001630 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001631 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001632
1633 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1634 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1635 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1636 dump = ss.str();
1637
1638 if (backends[0] == Compute::CpuAcc)
1639 {
1640 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1641 // reconfigure is implemented
1642 int count = SubStringCounter(dump, "SyncMemGeneric");
1643 CHECK(count == 0);
1644 // Should be 2 CopyMemGeneric workloads
1645 count = SubStringCounter(dump, "CopyMemGeneric");
1646 CHECK(count >= 1);
1647 }
1648 else
1649 {
1650 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1651 // SyncMemGeneric Workload when we previously didn't
1652 int count = SubStringCounter(dump, "SyncMemGeneric");
1653 CHECK(count >= 1);
1654 // Should still be some CopyMemGeneric Workloads from the last inference
1655 count = SubStringCounter(dump, "CopyMemGeneric");
1656 CHECK(count >= 1);
1657 }
1658 // Check the output is correct
1659 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1660 // Clean up to avoid interfering with other tests
1661 runtime->UnloadNetwork(netId);
1662}
1663
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001664} // anonymous namespace