blob: 795fc13c3224ab84b4ba78b56c1323e3308d3f9a [file] [log] [blame]
Aron Virginas-Tar70104002018-10-24 15:33:28 +01001//
Teresa Charlindf15c4e2023-02-21 15:16:09 +00002// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
Aron Virginas-Tar70104002018-10-24 15:33:28 +01003// SPDX-License-Identifier: MIT
4//
5#pragma once
6
Sadik Armagana097d2a2021-11-24 15:47:28 +00007#include <CommonTestUtils.hpp>
Mike Kelly386ff1a2021-03-29 15:04:50 +01008
Matthew Bentham246bd462020-01-20 16:16:06 +00009#include <armnn/Descriptors.hpp>
narpra01b9546cf2018-11-20 15:21:28 +000010#include <armnn/INetwork.hpp>
Matthew Bentham246bd462020-01-20 16:16:06 +000011#include <armnn/IRuntime.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010012
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010013#include <Profiling.hpp>
Colm Donelanc42a9872022-02-02 16:35:09 +000014#include <armnnUtils/QuantizeHelper.hpp>
Aron Virginas-Tar48623a02019-10-22 10:00:28 +010015#include <ResolveType.hpp>
Aron Virginas-Tar70104002018-10-24 15:33:28 +010016
Sadik Armagan1625efc2021-06-10 18:24:34 +010017#include <doctest/doctest.h>
narpra01b9546cf2018-11-20 15:21:28 +000018
Aron Virginas-Tar70104002018-10-24 15:33:28 +010019#include <vector>
20
21namespace
22{
23
24using namespace armnn;
25
26template<typename T>
27bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28 const TensorInfo& commonTensorInfo,
29 const std::vector<T>& inputData,
30 const std::vector<T>& constantData,
31 const std::vector<T>& expectedOutputData)
32{
33 // Create runtime in which test will run
34 IRuntime::CreationOptions options;
35 IRuntimePtr runtime(IRuntime::Create(options));
36
37 // Builds up the structure of the network.
38 INetworkPtr net(INetwork::Create());
39
40 IConnectableLayer* input = net->AddInputLayer(0);
41 IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42 IConnectableLayer* add = net->AddAdditionLayer();
43 IConnectableLayer* output = net->AddOutputLayer(0);
44
45 input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46 constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47 add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48
49 // Sets the tensors in the network.
50 input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51 constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52 add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53
54 // optimize the network
55 IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56
57 // Loads it into the runtime.
58 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +000059 std::string errorMessage;
60 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
61 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010062
63 // Creates structures for input & output.
64 std::vector<T> outputData(inputData.size());
65
66 InputTensors inputTensors
67 {
68 {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
69 };
70 OutputTensors outputTensors
71 {
72 {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
73 };
74
75 // Does the inference.
76 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
77
78 // Checks the results.
79 return outputData == expectedOutputData;
80}
81
82inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
83{
Cathal Corbett5b8093c2021-10-22 11:12:07 +010084 TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
85 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010086
87 return ConstantUsageTest(backends,
88 commonTensorInfo,
89 std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
90 std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
91 std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
92 );
93}
94
95inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
96{
Derek Lambertif90c56d2020-01-10 17:14:08 +000097 TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
Aron Virginas-Tar70104002018-10-24 15:33:28 +010098
99 const float scale = 0.023529f;
100 const int8_t offset = -43;
101
102 commonTensorInfo.SetQuantizationScale(scale);
103 commonTensorInfo.SetQuantizationOffset(offset);
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100104 commonTensorInfo.SetConstant(true);
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100105
106 return ConstantUsageTest(backends,
107 commonTensorInfo,
Aron Virginas-Tar48623a02019-10-22 10:00:28 +0100108 armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
109 armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
110 armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
Aron Virginas-Tar70104002018-10-24 15:33:28 +0100111 );
112}
113
// Utility function to find the number of non-overlapping instances of a substring within a string.
//
// string    - text to search; it is only read.
// substring - text to look for.
//
// Returns the number of matches found scanning left to right, where each match resumes the search
// just past the previous one. An empty substring yields 0: std::string::find matches an empty
// needle at the current position without advancing, so the search would otherwise never terminate.
int SubStringCounter(const std::string& string, std::string&& substring)
{
    if (substring.empty())
    {
        return 0;
    }

    int count = 0;
    // Resume each search after the previous match so the same occurrence is not counted twice.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        ++count;
    }
    return count;
}
128
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000129template<DataType ArmnnIType, DataType ArmnnOType,
130 typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
narpra01b9546cf2018-11-20 15:21:28 +0000131void EndToEndLayerTestImpl(INetworkPtr network,
kevmay012b4d88e2019-01-24 14:05:09 +0000132 const std::map<int, std::vector<TInput>>& inputTensorData,
133 const std::map<int, std::vector<TOutput>>& expectedOutputData,
Jan Eilersbca73e12020-03-11 12:52:46 +0000134 std::vector<BackendId> backends,
135 float tolerance = 0.000001f)
narpra01b9546cf2018-11-20 15:21:28 +0000136{
137 // Create runtime in which test will run
138 IRuntime::CreationOptions options;
139 IRuntimePtr runtime(IRuntime::Create(options));
140
141 // optimize the network
142 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
143
144 // Loads it into the runtime.
145 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000146 std::string errorMessage;
147 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage);
148 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
narpra01b9546cf2018-11-20 15:21:28 +0000149
150 InputTensors inputTensors;
151 inputTensors.reserve(inputTensorData.size());
152 for (auto&& it : inputTensorData)
153 {
154 inputTensors.push_back({it.first,
155 ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
156 }
157 OutputTensors outputTensors;
158 outputTensors.reserve(expectedOutputData.size());
kevmay012b4d88e2019-01-24 14:05:09 +0000159 std::map<int, std::vector<TOutput>> outputStorage;
narpra01b9546cf2018-11-20 15:21:28 +0000160 for (auto&& it : expectedOutputData)
161 {
kevmay012b4d88e2019-01-24 14:05:09 +0000162 std::vector<TOutput> out(it.second.size());
narpra01b9546cf2018-11-20 15:21:28 +0000163 outputStorage.emplace(it.first, out);
164 outputTensors.push_back({it.first,
165 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
166 outputStorage.at(it.first).data())});
167 }
168
169 // Does the inference.
170 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
171
172 // Checks the results.
173 for (auto&& it : expectedOutputData)
174 {
kevmay012b4d88e2019-01-24 14:05:09 +0000175 std::vector<TOutput> out = outputStorage.at(it.first);
Aron Virginas-Tarf97f6da2019-10-01 18:35:44 +0100176 for (unsigned int i = 0; i < out.size(); ++i)
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000177 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100178 CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
Teresa Charlin2e3f4d22020-07-29 14:29:20 +0100179 "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
180
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +0000181 }
narpra01b9546cf2018-11-20 15:21:28 +0000182 }
183}
184
David Monahan4f1e8e42019-09-04 09:22:10 +0100185inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100186{
187 using namespace armnn;
188
189 // Create runtime in which test will run
190 IRuntime::CreationOptions options;
191 IRuntimePtr runtime(armnn::IRuntime::Create(options));
192
193 // build up the structure of the network
194 INetworkPtr net(INetwork::Create());
195
196 IConnectableLayer* input = net->AddInputLayer(0);
197
David Monahan3fb7e102019-08-20 11:25:29 +0100198 ActivationDescriptor descriptor;
199 descriptor.m_Function = ActivationFunction::Square;
200 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100201
202 IConnectableLayer* output = net->AddOutputLayer(0);
203
David Monahan3fb7e102019-08-20 11:25:29 +0100204 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
205 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100206
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100207 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100208 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100209
210 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000211 OptimizerOptions optimizedOptions;
212 optimizedOptions.m_ImportEnabled = true;
213 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100214 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100215
216 // Loads it into the runtime.
217 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000218 std::string errorMessage;
David Monahan4f1e8e42019-09-04 09:22:10 +0100219 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100220 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000221 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
222 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100223
224 // Creates structures for input & output
225 std::vector<float> inputData
226 {
David Monahan3fb7e102019-08-20 11:25:29 +0100227 1.0f, 2.0f, 3.0f, 4.0f
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100228 };
229
230 // Misaligned input
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100231 float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100232
David Monahan3fb7e102019-08-20 11:25:29 +0100233 std::vector<float> outputData(4);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100234
David Monahan4f1e8e42019-09-04 09:22:10 +0100235 // Aligned output
David Monahan3fb7e102019-08-20 11:25:29 +0100236 float* alignedOutputData = outputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100237
238 InputTensors inputTensors
239 {
240 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
241 };
242 OutputTensors outputTensors
243 {
244 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
245 };
246
David Monahan4f1e8e42019-09-04 09:22:10 +0100247 runtime->GetProfiler(netId)->EnableProfiling(true);
248
249 // Do the inference and expect it to fail with a ImportMemoryException
Sadik Armagan1625efc2021-06-10 18:24:34 +0100250 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
David Monahan4f1e8e42019-09-04 09:22:10 +0100251}
252
Ferran Balaguer83239f92019-09-19 11:49:25 +0100253inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
David Monahan4f1e8e42019-09-04 09:22:10 +0100254{
255 using namespace armnn;
256
257 // Create runtime in which test will run
258 IRuntime::CreationOptions options;
259 IRuntimePtr runtime(armnn::IRuntime::Create(options));
260
261 // build up the structure of the network
262 INetworkPtr net(INetwork::Create());
263
264 IConnectableLayer* input = net->AddInputLayer(0);
265
David Monahan3fb7e102019-08-20 11:25:29 +0100266 ActivationDescriptor descriptor;
267 descriptor.m_Function = ActivationFunction::Square;
268 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
David Monahan4f1e8e42019-09-04 09:22:10 +0100269
270 IConnectableLayer* output = net->AddOutputLayer(0);
271
David Monahan3fb7e102019-08-20 11:25:29 +0100272 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
273 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
David Monahan4f1e8e42019-09-04 09:22:10 +0100274
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100275 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100276 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
David Monahan4f1e8e42019-09-04 09:22:10 +0100277
278 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000279 OptimizerOptions optimizedOptions;
280 optimizedOptions.m_ImportEnabled = true;
281 optimizedOptions.m_ExportEnabled = true;
282 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100283 CHECK(optNet);
David Monahan4f1e8e42019-09-04 09:22:10 +0100284
285 // Loads it into the runtime.
286 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000287 std::string errorMessage;
David Monahan3fb7e102019-08-20 11:25:29 +0100288 // Enable Importing and Exporting
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100289 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000290 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
291 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan4f1e8e42019-09-04 09:22:10 +0100292
293 // Creates structures for input & output
294 std::vector<float> inputData
295 {
296 1.0f, 2.0f, 3.0f, 4.0f, 5.0f
297 };
298
299 // Aligned input
David Monahan3fb7e102019-08-20 11:25:29 +0100300 float* alignedInputData = inputData.data();
David Monahan4f1e8e42019-09-04 09:22:10 +0100301
302 std::vector<float> outputData(5);
303
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100304 // Misaligned output
Aron Virginas-Tard9f7c8b2019-09-13 13:37:03 +0100305 float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100306
307 InputTensors inputTensors
308 {
David Monahan4f1e8e42019-09-04 09:22:10 +0100309 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100310 };
311 OutputTensors outputTensors
312 {
313 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
314 };
315
Ferran Balaguer83239f92019-09-19 11:49:25 +0100316 // Do the inference and expect it to fail with a ExportMemoryException
317 if (backends[0] == Compute::CpuAcc)
318 {
319 // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100320 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100321 }
322 else
323 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100324 CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100325 }
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100326}
327
328inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
329{
330 using namespace armnn;
331
332 // Create runtime in which test will run
333 IRuntime::CreationOptions options;
334 IRuntimePtr runtime(armnn::IRuntime::Create(options));
335
336 // build up the structure of the network
337 INetworkPtr net(INetwork::Create());
338
339 IConnectableLayer* input = net->AddInputLayer(0);
340
David Monahan3fb7e102019-08-20 11:25:29 +0100341 ActivationDescriptor descriptor;
342 descriptor.m_Function = ActivationFunction::Square;
343 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100344
345 IConnectableLayer* output = net->AddOutputLayer(0);
346
David Monahan3fb7e102019-08-20 11:25:29 +0100347 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
348 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100349
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100350 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
David Monahan3fb7e102019-08-20 11:25:29 +0100351 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100352
353 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000354 OptimizerOptions optimizedOptions;
355 optimizedOptions.m_ImportEnabled = true;
356 optimizedOptions.m_ExportEnabled = true;
357 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100358 CHECK(optNet);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100359
360 // Loads it into the runtime.
361 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000362 std::string errorMessage;
David Monahan4f1e8e42019-09-04 09:22:10 +0100363 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100364 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000365 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
366 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100367
368 // Creates structures for input & output
369 std::vector<float> inputData
370 {
371 1.0f, 2.0f, 3.0f, 4.0f
372 };
373
374 std::vector<float> outputData(4);
375
James Conroy57d10b72019-10-25 09:44:14 +0100376 std::vector<float> expectedOutput
377 {
378 1.0f, 4.0f, 9.0f, 16.0f
379 };
380
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100381 InputTensors inputTensors
382 {
383 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
384 };
385 OutputTensors outputTensors
386 {
387 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
388 };
389
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100390 runtime->GetProfiler(netId)->EnableProfiling(true);
391
392 // Do the inference
393 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
394
395 // Retrieve the Profiler.Print() output to get the workload execution
396 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
397 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000398 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100399 std::string dump = ss.str();
400
David Monahan3fb7e102019-08-20 11:25:29 +0100401 // Contains ActivationWorkload
402 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100403 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100404
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100405 // Contains SyncMemGeneric
406 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100407 CHECK(found != std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100408
Ferran Balaguer83239f92019-09-19 11:49:25 +0100409 // Does not contain CopyMemGeneric
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100410 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100411 CHECK(found == std::string::npos);
James Conroy57d10b72019-10-25 09:44:14 +0100412
413 // Check output is as expected
Sadik Armagan1625efc2021-06-10 18:24:34 +0100414 CHECK(outputData == expectedOutput);
Ferran Balaguerdcaa6102019-08-21 13:28:38 +0100415}
416
Ferran Balaguer83239f92019-09-19 11:49:25 +0100417inline void ImportOnlyWorkload(std::vector<BackendId> backends)
418{
419 using namespace armnn;
420
421 IRuntime::CreationOptions options;
422 IRuntimePtr runtime(IRuntime::Create(options));
423
424 // Builds up the structure of the network.
425 INetworkPtr net(INetwork::Create());
426
427 IConnectableLayer* input = net->AddInputLayer(0);
428
429 ActivationDescriptor descriptor;
430 descriptor.m_Function = ActivationFunction::Square;
431 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
432
433 IConnectableLayer* output = net->AddOutputLayer(0);
434
435 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
436 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
437
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100438 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100439 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
440
441 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000442 OptimizerOptions optimizedOptions;
443 optimizedOptions.m_ImportEnabled = true;
444 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100445
Sadik Armagan1625efc2021-06-10 18:24:34 +0100446 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100447 // Load it into the runtime. It should pass.
448 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000449 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100450 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000451 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
452 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100453
Sadik Armagan1625efc2021-06-10 18:24:34 +0100454 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100455 // Creates structures for input & output
456 std::vector<float> inputData
457 {
458 1.0f, 2.0f, 3.0f, 4.0f
459 };
460
461 std::vector<float> outputData(4);
462
463 std::vector<float> expectedOutput
464 {
465 1.0f, 4.0f, 9.0f, 16.0f
466 };
467
David Monahan646bc8a2022-01-31 14:29:14 +0000468 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100469
Ferran Balaguer83239f92019-09-19 11:49:25 +0100470 InputTensors inputTensors
471 {
472 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
473 };
474 OutputTensors outputTensors
475 {
476 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
477 };
478
Sadik Armagan1625efc2021-06-10 18:24:34 +0100479 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100480 runtime->GetProfiler(netId)->EnableProfiling(true);
481
Sadik Armagan1625efc2021-06-10 18:24:34 +0100482 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100483 // Do the inference
484 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
485
Sadik Armagan1625efc2021-06-10 18:24:34 +0100486 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100487 // Retrieve the Profiler.Print() output to get the workload execution
488 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
489 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000490 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100491 std::string dump = ss.str();
492
493 // Check there are no SyncMemGeneric workloads as we didn't export
Sadik Armagan1625efc2021-06-10 18:24:34 +0100494 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100495 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100496 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100497
498 // Should only be 1 CopyMemGeneric for the output as we imported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100499 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100500 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100501 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100502
503 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100504 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100505}
506
507inline void ExportOnlyWorkload(std::vector<BackendId> backends)
508{
509 using namespace armnn;
510
511 IRuntime::CreationOptions options;
512 IRuntimePtr runtime(IRuntime::Create(options));
513
514 // Builds up the structure of the network.
515 INetworkPtr net(INetwork::Create());
516
517 IConnectableLayer* input = net->AddInputLayer(0);
518
519 ActivationDescriptor descriptor;
520 descriptor.m_Function = ActivationFunction::Square;
521 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
522
523 IConnectableLayer* output = net->AddOutputLayer(0);
524
525 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
526 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
527
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100528 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100529 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
530
531 // optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000532 OptimizerOptions optimizedOptions;
533 optimizedOptions.m_ExportEnabled = true;
534 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100535
Sadik Armagan1625efc2021-06-10 18:24:34 +0100536 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100537 // Load it into the runtime. It should pass.
538 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000539 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100540 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000541 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
542 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100543
Sadik Armagan1625efc2021-06-10 18:24:34 +0100544 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100545 // Creates structures for input & output
546 std::vector<float> inputData
547 {
548 1.0f, 2.0f, 3.0f, 4.0f
549 };
550
551 std::vector<float> outputData(4);
552
553 std::vector<float> expectedOutput
554 {
555 1.0f, 4.0f, 9.0f, 16.0f
556 };
557
David Monahan646bc8a2022-01-31 14:29:14 +0000558 INFO("Create Inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100559
Ferran Balaguer83239f92019-09-19 11:49:25 +0100560 InputTensors inputTensors
561 {
562 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
563 };
564 OutputTensors outputTensors
565 {
566 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
567 };
568
Sadik Armagan1625efc2021-06-10 18:24:34 +0100569 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100570 runtime->GetProfiler(netId)->EnableProfiling(true);
571
Sadik Armagan1625efc2021-06-10 18:24:34 +0100572 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100573 // Do the inference
574 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
575
Sadik Armagan1625efc2021-06-10 18:24:34 +0100576 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100577 // Retrieve the Profiler.Print() output to get the workload execution
578 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
579 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000580 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100581 std::string dump = ss.str();
582
583 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100584 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100585 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100586 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100587
588 // Should be 1 CopyMemGeneric for the output as we did not import
Sadik Armagan1625efc2021-06-10 18:24:34 +0100589 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100590 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100591 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100592
593 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100594 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100595}
596
597inline void ImportAndExportWorkload(std::vector<BackendId> backends)
598{
599 using namespace armnn;
600
601 IRuntime::CreationOptions options;
602 IRuntimePtr runtime(IRuntime::Create(options));
603
604 // Builds up the structure of the network.
605 INetworkPtr net(INetwork::Create());
606
607 IConnectableLayer* input = net->AddInputLayer(0);
608
609 ActivationDescriptor descriptor;
610 descriptor.m_Function = ActivationFunction::Square;
611 IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
612
613 IConnectableLayer* output = net->AddOutputLayer(0);
614
615 input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
616 pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
617
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100618 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100619 pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
620
Francis Murtagh626bd902022-06-21 13:16:23 +0000621 OptimizerOptions optimizedOptions;
622 optimizedOptions.m_ImportEnabled = true;
623 optimizedOptions.m_ExportEnabled = true;
624 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100625
Sadik Armagan1625efc2021-06-10 18:24:34 +0100626 INFO("Load Network");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100627 // Load it into the runtime. It should pass.
628 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000629 std::string errorMessage;
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100630 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000631 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
632 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100633
Sadik Armagan1625efc2021-06-10 18:24:34 +0100634 INFO("Generate Data");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100635 // Creates structures for input & output
636 std::vector<float> inputData
637 {
638 1.0f, 2.0f, 3.0f, 4.0f
639 };
640
641 std::vector<float> outputData(4);
642
643 std::vector<float> expectedOutput
644 {
645 1.0f, 4.0f, 9.0f, 16.0f
646 };
647
David Monahan646bc8a2022-01-31 14:29:14 +0000648 INFO("Create inference");
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100649
Ferran Balaguer83239f92019-09-19 11:49:25 +0100650 InputTensors inputTensors
651 {
652 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
653 };
654 OutputTensors outputTensors
655 {
656 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
657 };
658
Sadik Armagan1625efc2021-06-10 18:24:34 +0100659 INFO("Get Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100660 runtime->GetProfiler(netId)->EnableProfiling(true);
661
Sadik Armagan1625efc2021-06-10 18:24:34 +0100662 INFO("Run Inference");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100663 // Do the inference
664 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
665
Sadik Armagan1625efc2021-06-10 18:24:34 +0100666 INFO("Print Profiler");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100667 // Retrieve the Profiler.Print() output to get the workload execution
668 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
669 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000670 profilerManager.GetProfiler()->Print(ss);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100671 std::string dump = ss.str();
672
673 // Check there is a SyncMemGeneric workload as we exported
Sadik Armagan1625efc2021-06-10 18:24:34 +0100674 INFO("Find SyncMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100675 int count = SubStringCounter(dump, "SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100676 CHECK(count == 1);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100677
678 // Shouldn't be any CopyMemGeneric workloads
Sadik Armagan1625efc2021-06-10 18:24:34 +0100679 INFO("Find CopyMemGeneric");
Ferran Balaguer83239f92019-09-19 11:49:25 +0100680 count = SubStringCounter(dump, "CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100681 CHECK(count == 0);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100682
683 // Check the output is correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100684 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100685}
686
687inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
688{
689 using namespace armnn;
690
691 // Create runtime in which test will run
692 IRuntime::CreationOptions options;
693 IRuntimePtr runtime(armnn::IRuntime::Create(options));
694
695 // build up the structure of the network
696 INetworkPtr net(INetwork::Create());
697
698 IConnectableLayer* input = net->AddInputLayer(0);
699
700 ActivationDescriptor descriptor;
701 descriptor.m_Function = ActivationFunction::Square;
702 IConnectableLayer* activation = net->AddActivationLayer(descriptor);
703
704 IConnectableLayer* output0 = net->AddOutputLayer(0);
705 IConnectableLayer* output1 = net->AddOutputLayer(1);
706
707 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
708 activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
709 activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
710
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100711 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100712 activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
713
714 // Optimize the network
Francis Murtagh626bd902022-06-21 13:16:23 +0000715 OptimizerOptions optimizedOptions;
716 optimizedOptions.m_ImportEnabled = true;
717 optimizedOptions.m_ExportEnabled = true;
718 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100719
720 // Loads it into the runtime.
721 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000722 std::string errorMessage;
Ferran Balaguer83239f92019-09-19 11:49:25 +0100723 // Enable Importing
Francis Murtagh73d3e2e2021-04-29 14:23:04 +0100724 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000725 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
726 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100727
728 // Creates structures for input & output
729 std::vector<float> inputData
730 {
731 1.0f, 2.0f, 3.0f, 4.0f
732 };
733
734 std::vector<float> outputData0(4);
735 std::vector<float> outputData1(4);
736
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100737 std::vector<float> expectedOutput
738 {
739 1.0f, 4.0f, 9.0f, 16.0f
740 };
741
Ferran Balaguer83239f92019-09-19 11:49:25 +0100742 InputTensors inputTensors
743 {
744 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
745 };
746 OutputTensors outputTensors
747 {
748 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
749 {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
750 };
751
752 // The result of the inference is not important, just the fact that there
753 // should not be CopyMemGeneric workloads.
754 runtime->GetProfiler(netId)->EnableProfiling(true);
755
756 // Do the inference
757 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
758
759 // Retrieve the Profiler.Print() output to get the workload execution
760 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
761 std::stringstream ss;
762 profilerManager.GetProfiler()->Print(ss);
763 std::string dump = ss.str();
764
765 std::size_t found = std::string::npos;
766
767 if (backends[0] == Compute::CpuRef)
768 {
769 found = dump.find("RefActivationWorkload");
770 }
771 else if (backends[0] == Compute::CpuAcc)
772 {
773 found = dump.find("NeonActivationWorkload");
774 }
775 else if (backends[0] == Compute::GpuAcc)
776 {
777 found = dump.find("ClActivationWorkload");
778 }
779
Sadik Armagan1625efc2021-06-10 18:24:34 +0100780 CHECK(found != std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100781 // No contains SyncMemGeneric
782 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100783 CHECK(found == std::string::npos);
Ferran Balaguer83239f92019-09-19 11:49:25 +0100784 // Contains CopyMemGeneric
785 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100786 CHECK(found != std::string::npos);
Narumol Prangnawarat3b90af62020-06-26 11:00:21 +0100787
788 // Check that the outputs are correct
Sadik Armagan1625efc2021-06-10 18:24:34 +0100789 CHECK(std::equal(outputData0.begin(), outputData0.end(),
790 expectedOutput.begin(), expectedOutput.end()));
791 CHECK(std::equal(outputData1.begin(), outputData1.end(),
792 expectedOutput.begin(), expectedOutput.end()));
Ferran Balaguer83239f92019-09-19 11:49:25 +0100793}
794
David Monahan0a99a142020-03-13 07:52:54 +0000795inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
796{
797 using namespace armnn;
798
799 // Create runtime in which test will run
800 IRuntime::CreationOptions options;
801 IRuntimePtr runtime(armnn::IRuntime::Create(options));
802
803 // build up the structure of the network
804 INetworkPtr net(INetwork::Create());
805
806 IConnectableLayer* input = net->AddInputLayer(0);
807
808 // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
809 // dim of the output to make it too small to hold the specified slice.
810 StridedSliceDescriptor descriptor;
811 descriptor.m_Begin = {0, 0};
812 descriptor.m_End = {2, 3};
813 descriptor.m_Stride = {1, 1};
814 descriptor.m_BeginMask = 0;
815 descriptor.m_EndMask = 0;
816 descriptor.m_ShrinkAxisMask = 1;
817 IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
818
819 IConnectableLayer* output0 = net->AddOutputLayer(0);
820
821 input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
822 stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
823
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100824 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
David Monahan0a99a142020-03-13 07:52:54 +0000825 stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
826
827 // Attempt to optimize the network and check that the correct exception is thrown
Sadik Armagan1625efc2021-06-10 18:24:34 +0100828 CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
David Monahan0a99a142020-03-13 07:52:54 +0000829}
830
David Monahan646bc8a2022-01-31 14:29:14 +0000831inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
832{
833 /**
834 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
835 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
836 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
837 * In this case all inputs and outputs should be imported
838 */
839 using namespace armnn;
840 IRuntime::CreationOptions options;
841 IRuntimePtr runtime(IRuntime::Create(options));
842
843 // Builds up the structure of the network.
844 INetworkPtr net(INetwork::Create());
845 IConnectableLayer* input = net->AddInputLayer(0);
846 ActivationDescriptor descriptor;
847 descriptor.m_Function = ActivationFunction::Square;
848 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
849 IConnectableLayer* output = net->AddOutputLayer(0);
850 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
851 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
852 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
853 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
854 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
855 INFO("Load Network");
856
857 // Load it into the runtime. It should pass.
858 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000859 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +0000860 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000861 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
862 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
863
David Monahan646bc8a2022-01-31 14:29:14 +0000864 INFO("Generate Data");
865
866 // Creates structures for input & output
867 std::vector<float> inputData
868 {
869 1.0f, 2.0f, 3.0f, 4.0f
870 };
871 std::vector<float> outputData(4);
872 std::vector<float> expectedOutput
873 {
874 1.0f, 4.0f, 9.0f, 16.0f
875 };
876
877 // Check our input and output pointers are actually aligned
878 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
879 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
880 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
881
882 INFO("Create Inference");
883 InputTensors inputTensors
884 {
885 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
886 };
887 OutputTensors outputTensors
888 {
889 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
890 };
891
892 runtime->GetProfiler(netId)->EnableProfiling(true);
893 std::vector<ImportedInputId> importedInputIds =
894 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100895 CHECK(importedInputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +0000896 std::vector<ImportedOutputId> importedOutputIds =
897 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100898 CHECK(importedOutputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +0000899 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +0100900 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +0000901
902 // Retrieve the Profiler.Print() output to get the workload execution
903 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
904 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +0000905 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +0000906 std::string dump = ss.str();
907
908 if (backends[0] == Compute::CpuAcc)
909 {
910 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
911 // reconfigure is implemented
912 int count = SubStringCounter(dump, "SyncMemGeneric");
913 CHECK(count == 0);
914 // Should be 2 CopyMemGeneric workloads
915 count = SubStringCounter(dump, "CopyMemGeneric");
916 CHECK(count == 2);
917 }
918 else
919 {
920 // Check there is a SyncMemGeneric workload as we exported
921 int count = SubStringCounter(dump, "SyncMemGeneric");
922 CHECK(count == 1);
923 // Shouldn't be any CopyMemGeneric workloads
924 count = SubStringCounter(dump, "CopyMemGeneric");
925 CHECK(count == 0);
926 }
927 // Check the output is correct
928 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
929}
930
931inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
932{
933 /**
934 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
935 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
936 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
937 * In this case all only the output should be imported
938 */
939 using namespace armnn;
940
941 IRuntime::CreationOptions options;
942 IRuntimePtr runtime(IRuntime::Create(options));
943
944 // Builds up the structure of the network.
945 INetworkPtr net(INetwork::Create());
946 IConnectableLayer* input = net->AddInputLayer(0);
947
948 ActivationDescriptor descriptor;
949 descriptor.m_Function = ActivationFunction::Square;
950 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
951
952 IConnectableLayer* output = net->AddOutputLayer(0);
953
954 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
955 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
956 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
957 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
958
959 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
960 INFO("Load Network");
961 // Load it into the runtime. It should pass.
962 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000963 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +0000964 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +0000965 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
966 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
967
David Monahan646bc8a2022-01-31 14:29:14 +0000968 INFO("Generate Data");
969
970 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
971 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
972 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
973
974 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
975
976 // Check if our pointer is truly misaligned
977 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
978 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
979
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000980 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +0000981 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +0000982 1.0f, 2.0f, 3.0f, 4.0f
983 };
984
985 std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +0000986
987 std::vector<float> outputData(4);
988 // Check our output buffer is aligned
989 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
990
991 std::vector<float> expectedOutput
992 {
993 1.0f, 4.0f, 9.0f, 16.0f
994 };
995
996 INFO("Create Inference");
997 InputTensors inputTensors
998 {
999 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
1000 };
1001 OutputTensors outputTensors
1002 {
1003 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1004 };
1005 runtime->GetProfiler(netId)->EnableProfiling(true);
1006 std::vector<ImportedInputId> importedInputIds =
1007 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001008 // We expect the import to have failed.
1009 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001010 std::vector<ImportedOutputId> importedOutputIds =
1011 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001012 CHECK(importedOutputIds.size() == 1);
David Monahan646bc8a2022-01-31 14:29:14 +00001013
1014 // Do the inference and force the import as the memory is misaligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001015 runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001016
1017 // Retrieve the Profiler.Print() output to get the workload execution
1018 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1019 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001020 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001021 std::string dump = ss.str();
1022
1023 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1024 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1025 // for imports/copies. Only that the output is correct.
1026 if (backends[0] != Compute::GpuAcc)
1027 {
1028 if (backends[0] == Compute::CpuAcc)
1029 {
1030 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1031 // reconfigure is implemented
1032 // We should get 0 SyncMemGeneric for the Output
1033 int count = SubStringCounter(dump, "SyncMemGeneric");
1034 CHECK(count == 0);
1035 // Should be 2 CopyMemGeneric as we copied the input
1036 count = SubStringCounter(dump, "CopyMemGeneric");
1037 CHECK(count == 2);
1038 }
1039 else
1040 {
1041 // We should get 1 SyncMemGeneric for the Output
1042 int count = SubStringCounter(dump, "SyncMemGeneric");
1043 CHECK(count == 1);
1044 // Should only be 1 CopyMemGeneric as we copied the input
1045 count = SubStringCounter(dump, "CopyMemGeneric");
1046 CHECK(count == 1);
1047 }
1048 }
1049 // Check the output is correct
1050 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1051 std::free(memPtr);
1052}
1053
1054inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1055{
1056 /**
1057 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1058 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1059 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1060 * In this case all only the input should be imported
1061 */
1062 using namespace armnn;
1063
1064 IRuntime::CreationOptions options;
1065 IRuntimePtr runtime(IRuntime::Create(options));
1066
1067 // Builds up the structure of the network.
1068 INetworkPtr net(INetwork::Create());
1069 IConnectableLayer* input = net->AddInputLayer(0);
1070
1071 ActivationDescriptor descriptor;
1072 descriptor.m_Function = ActivationFunction::Square;
1073 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1074
1075 IConnectableLayer* output = net->AddOutputLayer(0);
1076
1077 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1078 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1079 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1080 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1081
1082 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1083 INFO("Load Network");
1084 // Load it into the runtime. It should pass.
1085 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001086 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +00001087 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001088 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1089 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
1090
David Monahan646bc8a2022-01-31 14:29:14 +00001091 INFO("Generate Data");
1092
1093 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1094 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1095 auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1096
1097 float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1098
1099 // Check if our pointer is truly misaligned
1100 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1101 CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1102
1103 // Creates structures for input & output
1104 std::vector<float> inputData
1105 {
1106 1.0f, 2.0f, 3.0f, 4.0f
1107 };
1108
1109 // Check our input buffer is aligned
1110 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1111 std::vector<float> expectedOutput
1112 {
1113 1.0f, 4.0f, 9.0f, 16.0f
1114 };
1115
1116 INFO("Create Inference");
1117 InputTensors inputTensors
1118 {
1119 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1120 };
1121 OutputTensors outputTensors
1122 {
1123 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1124 };
1125 runtime->GetProfiler(netId)->EnableProfiling(true);
1126 std::vector<ImportedInputId> importedInputIds =
1127 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001128 CHECK(importedInputIds.size() == 1);
1129 // We expect this to fail.
David Monahan646bc8a2022-01-31 14:29:14 +00001130 std::vector<ImportedOutputId> importedOutputIds =
1131 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001132 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001133
Colm Doneland7ceec52022-07-06 12:09:05 +01001134 // Even if importing the output failed we still expect to be able to get it to work.
1135 runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
David Monahan646bc8a2022-01-31 14:29:14 +00001136
1137 // Retrieve the Profiler.Print() output to get the workload execution
1138 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1139 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001140 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001141 std::string dump = ss.str();
1142
1143 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1144 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1145 // for imports/copies. Only that the output is correct.
1146 if (backends[0] != Compute::GpuAcc)
1147 {
1148 // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1149 int count = SubStringCounter(dump, "SyncMemGeneric");
1150 CHECK(count == 0);
1151 // Should only be 1 CopyMemGeneric as we copied the input
1152 count = SubStringCounter(dump, "CopyMemGeneric");
1153 if (backends[0] == Compute::CpuAcc)
1154 {
1155 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1156 // reconfigure is implemented
1157 CHECK(count == 2);
1158 }
1159 else
1160 {
1161 CHECK(count == 1);
1162 }
1163 // Check the output is correct
1164 }
1165 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001166 std::vector<float> outputData(expectedOutput.size(), 0);
1167 std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001168 for (auto outputValue : expectedOutput)
1169 {
David Monahaneef6b762022-02-10 16:01:58 +00001170 CHECK(outputValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001171 ++index;
1172 }
1173 std::free(memPtr);
1174}
1175
1176inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1177{
1178 /**
1179 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1180 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1181 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1182 * In this case all inputs and outputs should be copied
1183 */
1184 using namespace armnn;
1185
1186 IRuntime::CreationOptions options;
1187 IRuntimePtr runtime(IRuntime::Create(options));
1188
1189 // Builds up the structure of the network.
1190 INetworkPtr net(INetwork::Create());
1191 IConnectableLayer* input = net->AddInputLayer(0);
1192
1193 ActivationDescriptor descriptor;
1194 descriptor.m_Function = ActivationFunction::Square;
1195 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1196
1197 IConnectableLayer* output = net->AddOutputLayer(0);
1198
1199 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1200 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1201 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1202 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1203
1204 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1205 INFO("Load Network");
1206 // Load it into the runtime. It should pass.
1207 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001208 std::string errorMessage;
David Monahan646bc8a2022-01-31 14:29:14 +00001209 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001210 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1211 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan646bc8a2022-01-31 14:29:14 +00001212 INFO("Generate Data");
1213
1214 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1215 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1216 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1217 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1218
1219 // Check if our pointer is truly misaligned
1220 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1221 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001222 std::vector<float> inputData
David Monahan646bc8a2022-01-31 14:29:14 +00001223 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001224 1.0f, 2.0f, 3.0f, 4.0f
1225 };
1226 std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
David Monahan646bc8a2022-01-31 14:29:14 +00001227
1228 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1229 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1230
1231 // Check if our pointer is truly misaligned
1232 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1233
1234 std::vector<float> expectedOutput
1235 {
1236 1.0f, 4.0f, 9.0f, 16.0f
1237 };
1238
1239 INFO("Create Inference");
1240 InputTensors inputTensors
1241 {
1242 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1243 };
1244 OutputTensors outputTensors
1245 {
1246 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1247 };
1248 runtime->GetProfiler(netId)->EnableProfiling(true);
1249 std::vector<ImportedInputId> importedInputIds =
1250 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001251 // Import should have failed.
1252 CHECK(importedInputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001253 std::vector<ImportedOutputId> importedOutputIds =
1254 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001255 // Import should have failed.
1256 CHECK(importedOutputIds.size() == 0);
David Monahan646bc8a2022-01-31 14:29:14 +00001257
1258 // Do the inference and force the import as the memory is misaligned.
1259 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1260
1261 // Retrieve the Profiler.Print() output to get the workload execution
1262 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1263 std::stringstream ss;
David Monahan16829712022-02-03 17:04:59 +00001264 profilerManager.GetProfiler()->Print(ss);
David Monahan646bc8a2022-01-31 14:29:14 +00001265 std::string dump = ss.str();
1266
1267 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1268 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1269 // for imports/copies. Only that the output is correct.
1270 if (backends[0] != Compute::GpuAcc)
1271 {
1272 // We can only copy so there should be no SyncMemGeneric
1273 int count = SubStringCounter(dump, "SyncMemGeneric");
1274 CHECK(count == 0);
1275 // Should only be CopyMemGeneric workloads as we copied all buffers
1276 count = SubStringCounter(dump, "CopyMemGeneric");
1277 CHECK(count == 2);
1278 }
1279 // Check the output is correct
1280 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001281 std::vector<float> outputData(expectedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001282 std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1283 for (auto expectedValue : expectedOutput)
David Monahan646bc8a2022-01-31 14:29:14 +00001284 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001285 CHECK(expectedValue == outputData[index]);
David Monahan646bc8a2022-01-31 14:29:14 +00001286 ++index;
1287 }
1288 std::free(inputMemPtr);
1289 std::free(outputMemPtr);
1290}
1291
David Monahan16829712022-02-03 17:04:59 +00001292inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1293{
1294 /**
1295 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1296 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1297 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1298 * In this we create some aligned buffers, import them into a network and validate the output and number of
1299 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with misaligned buffers to make sure it falls
1300 * back to copying correctly.
1301 */
1302 using namespace armnn;
1303
1304 IRuntime::CreationOptions options;
1305 IRuntimePtr runtime(IRuntime::Create(options));
1306
1307 // Builds up the structure of the network.
1308 INetworkPtr net(INetwork::Create());
1309 IConnectableLayer* input = net->AddInputLayer(0);
1310
1311 ActivationDescriptor descriptor;
1312 descriptor.m_Function = ActivationFunction::Square;
1313 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1314
1315 IConnectableLayer* output = net->AddOutputLayer(0);
1316
1317 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1318 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1319 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1320 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1321
1322 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1323 INFO("Load Network");
1324 // Load it into the runtime. It should pass.
1325 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001326 std::string errorMessage;
David Monahan16829712022-02-03 17:04:59 +00001327 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001328 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1329 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan16829712022-02-03 17:04:59 +00001330 INFO("Generate Data");
1331
1332 // Creates structures for input & output
1333 std::vector<float> inputData
1334 {
1335 1.0f, 2.0f, 3.0f, 4.0f
1336 };
1337 std::vector<float> outputData(4);
1338 std::vector<float> expectedOutput
1339 {
1340 1.0f, 4.0f, 9.0f, 16.0f
1341 };
1342
1343 // Check our input and output pointers are actually aligned
1344 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1345 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1346 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1347
1348 INFO("Create Inference");
1349 InputTensors inputTensors
1350 {
1351 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1352 };
1353 OutputTensors outputTensors
1354 {
1355 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1356 };
1357
1358 runtime->GetProfiler(netId)->EnableProfiling(true);
1359 std::vector<ImportedInputId> importedInputIds =
1360 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001361 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001362 std::vector<ImportedOutputId> importedOutputIds =
1363 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001364 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001365 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001366 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001367
1368 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1369 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1370 std::stringstream ss;
1371 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1372 std::string dump = ss.str();
1373
1374 if (backends[0] == Compute::CpuAcc)
1375 {
1376 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1377 // reconfigure is implemented
1378 int count = SubStringCounter(dump, "SyncMemGeneric");
1379 CHECK(count == 0);
1380 // Should be 2 CopyMemGeneric workloads
1381 count = SubStringCounter(dump, "CopyMemGeneric");
1382 CHECK(count >= 1);
1383 }
1384 else
1385 {
1386 // Check there is at least 1 SyncMemGeneric workload as we exported
1387 int count = SubStringCounter(dump, "SyncMemGeneric");
1388 CHECK(count >= 1);
1389 // Shouldn't be any CopyMemGeneric workloads
1390 count = SubStringCounter(dump, "CopyMemGeneric");
1391 CHECK(count == 0);
1392 }
1393 // Check the output is correct
1394 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1395
1396 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1397 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1398 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1399 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1400
1401 // Check if our pointer is truly misaligned
1402 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001403
1404 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001405 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001406 2.0f, 3.0f, 4.0f, 5.0f
1407 };
1408
1409 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001410
1411 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1412 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1413
1414 // Check if our pointer is truly misaligned
1415 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1416
1417 std::vector<float> expectedMisalignedOutput
1418 {
1419 4.0f, 9.0f, 16.0f, 25.0f
1420 };
1421
1422 INFO("Create Second Inference");
1423 InputTensors inputTensorsMisaligned
1424 {
1425 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1426 };
1427 OutputTensors outputTensorsMisaligned
1428 {
1429 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1430 };
1431 importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001432 // Import should fail.
1433 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001434 importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001435 // Import should fail.
1436 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001437
1438 // Do the inference and force the import as the memory is misaligned.
1439 runtime->EnqueueWorkload(netId,
1440 inputTensorsMisaligned,
1441 outputTensorsMisaligned,
1442 importedInputIds,
1443 importedOutputIds);
1444
1445 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1446 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1447 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1448 dump = ss.str();
1449
1450 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1451 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1452 // for imports/copies. Only that the output is correct.
1453 if (backends[0] != Compute::GpuAcc)
1454 {
1455 // The SyncMemGeneric will still be in the profiling log from the first inference
1456 int count = SubStringCounter(dump, "SyncMemGeneric");
1457 CHECK(count >= 1);
1458 // We should now see CopyMemGeneric workloads as we copied all buffers
1459 count = SubStringCounter(dump, "CopyMemGeneric");
1460 CHECK(count >= 1);
1461 }
1462 // Check the output is correct
1463 unsigned int index = 0;
David Monahaneef6b762022-02-10 16:01:58 +00001464 std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001465 std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001466 for (auto outputValue : expectedMisalignedOutput)
1467 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001468 CHECK(outputValue == alignedOutputData[index]);
David Monahan16829712022-02-03 17:04:59 +00001469 ++index;
1470 }
1471 // Clean up to avoid interfering with other tests
1472 runtime->UnloadNetwork(netId);
1473 std::free(inputMemPtr);
1474 std::free(outputMemPtr);
1475}
1476
1477
1478inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1479{
1480 /**
1481 * This test is similar to the Import tests above, we create a network with a square function and pass in a vector
1482 * with 4 floats, square them. and validate the output. We then check the profiling logs to see if input/output
1483 * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric)
1484 * In this we create some misaligned buffers, copy them into a network and validate the output and number of
1485 * SynMemGeneric/CopyMemgeneric. Then we try the same network again with aligned buffers to make sure it switches
1486 * to importing correctly.
1487 */
1488 using namespace armnn;
1489
1490 IRuntime::CreationOptions options;
1491 IRuntimePtr runtime(IRuntime::Create(options));
1492
1493 // Builds up the structure of the network.
1494 INetworkPtr net(INetwork::Create());
1495 IConnectableLayer* input = net->AddInputLayer(0);
1496
1497 ActivationDescriptor descriptor;
1498 descriptor.m_Function = ActivationFunction::Square;
1499 IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1500
1501 IConnectableLayer* output = net->AddOutputLayer(0);
1502
1503 input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1504 activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1505 input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1506 activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1507
1508 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1509 INFO("Load Network");
1510 // Load it into the runtime. It should pass.
1511 NetworkId netId;
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001512 std::string errorMessage;
David Monahan16829712022-02-03 17:04:59 +00001513 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
Teresa Charlindf15c4e2023-02-21 15:16:09 +00001514 armnn::Status loadingStatus = runtime->LoadNetwork(netId, std::move(optNet), errorMessage, networkProperties);
1515 CHECK_MESSAGE(loadingStatus == Status::Success, errorMessage);
David Monahan16829712022-02-03 17:04:59 +00001516 INFO("Generate Data");
1517
1518 // This code looks a little funky but the idea is to create a buffer of floats but offset by the size of a char
1519 // this will guarantee that the resultant buffer is misaligned and thus should always be copied.
1520 auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1521 float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1522
1523 // Check if our pointer is truly misaligned
1524 uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1525 CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001526 std::vector<float> inputValues
David Monahan16829712022-02-03 17:04:59 +00001527 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001528 2.0f, 3.0f, 4.0f, 5.0f
1529 };
1530 std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001531
1532 auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1533 float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1534
1535 // Check if our pointer is truly misaligned
1536 CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1537
1538 std::vector<float> expectedMisalignedOutput
1539 {
1540 4.0f, 9.0f, 16.0f, 25.0f
1541 };
1542
1543 INFO("Create Second Inference");
1544 InputTensors inputTensorsMisaligned
1545 {
1546 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1547 };
1548 OutputTensors outputTensorsMisaligned
1549 {
1550 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1551 };
1552 runtime->GetProfiler(netId)->EnableProfiling(true);
1553 std::vector<ImportedInputId> importedInputIds =
1554 runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001555 // Import should fail.
1556 CHECK(importedInputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001557 std::vector<ImportedOutputId> importedOutputIds =
1558 runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001559 // Import should fail.
1560 CHECK(importedOutputIds.size() == 0);
David Monahan16829712022-02-03 17:04:59 +00001561
1562 // Do the inference and force the import as the memory is misaligned.
1563 runtime->EnqueueWorkload(netId,
1564 inputTensorsMisaligned,
1565 outputTensorsMisaligned,
1566 importedInputIds,
1567 importedOutputIds);
1568
1569 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1570 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1571 std::stringstream ss;
1572 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1573 std::string dump = ss.str();
1574
1575 // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1576 // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1577 // for imports/copies. Only that the output is correct.
1578 if (backends[0] != Compute::GpuAcc)
1579 {
1580 // We can only copy so there should be no SyncMemGeneric
1581 int count = SubStringCounter(dump, "SyncMemGeneric");
1582 CHECK(count == 0);
1583 // Should only be CopyMemGeneric workloads as we copied all buffers
1584 count = SubStringCounter(dump, "CopyMemGeneric");
1585 CHECK(count >= 1);
1586 }
1587 // Check the output is correct
1588 unsigned int index = 0;
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001589 std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1590 std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
David Monahan16829712022-02-03 17:04:59 +00001591 for (auto outputValue : expectedMisalignedOutput)
1592 {
Matthew Benthamc92bbd72022-02-10 11:12:34 +00001593 CHECK(outputValue == alignedOutput[index]);
David Monahan16829712022-02-03 17:04:59 +00001594 ++index;
1595 }
1596 std::free(inputMemPtr);
1597 std::free(outputMemPtr);
1598
1599 // Creates structures for input & output
1600 std::vector<float> inputData
1601 {
1602 1.0f, 2.0f, 3.0f, 4.0f
1603 };
1604 std::vector<float> outputData(4);
1605 std::vector<float> expectedOutput
1606 {
1607 1.0f, 4.0f, 9.0f, 16.0f
1608 };
1609
1610 // Check our input and output pointers are actually aligned
1611 CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1612 CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1613
1614 INFO("Create Inference");
1615 InputTensors inputTensors
1616 {
1617 {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1618 };
1619 OutputTensors outputTensors
1620 {
1621 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1622 };
1623
1624 importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001625 CHECK(importedInputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001626 importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001627 CHECK(importedOutputIds.size() == 1);
David Monahan16829712022-02-03 17:04:59 +00001628 // Do the inference and force the import as the memory is aligned.
Colm Doneland7ceec52022-07-06 12:09:05 +01001629 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan16829712022-02-03 17:04:59 +00001630
1631 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1632 // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1633 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1634 dump = ss.str();
1635
1636 if (backends[0] == Compute::CpuAcc)
1637 {
1638 // Reconfigure has not been implemented for CpuAcc so it will always copy, this will break whenever
1639 // reconfigure is implemented
1640 int count = SubStringCounter(dump, "SyncMemGeneric");
1641 CHECK(count == 0);
1642 // Should be 2 CopyMemGeneric workloads
1643 count = SubStringCounter(dump, "CopyMemGeneric");
1644 CHECK(count >= 1);
1645 }
1646 else
1647 {
1648 // Repeated inferences make it difficult to check for an accurate count. So we just validate that we have a
1649 // SyncMemGeneric Workload when we previously didn't
1650 int count = SubStringCounter(dump, "SyncMemGeneric");
1651 CHECK(count >= 1);
1652 // Should still be some CopyMemGeneric Workloads from the last inference
1653 count = SubStringCounter(dump, "CopyMemGeneric");
1654 CHECK(count >= 1);
1655 }
1656 // Check the output is correct
1657 CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1658 // Clean up to avoid interfering with other tests
1659 runtime->UnloadNetwork(netId);
1660}
1661
Nattapat Chaimanowong1fcb4ff2019-01-24 15:25:26 +00001662} // anonymous namespace