blob: 39619e642156ca7afa1388ddcba4a6e8cc37affc [file] [log] [blame]
David Monahane4a41dc2021-04-14 16:55:36 +01001//
John Mcloughlinc5ee0d72023-03-24 12:07:25 +00002// Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved.
David Monahane4a41dc2021-04-14 16:55:36 +01003// SPDX-License-Identifier: MIT
4//
5
6#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>
7
8#include <cl/ClImportTensorHandle.hpp>
9#include <cl/ClImportTensorHandleFactory.hpp>
10#include <cl/test/ClContextControlFixture.hpp>
11
Sadik Armagan1625efc2021-06-10 18:24:34 +010012#include <doctest/doctest.h>
13
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010014#include <armnn/IRuntime.hpp>
15#include <armnn/INetwork.hpp>
Cathal Corbetta3f4fba2022-03-21 09:27:08 +000016#include "Network.hpp"
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010017
David Monahane4a41dc2021-04-14 16:55:36 +010018using namespace armnn;
19
Sadik Armagan1625efc2021-06-10 18:24:34 +010020TEST_SUITE("ClImportTensorHandleTests")
21{
22TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
David Monahane4a41dc2021-04-14 16:55:36 +010023{
24 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
25 static_cast<MemorySourceFlags>(MemorySource::Malloc));
26
27 TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
28 unsigned int numElements = info.GetNumElements();
29
30 // create TensorHandle for memory import
31 auto handle = handleFactory.CreateTensorHandle(info);
32
33 // Get CLtensor
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
35
36 // Create and configure activation function
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor, nullptr, act_info);
40
41 // Allocate user memory
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010045 size_t space = totalBytes + alignment + alignment;
David Monahane4a41dc2021-04-14 16:55:36 +010046 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +010048 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
David Monahane4a41dc2021-04-14 16:55:36 +010049
50 // Import memory
Sadik Armagan1625efc2021-06-10 18:24:34 +010051 CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc));
David Monahane4a41dc2021-04-14 16:55:36 +010052
53 // Input with negative values
54 auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
56
57 // Execute function and sync
58 act_func.run();
59 arm_compute::CLScheduler::get().sync();
60
61 // Validate result by checking that the output has no negative values
62 for(unsigned int i = 0; i < numElements; ++i)
63 {
Jan Eilersc1c872f2021-07-22 13:17:04 +010064 CHECK(typedPtr[i] == 0);
David Monahane4a41dc2021-04-14 16:55:36 +010065 }
66}
67
Sadik Armagan1625efc2021-06-10 18:24:34 +010068TEST_CASE_FIXTURE(ClContextControlFixture, "ClIncorrectMemorySourceImport")
David Monahane4a41dc2021-04-14 16:55:36 +010069{
70 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
71 static_cast<MemorySourceFlags>(MemorySource::Malloc));
72
73 TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
74
75 // create TensorHandle for memory import
76 auto handle = handleFactory.CreateTensorHandle(info);
77
78 // Get CLtensor
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
80
81 // Allocate user memory
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010085 size_t space = totalBytes + alignment + alignment;
David Monahane4a41dc2021-04-14 16:55:36 +010086 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +010088 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
David Monahane4a41dc2021-04-14 16:55:36 +010089
90 // Import memory
Sadik Armagan1625efc2021-06-10 18:24:34 +010091 CHECK_THROWS_AS(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
David Monahane4a41dc2021-04-14 16:55:36 +010092}
93
Sadik Armagan1625efc2021-06-10 18:24:34 +010094TEST_CASE_FIXTURE(ClContextControlFixture, "ClInvalidMemorySourceImport")
David Monahane4a41dc2021-04-14 16:55:36 +010095{
96 MemorySource invalidMemSource = static_cast<MemorySource>(256);
97 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(invalidMemSource),
98 static_cast<MemorySourceFlags>(invalidMemSource));
99
100 TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);
101
102 // create TensorHandle for memory import
103 auto handle = handleFactory.CreateTensorHandle(info);
104
105 // Allocate user memory
106 std::vector<float> inputData
107 {
108 1.0f, 2.0f, 3.0f, 4.0f
109 };
110
111 // Import non-support memory
Sadik Armagan1625efc2021-06-10 18:24:34 +0100112 CHECK_THROWS_AS(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
David Monahane4a41dc2021-04-14 16:55:36 +0100113}
114
Sadik Armagan1625efc2021-06-10 18:24:34 +0100115TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100116{
117 // Create runtime in which test will run
118 IRuntime::CreationOptions options;
119 IRuntimePtr runtime(armnn::IRuntime::Create(options));
120
121 // build up the structure of the network
122 INetworkPtr net(INetwork::Create());
123
124 IConnectableLayer* input = net->AddInputLayer(0, "Input");
125
126 ActivationDescriptor descriptor;
127 descriptor.m_Function = ActivationFunction::ReLu;
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");
129
130 IConnectableLayer* output = net->AddOutputLayer(0, "Output");
131
132 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
133 activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
134
135 TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
136 unsigned int numElements = tensorInfo.GetNumElements();
137 size_t totalBytes = numElements * sizeof(float);
138
139 input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
140 activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);
141
142 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000143 OptimizerOptionsOpaque optOptions;
144 optOptions.SetImportEnabled(true);
145 optOptions.SetExportEnabled(true);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100146 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
147 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100148 CHECK(optNet);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100149
150 // Loads it into the runtime.
151 NetworkId netId;
152 std::string ignoredErrorMessage;
153 // Enable Importing
154 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
155 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
156
157 // Creates structures for input & output
158 const size_t alignment =
159 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
160 size_t space = totalBytes + alignment + alignment;
161 auto inputData = std::make_unique<uint8_t[]>(space);
162 void* alignedInputPtr = inputData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100163 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100164
165 // Input with negative values
166 auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
167 std::fill_n(intputPtr, numElements, -5.0f);
168
169 auto outputData = std::make_unique<uint8_t[]>(space);
170 void* alignedOutputPtr = outputData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100171 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
Narumol Prangnawarat878e0f92021-05-11 19:51:14 +0100172 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
173 std::fill_n(outputPtr, numElements, -10.0f);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100174
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100175 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
176 inputTensorInfo.SetConstant(true);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100177 InputTensors inputTensors
178 {
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100179 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100180 };
181 OutputTensors outputTensors
182 {
183 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
184 };
185
186 runtime->GetProfiler(netId)->EnableProfiling(true);
187
188 // Do the inference
189 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
190
191 // Retrieve the Profiler.Print() output to get the workload execution
192 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
193 std::stringstream ss;
194 profilerManager.GetProfiler()->Print(ss);;
195 std::string dump = ss.str();
196
197 // Contains ActivationWorkload
198 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100199 CHECK(found != std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100200
201 // Contains SyncMemGeneric
202 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100203 CHECK(found != std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100204
205 // Does not contain CopyMemGeneric
206 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100207 CHECK(found == std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100208
Narumol Prangnawarat878e0f92021-05-11 19:51:14 +0100209 runtime->UnloadNetwork(netId);
210
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100211 // Check output is as expected
212 // Validate result by checking that the output has no negative values
213 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100214 CHECK(outputResult);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100215 for(unsigned int i = 0; i < numElements; ++i)
216 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100217 CHECK(outputResult[i] >= 0);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100218 }
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100219}
220
Nikhil Raj60ab9762022-01-13 09:34:44 +0000221TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
222{
223 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
224 static_cast<MemorySourceFlags>(MemorySource::Malloc));
225
226 TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
227
228 // create TensorHandle for memory import
David Monahan3826ab62022-02-21 12:26:16 +0000229 auto handle = handleFactory.CreateTensorHandle(info, DataLayout::NHWC);
Nikhil Raj60ab9762022-01-13 09:34:44 +0000230
231 // Get CLtensor
232 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
233
234 // Allocate user memory
235 const size_t totalBytes = tensor.info()->total_size();
236 const size_t alignment =
237 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
238 size_t space = totalBytes + alignment + alignment;
239 auto testData = std::make_unique<uint8_t[]>(space);
240 void* alignedPtr = testData.get();
241 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
242
243 // Import memory
244 CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
245
246}
247
248TEST_CASE("ClCanBeImportedAlignedMemory")
249{
250 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
251 static_cast<MemorySourceFlags>(MemorySource::Malloc));
252
253 TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);
254
255 // create TensorHandle (Memory Managed status is irrelevant)
David Monahan3826ab62022-02-21 12:26:16 +0000256 auto handle = handleFactory.CreateTensorHandle(info, DataLayout::NHWC);
Nikhil Raj60ab9762022-01-13 09:34:44 +0000257 // Get CLtensor
258 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
259
260 // Create an aligned buffer
261 const size_t totalBytes = tensor.info()->total_size();
262 const size_t alignment =
263 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
264 size_t space = totalBytes + alignment + alignment;
265 auto testData = std::make_unique<uint8_t[]>(space);
266 void* alignedPtr = testData.get();
267 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
268
269 // Check aligned buffers return true
270 CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);
271
272 // Due to the nature of how GPU memory is mapped it is entirely possible for memory which is misaligned on cpu
273 // to be successfully import on GPU. As such there is no way to create a misaligned pointer that will always fail.
274 // Rather it will succeed on some devices and fail on others. As long as a correctly aligned buffer returns true
275 // we can be confident that it will be successfully imported. All other cases will need to be handled by the user.
276}
277
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000278TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd")
279{
280 // Create runtime in which test will run
281 IRuntime::CreationOptions options;
282 IRuntimePtr runtime(armnn::IRuntime::Create(options));
283
284 // build up the structure of the network
285 INetworkPtr network(INetwork::Create());
286
287 armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
288 armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
289 armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);
290
291 kernelInfo.SetConstant(true);
292
293 std::vector<float> kernel =
294 {
295 4, 5, 6,
296 0, 0, 0,
297 3, 2, 1
298 };
299
300 const std::vector<float> expectedOutput =
301 {
302 23, 41, 33, 21,
303 44, 65, 76, 52,
304 82, 85, 79, 42
305 };
306
307 unsigned int numElements = inputInfo.GetNumElements();
308 size_t totalBytes = numElements * sizeof(float);
309
310 IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
311 ARMNN_ASSERT(inputLayer);
312
313 armnn::ConstTensor weights(kernelInfo, kernel);
314
315 armnn::Convolution2dDescriptor convDesc2d;
316 convDesc2d.m_StrideX = 1;
317 convDesc2d.m_StrideY = 1;
318 convDesc2d.m_PadLeft = 1;
319 convDesc2d.m_PadRight = 1;
320 convDesc2d.m_PadTop = 1;
321 convDesc2d.m_PadBottom = 1;
322 convDesc2d.m_DataLayout = DataLayout::NHWC;
Keith Davisb4dd5cc2022-04-07 11:32:00 +0100323
Keith Davis721e6292022-05-17 10:06:53 +0100324 armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, "conv");
325 armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights);
326
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000327 ARMNN_ASSERT(convLayer);
328
Keith Davis721e6292022-05-17 10:06:53 +0100329 weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
330 weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));
331
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000332 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
333 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
334
335 IConnectableLayer* output = network->AddOutputLayer(0, "output");
336 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
337 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
338
339 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000340 OptimizerOptionsOpaque optOptions;
341 optOptions.SetImportEnabled(false);
342 optOptions.SetExportEnabled(false);
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000343 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
344 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
345 CHECK(optNet);
346
347 // Loads it into the runtime.
348 NetworkId netId;
349 std::string ignoredErrorMessage;
350 // Enable Importing
351 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
352 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
353
354 // Creates structures for input & output
355 const size_t alignment =
356 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
357 size_t space = totalBytes + alignment + alignment;
358 auto inputData = std::make_unique<uint8_t[]>(space);
359 void* alignedInputPtr = inputData.get();
360 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
361
362 // Input with negative values
363 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
364 inputPtr[0] = 1;
365 inputPtr[1] = 5;
366 inputPtr[2] = 2;
367 inputPtr[3] = 3;
368 inputPtr[4] = 8;
369 inputPtr[5] = 7;
370 inputPtr[6] = 3;
371 inputPtr[7] = 6;
372 inputPtr[8] = 3;
373 inputPtr[9] = 3;
374 inputPtr[10] = 9;
375 inputPtr[11] = 1;
376
377
378 auto outputData = std::make_unique<uint8_t[]>(space);
379 void* alignedOutputPtr = outputData.get();
380 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
381 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
382 std::fill_n(outputPtr, numElements, -10.0f);
383
384 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
385 inputTensorInfo.SetConstant(true);
386 InputTensors inputTensors
387 {
388 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
389 };
390 OutputTensors outputTensors
391 {
392 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
393 };
394
395 runtime->GetProfiler(netId)->EnableProfiling(true);
396
397 INFO("Run ImportInputs");
398 std::vector<ImportedInputId> importedInputIds =
399 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100400 // We expect the import to have succeeded.
401 CHECK(importedInputIds.size() == 1);
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000402 std::vector<ImportedOutputId> importedOutputIds =
403 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100404 // We expect the import to have succeeded.
405 CHECK(importedOutputIds.size() == 1);
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000406 // Do the inference
Colm Doneland7ceec52022-07-06 12:09:05 +0100407 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000408
409 // Retrieve the Profiler.Print() output to get the workload execution
410 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
411 std::stringstream ss;
412 profilerManager.GetProfiler()->Print(ss);;
413 std::string dump = ss.str();
414
415 // Contains Convolution2dWorkload
416 std::size_t found = dump.find("Convolution2dWorkload");
417 CHECK(found != std::string::npos);
418
419 // Contains SyncMemGeneric
420 found = dump.find("SyncMemGeneric");
421 CHECK(found != std::string::npos);
422
423 // Does not contain CopyMemGeneric
424 found = dump.find("CopyMemGeneric");
425 CHECK(found == std::string::npos);
426
427 runtime->UnloadNetwork(netId);
428
429 // Check output is as expected
430 // Validate result by checking that the output has no negative values
431 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
432 CHECK(outputResult);
433
434 // Check the output is correct
435 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
436}
437
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000438TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConvertFp16toFp32EndToEnd")
439{
440 using namespace half_float::literal;
441
442 // Create runtime in which test will run
443 IRuntime::CreationOptions options;
444 IRuntimePtr runtime(armnn::IRuntime::Create(options));
445
446 // build up the structure of the network
447 NetworkImpl network;
448
449 armnn::TensorInfo inputInfo({1, 3, 2, 3}, armnn::DataType::Float16);
450 armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float32);
451
452 std::vector<float> expectedOutput =
453 {
454 -37.5f, -15.2f, -8.76f, -2.0f, -1.5f, -1.3f, -0.5f, -0.4f, 0.0f,
455 1.0f, 0.4f, 0.5f, 1.3f, 1.5f, 2.0f, 8.76f, 15.2f, 37.5f
456 };
457
458 unsigned int numElements = inputInfo.GetNumElements();
459 size_t totalBytesInput = numElements * sizeof(Half);
460 size_t totalBytesOutput = numElements * sizeof(float);
461
462 IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
463 ARMNN_ASSERT(inputLayer);
464
465 armnn::IConnectableLayer* const convLayer = network.AddConvertFp16ToFp32Layer("convert");
466 ARMNN_ASSERT(convLayer);
467
468 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
469 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
470
471 IConnectableLayer* output = network.AddOutputLayer(0, "output");
472 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
473 convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
474
475 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000476 OptimizerOptionsOpaque optOptions;
477 optOptions.SetImportEnabled(false);
478 optOptions.SetExportEnabled(false);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000479 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
480 IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
481 CHECK(optNet);
482
483 // Loads it into the runtime.
484 NetworkId netId;
485 std::string ignoredErrorMessage;
486 // Enable Importing
487 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
488 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
489
490 // Creates structures for input & output
491 const size_t alignment =
492 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
493 size_t spaceInput = totalBytesInput + alignment + alignment;
494 size_t spaceOutput = totalBytesOutput + alignment + alignment;
495 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
496 void* alignedInputPtr = inputData.get();
497 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
498
499 // Input with negative values
500 auto* inputPtr = reinterpret_cast<Half*>(alignedInputPtr);
501 inputPtr[0] = -37.5_h;
502 inputPtr[1] = -15.2_h;
503 inputPtr[2] = -8.76_h;
504 inputPtr[3] = -2.0_h;
505 inputPtr[4] = -1.5_h;
506 inputPtr[5] = -1.3_h;
507 inputPtr[6] = -0.5_h;
508 inputPtr[7] = -0.4_h;
509 inputPtr[8] = 0.0_h;
510 inputPtr[9] = 1.0_h;
511 inputPtr[10] = 0.4_h;
512 inputPtr[11] = 0.5_h;
513 inputPtr[12] = 1.3_h;
514 inputPtr[13] = 1.5_h;
515 inputPtr[14] = 2.0_h;
516 inputPtr[15] = 8.76_h;
517 inputPtr[16] = 15.2_h;
518 inputPtr[17] = 37.5_h;
519
520 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
521 void* alignedOutputPtr = outputData.get();
522 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
523 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
524 std::fill_n(outputPtr, numElements, -10.0f);
525
526 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
527 inputTensorInfo.SetConstant(true);
528 InputTensors inputTensors
529 {
530 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
531 };
532 OutputTensors outputTensors
533 {
534 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
535 };
536
537 runtime->GetProfiler(netId)->EnableProfiling(true);
538
539 INFO("Run ImportInputs");
540 std::vector<ImportedInputId> importedInputIds =
541 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100542 // We expect the import to have succeeded.
543 CHECK(importedInputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000544 std::vector<ImportedOutputId> importedOutputIds =
545 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100546 // We expect the import to have succeeded.
547 CHECK(importedOutputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000548
549 // Do the inference
Colm Doneland7ceec52022-07-06 12:09:05 +0100550 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000551
552 // Retrieve the Profiler.Print() output to get the workload execution
553 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
554 std::stringstream ss;
555 profilerManager.GetProfiler()->Print(ss);;
556 std::string dump = ss.str();
557
558 // Contains Convolution2dWorkload
559 std::size_t found = dump.find("ConvertFp16ToFp32Workload");
560 CHECK(found != std::string::npos);
561
562 // Contains SyncMemGeneric
563 found = dump.find("SyncMemGeneric");
564 CHECK(found != std::string::npos);
565
566 // Does not contain CopyMemGeneric
567 found = dump.find("CopyMemGeneric");
568 CHECK(found == std::string::npos);
569
570 runtime->UnloadNetwork(netId);
571
572 // Check output is as expected
573 // Validate result by checking that the output has no negative values
574 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
575 CHECK(outputResult);
576
577 // Check the output is correct
578 for (size_t i = 0; i < numElements; ++i)
579 {
580 DOCTEST_CHECK_MESSAGE(outputResult[i] == doctest::Approx(expectedOutput[i]).epsilon(0.0004),
581 "outputValue[" << i << "]: " << outputResult[i] << " != " << expectedOutput[i]);
582 }
583}
584
585
586TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConvertFp32toFp16EndToEnd")
587{
588 using namespace half_float::literal;
589
590 // Create runtime in which test will run
591 IRuntime::CreationOptions options;
592 IRuntimePtr runtime(armnn::IRuntime::Create(options));
593
594 // build up the structure of the network
595 NetworkImpl network;
596
597 armnn::TensorInfo inputInfo({1, 3, 2, 3}, armnn::DataType::Float32);
598 armnn::TensorInfo outputTensorInfo({1, 3, 2, 3}, armnn::DataType::Float16);
599
600 std::vector<Half> expectedOutput =
601 {
602 -37.5_h, -15.2_h, -8.76_h, -2.0_h, -1.5_h, -1.3_h, -0.5_h, -0.4_h, 0.0_h,
603 1.0_h, 0.4_h, 0.5_h, 1.3_h, 1.5_h, 2.0_h, 8.76_h, 15.2_h, 37.5_h
604 };
605
606 unsigned int numElements = inputInfo.GetNumElements();
607 size_t totalBytesInput = numElements * sizeof(float);
608 size_t totalBytesOutput = numElements * sizeof(Half);
609
610 IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
611 ARMNN_ASSERT(inputLayer);
612
613 armnn::IConnectableLayer* const convLayer = network.AddConvertFp32ToFp16Layer("convert");
614 ARMNN_ASSERT(convLayer);
615
616 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
617 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
618
619 IConnectableLayer* output = network.AddOutputLayer(0, "output");
620 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
621 convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
622
623 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000624 OptimizerOptionsOpaque optOptions;
625 optOptions.SetImportEnabled(false);
626 optOptions.SetExportEnabled(false);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000627 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
628 IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
629 CHECK(optNet);
630
631 // Loads it into the runtime.
632 NetworkId netId;
633 std::string ignoredErrorMessage;
634 // Enable Importing
635 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
636 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
637
638 // Creates structures for input & output
639 const size_t alignment =
640 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
641 size_t spaceInput = totalBytesInput + alignment + alignment;
642 size_t spaceOutput = totalBytesOutput + alignment + alignment;
643 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
644 void* alignedInputPtr = inputData.get();
645 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
646
647 // Input with negative values
648 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
649 inputPtr[0] = -37.5f;
650 inputPtr[1] = -15.2f;
651 inputPtr[2] = -8.76f;
652 inputPtr[3] = -2.0f;
653 inputPtr[4] = -1.5f;
654 inputPtr[5] = -1.3f;
655 inputPtr[6] = -0.5f;
656 inputPtr[7] = -0.4f;
657 inputPtr[8] = 0.0f;
658 inputPtr[9] = 1.0f;
659 inputPtr[10] = 0.4f;
660 inputPtr[11] = 0.5f;
661 inputPtr[12] = 1.3f;
662 inputPtr[13] = 1.5f;
663 inputPtr[14] = 2.0f;
664 inputPtr[15] = 8.76f;
665 inputPtr[16] = 15.2f;
666 inputPtr[17] = 37.5f;
667
668 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
669 void* alignedOutputPtr = outputData.get();
670 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
671 auto* outputPtr = reinterpret_cast<Half*>(alignedOutputPtr);
672 std::fill_n(outputPtr, numElements, -10.0f);
673
674 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
675 inputTensorInfo.SetConstant(true);
676 InputTensors inputTensors
677 {
678 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
679 };
680 OutputTensors outputTensors
681 {
682 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
683 };
684
685 runtime->GetProfiler(netId)->EnableProfiling(true);
686
687 INFO("Run ImportInputs");
688 std::vector<ImportedInputId> importedInputIds =
689 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100690 // We expect the import to have succeeded.
691 CHECK(importedInputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000692 std::vector<ImportedOutputId> importedOutputIds =
693 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100694 // We expect the import to have succeeded.
695 CHECK(importedOutputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000696
697 // Do the inference
Colm Doneland7ceec52022-07-06 12:09:05 +0100698 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000699
700 // Retrieve the Profiler.Print() output to get the workload execution
701 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
702 std::stringstream ss;
703 profilerManager.GetProfiler()->Print(ss);;
704 std::string dump = ss.str();
705
706 // Contains Convolution2dWorkload
707 std::size_t found = dump.find("ConvertFp32ToFp16Workload");
708 CHECK(found != std::string::npos);
709
710 // Contains SyncMemGeneric
711 found = dump.find("SyncMemGeneric");
712 CHECK(found != std::string::npos);
713
714 // Does not contain CopyMemGeneric
715 found = dump.find("CopyMemGeneric");
716 CHECK(found == std::string::npos);
717
718 runtime->UnloadNetwork(netId);
719
720 // Check output is as expected
721 // Validate result by checking that the output has no negative values
722 auto* outputResult = reinterpret_cast<Half*>(alignedOutputPtr);
723 CHECK(outputResult);
724
725 // Check the output is correct
726 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
727}
728
729TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportSimpleConvertFp32toFp16EndToEnd")
730{
731 using namespace half_float::literal;
732
733 // Create runtime in which test will run
734 IRuntime::CreationOptions options;
735 IRuntimePtr runtime(armnn::IRuntime::Create(options));
736
737 // build up the structure of the network
738 NetworkImpl network;
739
740 armnn::TensorInfo inputInfo({1}, armnn::DataType::Float32);
741 armnn::TensorInfo outputTensorInfo({1}, armnn::DataType::Float16);
742
743 std::vector<Half> expectedOutput = { 1.0_h };
744
745 unsigned int numElements = inputInfo.GetNumElements();
746 size_t totalBytesInput = numElements * sizeof(float);
747 size_t totalBytesOutput = numElements * sizeof(Half);
748
749 IConnectableLayer* const inputLayer = network.AddInputLayer(0, "input");
750 ARMNN_ASSERT(inputLayer);
751
752 armnn::IConnectableLayer* const convLayer = network.AddConvertFp32ToFp16Layer("convert");
753 ARMNN_ASSERT(convLayer);
754
755 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
756 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
757
758 IConnectableLayer* output = network.AddOutputLayer(0, "output");
759 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
760 convLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
761
762 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000763 OptimizerOptionsOpaque optOptions;
764 optOptions.SetImportEnabled(false);
765 optOptions.SetExportEnabled(false);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000766 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
767 IOptimizedNetworkPtr optNet = Optimize(network.GetGraph(), backends, runtime->GetDeviceSpec(), optOptions);
768 CHECK(optNet);
769
770 // Loads it into the runtime.
771 NetworkId netId;
772 std::string ignoredErrorMessage;
773 // Enable Importing
774 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
775 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
776
777 // Creates structures for input & output
778 const size_t alignment =
779 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
780 size_t spaceInput = totalBytesInput + alignment + alignment;
781 size_t spaceOutput = totalBytesOutput + alignment + alignment;
782 auto inputData = std::make_unique<uint8_t[]>(spaceInput);
783 void* alignedInputPtr = inputData.get();
784 CHECK(std::align(alignment, totalBytesInput, alignedInputPtr, spaceInput));
785
786 // Input with negative values
787 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
788 inputPtr[0] = 1.0f;
789
790 auto outputData = std::make_unique<uint8_t[]>(spaceOutput);
791 void* alignedOutputPtr = outputData.get();
792 CHECK(std::align(alignment, totalBytesOutput, alignedOutputPtr, spaceOutput));
793 auto* outputPtr = reinterpret_cast<Half*>(alignedOutputPtr);
794 std::fill_n(outputPtr, numElements, -10.0f);
795
796 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
797 inputTensorInfo.SetConstant(true);
798 InputTensors inputTensors
799 {
800 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
801 };
802 OutputTensors outputTensors
803 {
804 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
805 };
806
807 runtime->GetProfiler(netId)->EnableProfiling(true);
808
809 INFO("Run ImportInputs");
810 std::vector<ImportedInputId> importedInputIds =
811 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100812 CHECK(importedInputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000813 std::vector<ImportedOutputId> importedOutputIds =
814 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100815 CHECK(importedOutputIds.size() == 1);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000816
817 // Do the inference
Colm Doneland7ceec52022-07-06 12:09:05 +0100818 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
Cathal Corbetta3f4fba2022-03-21 09:27:08 +0000819
820 // Retrieve the Profiler.Print() output to get the workload execution
821 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
822 std::stringstream ss;
823 profilerManager.GetProfiler()->Print(ss);;
824 std::string dump = ss.str();
825
826 // Contains Convolution2dWorkload
827 std::size_t found = dump.find("ConvertFp32ToFp16Workload");
828 CHECK(found != std::string::npos);
829
830 // Contains SyncMemGeneric
831 found = dump.find("SyncMemGeneric");
832 CHECK(found != std::string::npos);
833
834 // Does not contain CopyMemGeneric
835 found = dump.find("CopyMemGeneric");
836 CHECK(found == std::string::npos);
837
838 runtime->UnloadNetwork(netId);
839
840 // Check output is as expected
841 // Validate result by checking that the output has no negative values
842 auto* outputResult = reinterpret_cast<Half*>(alignedOutputPtr);
843 CHECK(outputResult);
844
845 // Check the output is correct
846 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
847}
848
David Monahan041f17a2022-03-03 10:56:17 +0000849TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesEndToEndTest")
850{
851/*
852 * This is a test to check the functionality of the Forced Import functionality when using repeated inferences that
853 * require switching from importing to copy. For the first inference we create aligned Pointers and check they are
Colm Doneland7ceec52022-07-06 12:09:05 +0100854 * imported correctly. For the second we use similar pointers but don't use PreImporting.
David Monahan041f17a2022-03-03 10:56:17 +0000855 */
856 // Create runtime in which test will run
857 IRuntime::CreationOptions options;
858 IRuntimePtr runtime(armnn::IRuntime::Create(options));
859
860 // build up the structure of the network
861 INetworkPtr network(INetwork::Create());
862
863 armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
864 armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
865 armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);
866
867 kernelInfo.SetConstant(true);
868
869 std::vector<float> kernel =
870 {
871 4, 5, 6,
872 0, 0, 0,
873 3, 2, 1
874 };
875
876 const std::vector<float> expectedOutput =
877 {
878 23, 41, 33, 21,
879 44, 65, 76, 52,
880 82, 85, 79, 42
881 };
882
883 unsigned int numElements = inputInfo.GetNumElements();
884 size_t totalBytes = numElements * sizeof(float);
885
886 IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
887 ARMNN_ASSERT(inputLayer);
888
889 armnn::ConstTensor weights(kernelInfo, kernel);
890
891 armnn::Convolution2dDescriptor convDesc2d;
892 convDesc2d.m_StrideX = 1;
893 convDesc2d.m_StrideY = 1;
894 convDesc2d.m_PadLeft = 1;
895 convDesc2d.m_PadRight = 1;
896 convDesc2d.m_PadTop = 1;
897 convDesc2d.m_PadBottom = 1;
898 convDesc2d.m_DataLayout = DataLayout::NHWC;
Keith Davis721e6292022-05-17 10:06:53 +0100899 armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, "conv");
David Monahan041f17a2022-03-03 10:56:17 +0000900 ARMNN_ASSERT(convLayer);
901
Keith Davis721e6292022-05-17 10:06:53 +0100902 armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights);
903
904 weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
905 weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));
906
David Monahan041f17a2022-03-03 10:56:17 +0000907 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
908 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
909
910 IConnectableLayer* output = network->AddOutputLayer(0, "output");
911 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
912 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
913
914 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +0000915 OptimizerOptionsOpaque optOptions;
916 optOptions.SetImportEnabled(false);
917 optOptions.SetExportEnabled(false);
David Monahan041f17a2022-03-03 10:56:17 +0000918 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
919 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
920 CHECK(optNet);
921
922 // Loads it into the runtime.
923 NetworkId netId;
924 std::string ignoredErrorMessage;
925 // Enable Importing
926 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
927 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
928
929 // Creates structures for input & output
930 const size_t alignment =
931 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
932 size_t space = totalBytes + alignment + alignment;
933 auto inputData = std::make_unique<uint8_t[]>(space);
934 void* alignedInputPtr = inputData.get();
935 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
936
937 // Fill input with values
938 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
939 inputPtr[0] = 1;
940 inputPtr[1] = 5;
941 inputPtr[2] = 2;
942 inputPtr[3] = 3;
943 inputPtr[4] = 8;
944 inputPtr[5] = 7;
945 inputPtr[6] = 3;
946 inputPtr[7] = 6;
947 inputPtr[8] = 3;
948 inputPtr[9] = 3;
949 inputPtr[10] = 9;
950 inputPtr[11] = 1;
951
952
953 auto outputData = std::make_unique<uint8_t[]>(space);
954 void* alignedOutputPtr = outputData.get();
955 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
956 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
957 std::fill_n(outputPtr, numElements, -10.0f);
958
959 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
960 inputTensorInfo.SetConstant(true);
961 InputTensors inputTensors
962 {
963 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
964 };
965 OutputTensors outputTensors
966 {
967 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
968 };
969
970 runtime->GetProfiler(netId)->EnableProfiling(true);
971
972 INFO("Run ImportInputs");
973 std::vector<ImportedInputId> importedInputIds =
974 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100975 // We expect the import to have succeeded.
976 CHECK(importedInputIds.size() == 1);
David Monahan041f17a2022-03-03 10:56:17 +0000977 std::vector<ImportedOutputId> importedOutputIds =
978 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +0100979 // We expect the import to have succeeded.
980 CHECK(importedOutputIds.size() == 1);
David Monahan041f17a2022-03-03 10:56:17 +0000981
982 // Do the inference
Colm Doneland7ceec52022-07-06 12:09:05 +0100983 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan041f17a2022-03-03 10:56:17 +0000984
985 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
986 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
987 std::stringstream ss;
988 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
989 std::string dump = ss.str();
990
991 // Contains Convolution2dWorkload
992 std::size_t found = dump.find("Convolution2dWorkload");
993 CHECK(found != std::string::npos);
994
995 // Contains SyncMemGeneric
996 found = dump.find("SyncMemGeneric");
997 CHECK(found != std::string::npos);
998
999 // Does not contain CopyMemGeneric
1000 found = dump.find("CopyMemGeneric");
1001 CHECK(found == std::string::npos);
1002
1003 // Sync the outputs so we can read the data
1004 arm_compute::CLScheduler::get().sync();
1005
1006 // Check output is as expected
1007 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
1008 CHECK(outputResult);
1009 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1010
1011 // Repeat the inference, with new tensors and without using PreImporting to force it to fall back to copying
1012
1013 // Creates structures for input & output
1014 auto inputDataCopy = std::make_unique<uint8_t[]>(space);
1015 void* copyInputPtr = inputDataCopy.get();
1016
1017 // Fill input with values
1018 auto* inputCopyPtr = reinterpret_cast<float*>(copyInputPtr);
1019 inputCopyPtr[0] = 1;
1020 inputCopyPtr[1] = 5;
1021 inputCopyPtr[2] = 2;
1022 inputCopyPtr[3] = 3;
1023 inputCopyPtr[4] = 8;
1024 inputCopyPtr[5] = 7;
1025 inputCopyPtr[6] = 3;
1026 inputCopyPtr[7] = 6;
1027 inputCopyPtr[8] = 3;
1028 inputCopyPtr[9] = 3;
1029 inputCopyPtr[10] = 9;
1030 inputCopyPtr[11] = 1;
1031
1032 // Output pre-filled with -10.0f
1033 auto outputDataCopy = std::make_unique<uint8_t[]>(space);
1034 void* copyOutputPtr = outputDataCopy.get();
1035 auto* outputCopyPtr = reinterpret_cast<float*>(copyOutputPtr);
1036 std::fill_n(outputCopyPtr, numElements, -10.0f);
1037
1038 InputTensors inputTensorsCopy
1039 {
1040 {0,armnn::ConstTensor(inputTensorInfo, copyInputPtr)},
1041 };
1042 OutputTensors outputTensorsCopy
1043 {
1044 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1045 };
1046
1047 // Do the inference without any pre-imported input/output ids
1048 runtime->EnqueueWorkload(netId, inputTensorsCopy, outputTensorsCopy);
1049 // Sync the outputs so we can read the data
1050 arm_compute::CLScheduler::get().sync();
1051
1052 // Check the output is correct
1053 outputResult = reinterpret_cast<float*>(copyOutputPtr);
1054 CHECK(outputResult);
1055 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1056
1057 // Query the profiler again, this will contain the results of both inferences
1058 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1059 dump = ss.str();
1060
1061 // Contains Convolution2dWorkload
1062 found = dump.find("Convolution2dWorkload");
1063 CHECK(found != std::string::npos);
1064
1065 // Should still contain the SyncMemGeneric
1066 found = dump.find("SyncMemGeneric");
1067 CHECK(found != std::string::npos);
1068
1069 // Should now also contain a CopyMemGeneric
1070 found = dump.find("CopyMemGeneric");
1071 CHECK(found != std::string::npos);
1072 runtime->UnloadNetwork(netId);
1073}
1074
1075TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesInvertedEndToEndTest")
1076{
1077/*
1078 * This test is similar to the test above but instead of importing and then copying, we start by copying and then do
1079 * the import.
1080 */
1081 // Create runtime in which test will run
1082 IRuntime::CreationOptions options;
1083 IRuntimePtr runtime(armnn::IRuntime::Create(options));
1084
1085 // build up the structure of the network
1086 INetworkPtr network(INetwork::Create());
1087
1088 armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
1089 armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
1090 armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);
1091
1092 kernelInfo.SetConstant(true);
1093
1094 std::vector<float> kernel =
1095 {
1096 4, 5, 6,
1097 0, 0, 0,
1098 3, 2, 1
1099 };
1100
1101 const std::vector<float> expectedOutput =
1102 {
1103 23, 41, 33, 21,
1104 44, 65, 76, 52,
1105 82, 85, 79, 42
1106 };
1107
1108 unsigned int numElements = inputInfo.GetNumElements();
1109 size_t totalBytes = numElements * sizeof(float);
1110
1111 IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
1112 ARMNN_ASSERT(inputLayer);
1113
1114 armnn::ConstTensor weights(kernelInfo, kernel);
1115
1116 armnn::Convolution2dDescriptor convDesc2d;
1117 convDesc2d.m_StrideX = 1;
1118 convDesc2d.m_StrideY = 1;
1119 convDesc2d.m_PadLeft = 1;
1120 convDesc2d.m_PadRight = 1;
1121 convDesc2d.m_PadTop = 1;
1122 convDesc2d.m_PadBottom = 1;
1123 convDesc2d.m_DataLayout = DataLayout::NHWC;
Keith Davis721e6292022-05-17 10:06:53 +01001124
1125 armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d, "conv");
David Monahan041f17a2022-03-03 10:56:17 +00001126 ARMNN_ASSERT(convLayer);
1127
Keith Davis721e6292022-05-17 10:06:53 +01001128 armnn::IConnectableLayer* weightsLayer = network->AddConstantLayer(weights);
1129
1130 weightsLayer->GetOutputSlot(0).SetTensorInfo(weights.GetInfo());
1131 weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1u));
1132
David Monahan041f17a2022-03-03 10:56:17 +00001133 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
1134 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
1135
1136 IConnectableLayer* output = network->AddOutputLayer(0, "output");
1137 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1138 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
1139
1140 // Optimize the network
John Mcloughlinc5ee0d72023-03-24 12:07:25 +00001141 OptimizerOptionsOpaque optOptions;
1142 optOptions.SetImportEnabled(false);
1143 optOptions.SetExportEnabled(false);
David Monahan041f17a2022-03-03 10:56:17 +00001144 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
1145 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
1146 CHECK(optNet);
1147
1148 // Loads it into the runtime.
1149 NetworkId netId;
1150 std::string ignoredErrorMessage;
1151 // Enable Importing
1152 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1153 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
1154
1155 // Creates structures for input & output
1156 const size_t alignment =
1157 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
1158 size_t space = totalBytes + alignment + alignment;
1159 auto inputData = std::make_unique<uint8_t[]>(space);
1160 void* copyInputPtr = inputData.get();
1161
1162 // Fill input with values
1163 auto* inputPtr = reinterpret_cast<float*>(copyInputPtr);
1164 inputPtr[0] = 1;
1165 inputPtr[1] = 5;
1166 inputPtr[2] = 2;
1167 inputPtr[3] = 3;
1168 inputPtr[4] = 8;
1169 inputPtr[5] = 7;
1170 inputPtr[6] = 3;
1171 inputPtr[7] = 6;
1172 inputPtr[8] = 3;
1173 inputPtr[9] = 3;
1174 inputPtr[10] = 9;
1175 inputPtr[11] = 1;
1176
1177 // Create output buffer and fill it with -10.0f
1178 auto outputData = std::make_unique<uint8_t[]>(space);
1179 void* copyOutputPtr = outputData.get();
1180 auto* outputPtr = reinterpret_cast<float*>(copyOutputPtr);
1181 std::fill_n(outputPtr, numElements, -10.0f);
1182
1183 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
1184 inputTensorInfo.SetConstant(true);
1185 InputTensors inputTensors
1186 {
1187 {0,armnn::ConstTensor(inputTensorInfo, copyInputPtr)},
1188 };
1189 OutputTensors outputTensors
1190 {
1191 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
1192 };
1193
1194 runtime->GetProfiler(netId)->EnableProfiling(true);
1195
1196 // Do the inference without any pre-imported inputs/outputs
1197 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
1198
1199 // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1200 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
1201 std::stringstream ss;
1202 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1203 std::string dump = ss.str();
1204
1205 // Contains Convolution2dWorkload
1206 std::size_t found = dump.find("Convolution2dWorkload");
1207 CHECK(found != std::string::npos);
1208
1209 // Does not contain SyncMemGeneric
1210 found = dump.find("SyncMemGeneric");
1211 CHECK(found == std::string::npos);
1212
1213 // Does contain CopyMemGeneric
1214 found = dump.find("CopyMemGeneric");
1215 CHECK(found != std::string::npos);
1216
1217 // Sync the outputs so we can read the data
1218 arm_compute::CLScheduler::get().sync();
1219
1220 // Check output is as expected
1221 auto* outputResult = reinterpret_cast<float*>(copyOutputPtr);
1222 CHECK(outputResult);
1223 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1224
1225 // Repeat the inference, with new tensors and while using pre-importing to force it to import
1226
1227 // Creates structures for input & output
1228 auto inputDataImport = std::make_unique<uint8_t[]>(space);
1229 void* alignedInputImportPtr = inputDataImport.get();
1230 CHECK(std::align(alignment, totalBytes, alignedInputImportPtr, space));
1231
1232 // Fill input with values
1233 auto* inputImportPtr = reinterpret_cast<float*>(alignedInputImportPtr);
1234 inputImportPtr[0] = 1;
1235 inputImportPtr[1] = 5;
1236 inputImportPtr[2] = 2;
1237 inputImportPtr[3] = 3;
1238 inputImportPtr[4] = 8;
1239 inputImportPtr[5] = 7;
1240 inputImportPtr[6] = 3;
1241 inputImportPtr[7] = 6;
1242 inputImportPtr[8] = 3;
1243 inputImportPtr[9] = 3;
1244 inputImportPtr[10] = 9;
1245 inputImportPtr[11] = 1;
1246
1247 // Output pre-filled with -10.0f
1248 auto outputDataImport = std::make_unique<uint8_t[]>(space);
1249 void* alignedOutputImportPtr = outputDataImport.get();
1250 CHECK(std::align(alignment, totalBytes, alignedOutputImportPtr, space));
1251 auto* outputImportPtr = reinterpret_cast<float*>(alignedOutputImportPtr);
1252 std::fill_n(outputImportPtr, numElements, -10.0f);
1253
1254 InputTensors inputTensorsImport
1255 {
1256 {0,armnn::ConstTensor(inputTensorInfo, alignedInputImportPtr)},
1257 };
1258 OutputTensors outputTensorsImport
1259 {
1260 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputImportPtr)}
1261 };
1262
1263 INFO("Run ImportInputs");
1264 std::vector<ImportedInputId> importedInputIds =
1265 runtime->ImportInputs(netId, inputTensorsImport, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001266 CHECK(importedInputIds.size() == 1);
David Monahan041f17a2022-03-03 10:56:17 +00001267 std::vector<ImportedOutputId> importedOutputIds =
1268 runtime->ImportOutputs(netId, outputTensorsImport, MemorySource::Malloc);
Colm Doneland7ceec52022-07-06 12:09:05 +01001269 CHECK(importedOutputIds.size() == 1);
David Monahan041f17a2022-03-03 10:56:17 +00001270
1271 // Do the inference with pre-imported inputs/outputs
Colm Doneland7ceec52022-07-06 12:09:05 +01001272 runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
David Monahan041f17a2022-03-03 10:56:17 +00001273 // Sync the outputs so we can read the data
1274 arm_compute::CLScheduler::get().sync();
1275
1276 // Check the output is correct
1277 outputResult = reinterpret_cast<float*>(alignedOutputImportPtr);
1278 CHECK(outputResult);
1279 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
1280
1281
1282 // Query the profiler again, this will contain the results of both inferences
1283 profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1284 dump = ss.str();
1285
1286 // Contains Convolution2dWorkload
1287 found = dump.find("Convolution2dWorkload");
1288 CHECK(found != std::string::npos);
1289
1290 // Should now contain the SyncMemGeneric
1291 found = dump.find("SyncMemGeneric");
1292 CHECK(found != std::string::npos);
1293
1294 // Should still contain a CopyMemGeneric from the first inference
1295 found = dump.find("CopyMemGeneric");
1296 CHECK(found != std::string::npos);
1297 runtime->UnloadNetwork(netId);
1298}
1299
Sadik Armagan1625efc2021-06-10 18:24:34 +01001300}