//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>

#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/test/ClContextControlFixture.hpp>

#include <doctest/doctest.h>

#include <armnn/IRuntime.hpp>
#include <armnn/INetwork.hpp>

#include <algorithm>
#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

using namespace armnn;

TEST_SUITE("ClImportTensorHandleTests")
{
22TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
David Monahane4a41dc2021-04-14 16:55:36 +010023{
24 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
25 static_cast<MemorySourceFlags>(MemorySource::Malloc));
26
27 TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
28 unsigned int numElements = info.GetNumElements();
29
30 // create TensorHandle for memory import
31 auto handle = handleFactory.CreateTensorHandle(info);
32
33 // Get CLtensor
34 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
35
36 // Create and configure activation function
37 const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
38 arm_compute::CLActivationLayer act_func;
39 act_func.configure(&tensor, nullptr, act_info);
40
41 // Allocate user memory
42 const size_t totalBytes = tensor.info()->total_size();
43 const size_t alignment =
44 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010045 size_t space = totalBytes + alignment + alignment;
David Monahane4a41dc2021-04-14 16:55:36 +010046 auto testData = std::make_unique<uint8_t[]>(space);
47 void* alignedPtr = testData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +010048 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
David Monahane4a41dc2021-04-14 16:55:36 +010049
50 // Import memory
Sadik Armagan1625efc2021-06-10 18:24:34 +010051 CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc));
David Monahane4a41dc2021-04-14 16:55:36 +010052
53 // Input with negative values
54 auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
56
57 // Execute function and sync
58 act_func.run();
59 arm_compute::CLScheduler::get().sync();
60
61 // Validate result by checking that the output has no negative values
62 for(unsigned int i = 0; i < numElements; ++i)
63 {
Jan Eilersc1c872f2021-07-22 13:17:04 +010064 CHECK(typedPtr[i] == 0);
David Monahane4a41dc2021-04-14 16:55:36 +010065 }
66}
Sadik Armagan1625efc2021-06-10 18:24:34 +010068TEST_CASE_FIXTURE(ClContextControlFixture, "ClIncorrectMemorySourceImport")
David Monahane4a41dc2021-04-14 16:55:36 +010069{
70 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
71 static_cast<MemorySourceFlags>(MemorySource::Malloc));
72
73 TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
74
75 // create TensorHandle for memory import
76 auto handle = handleFactory.CreateTensorHandle(info);
77
78 // Get CLtensor
79 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
80
81 // Allocate user memory
82 const size_t totalBytes = tensor.info()->total_size();
83 const size_t alignment =
84 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +010085 size_t space = totalBytes + alignment + alignment;
David Monahane4a41dc2021-04-14 16:55:36 +010086 auto testData = std::make_unique<uint8_t[]>(space);
87 void* alignedPtr = testData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +010088 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
David Monahane4a41dc2021-04-14 16:55:36 +010089
90 // Import memory
Sadik Armagan1625efc2021-06-10 18:24:34 +010091 CHECK_THROWS_AS(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
David Monahane4a41dc2021-04-14 16:55:36 +010092}
TEST_CASE_FIXTURE(ClContextControlFixture, "ClInvalidMemorySourceImport")
{
    // 256 is outside the defined MemorySource enum values, so the factory and
    // handle are configured with a source the backend cannot support.
    MemorySource invalidMemSource = static_cast<MemorySource>(256);
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(invalidMemSource),
                                              static_cast<MemorySourceFlags>(invalidMemSource));

    TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);

    // Create a TensorHandle configured for memory import.
    auto handle = handleFactory.CreateTensorHandle(info);

    // Allocate user memory matching the 1x2x2x1 tensor (4 elements).
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Importing with an unsupported memory source must throw.
    CHECK_THROWS_AS(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
}
Sadik Armagan1625efc2021-06-10 18:24:34 +0100115TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100116{
117 // Create runtime in which test will run
118 IRuntime::CreationOptions options;
119 IRuntimePtr runtime(armnn::IRuntime::Create(options));
120
121 // build up the structure of the network
122 INetworkPtr net(INetwork::Create());
123
124 IConnectableLayer* input = net->AddInputLayer(0, "Input");
125
126 ActivationDescriptor descriptor;
127 descriptor.m_Function = ActivationFunction::ReLu;
128 IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");
129
130 IConnectableLayer* output = net->AddOutputLayer(0, "Output");
131
132 input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
133 activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));
134
135 TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
136 unsigned int numElements = tensorInfo.GetNumElements();
137 size_t totalBytes = numElements * sizeof(float);
138
139 input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
140 activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);
141
142 // Optimize the network
143 OptimizerOptions optOptions;
144 optOptions.m_ImportEnabled = true;
145 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
146 IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100147 CHECK(optNet);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100148
149 // Loads it into the runtime.
150 NetworkId netId;
151 std::string ignoredErrorMessage;
152 // Enable Importing
153 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
154 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
155
156 // Creates structures for input & output
157 const size_t alignment =
158 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
159 size_t space = totalBytes + alignment + alignment;
160 auto inputData = std::make_unique<uint8_t[]>(space);
161 void* alignedInputPtr = inputData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100162 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100163
164 // Input with negative values
165 auto* intputPtr = reinterpret_cast<float*>(alignedInputPtr);
166 std::fill_n(intputPtr, numElements, -5.0f);
167
168 auto outputData = std::make_unique<uint8_t[]>(space);
169 void* alignedOutputPtr = outputData.get();
Sadik Armagan1625efc2021-06-10 18:24:34 +0100170 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
Narumol Prangnawarat878e0f92021-05-11 19:51:14 +0100171 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
172 std::fill_n(outputPtr, numElements, -10.0f);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100173
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100174 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
175 inputTensorInfo.SetConstant(true);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100176 InputTensors inputTensors
177 {
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100178 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100179 };
180 OutputTensors outputTensors
181 {
182 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
183 };
184
185 runtime->GetProfiler(netId)->EnableProfiling(true);
186
187 // Do the inference
188 runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
189
190 // Retrieve the Profiler.Print() output to get the workload execution
191 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
192 std::stringstream ss;
193 profilerManager.GetProfiler()->Print(ss);;
194 std::string dump = ss.str();
195
196 // Contains ActivationWorkload
197 std::size_t found = dump.find("ActivationWorkload");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100198 CHECK(found != std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100199
200 // Contains SyncMemGeneric
201 found = dump.find("SyncMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100202 CHECK(found != std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100203
204 // Does not contain CopyMemGeneric
205 found = dump.find("CopyMemGeneric");
Sadik Armagan1625efc2021-06-10 18:24:34 +0100206 CHECK(found == std::string::npos);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100207
Narumol Prangnawarat878e0f92021-05-11 19:51:14 +0100208 runtime->UnloadNetwork(netId);
209
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100210 // Check output is as expected
211 // Validate result by checking that the output has no negative values
212 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
Sadik Armagan1625efc2021-06-10 18:24:34 +0100213 CHECK(outputResult);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100214 for(unsigned int i = 0; i < numElements; ++i)
215 {
Sadik Armagan1625efc2021-06-10 18:24:34 +0100216 CHECK(outputResult[i] >= 0);
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100217 }
Narumol Prangnawarate5f0b242021-05-07 17:52:36 +0100218}
TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);

    // Create a TensorHandle configured for memory import.
    auto handle = handleFactory.CreateTensorHandle(info);

    // Get the underlying CLTensor so the buffer can be sized correctly.
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Allocate user memory, padded so it can be cache-line aligned.
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // CanBeImported must reject an Undefined memory source, even for an
    // otherwise importable (correctly aligned) buffer.
    CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);

}
247TEST_CASE("ClCanBeImportedAlignedMemory")
248{
249 ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
250 static_cast<MemorySourceFlags>(MemorySource::Malloc));
251
252 TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);
253
254 // create TensorHandle (Memory Managed status is irrelevant)
255 auto handle = handleFactory.CreateTensorHandle(info);
256 // Get CLtensor
257 arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
258
259 // Create an aligned buffer
260 const size_t totalBytes = tensor.info()->total_size();
261 const size_t alignment =
262 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
263 size_t space = totalBytes + alignment + alignment;
264 auto testData = std::make_unique<uint8_t[]>(space);
265 void* alignedPtr = testData.get();
266 CHECK(std::align(alignment, totalBytes, alignedPtr, space));
267
268 // Check aligned buffers return true
269 CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);
270
271 // Due to the nature of how GPU memory is mapped it is entirely possible for memory which is misaligned on cpu
272 // to be successfully import on GPU. As such there is no way to create a misaligned pointer that will always fail.
273 // Rather it will succeed on some devices and fail on others. As long as a correctly aligned buffer returns true
274 // we can be confident that it will be successfully imported. All other cases will need to be handled by the user.
275}
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000277TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd")
278{
279 // Create runtime in which test will run
280 IRuntime::CreationOptions options;
281 IRuntimePtr runtime(armnn::IRuntime::Create(options));
282
283 // build up the structure of the network
284 INetworkPtr network(INetwork::Create());
285
286 armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
287 armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
288 armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);
289
290 kernelInfo.SetConstant(true);
291
292 std::vector<float> kernel =
293 {
294 4, 5, 6,
295 0, 0, 0,
296 3, 2, 1
297 };
298
299 const std::vector<float> expectedOutput =
300 {
301 23, 41, 33, 21,
302 44, 65, 76, 52,
303 82, 85, 79, 42
304 };
305
306 unsigned int numElements = inputInfo.GetNumElements();
307 size_t totalBytes = numElements * sizeof(float);
308
309 IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
310 ARMNN_ASSERT(inputLayer);
311
312 armnn::ConstTensor weights(kernelInfo, kernel);
313
314 armnn::Convolution2dDescriptor convDesc2d;
315 convDesc2d.m_StrideX = 1;
316 convDesc2d.m_StrideY = 1;
317 convDesc2d.m_PadLeft = 1;
318 convDesc2d.m_PadRight = 1;
319 convDesc2d.m_PadTop = 1;
320 convDesc2d.m_PadBottom = 1;
321 convDesc2d.m_DataLayout = DataLayout::NHWC;
322 armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d,
323 weights,
324 armnn::EmptyOptional(),
325 "conv");
326 ARMNN_ASSERT(convLayer);
327
328 inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
329 inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
330
331 IConnectableLayer* output = network->AddOutputLayer(0, "output");
332 convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
333 convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
334
335 // Optimize the network
336 OptimizerOptions optOptions;
337 optOptions.m_ImportEnabled = false;
338 std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
339 IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
340 CHECK(optNet);
341
342 // Loads it into the runtime.
343 NetworkId netId;
344 std::string ignoredErrorMessage;
345 // Enable Importing
346 INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
347 runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
348
349 // Creates structures for input & output
350 const size_t alignment =
351 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
352 size_t space = totalBytes + alignment + alignment;
353 auto inputData = std::make_unique<uint8_t[]>(space);
354 void* alignedInputPtr = inputData.get();
355 CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));
356
357 // Input with negative values
358 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
359 inputPtr[0] = 1;
360 inputPtr[1] = 5;
361 inputPtr[2] = 2;
362 inputPtr[3] = 3;
363 inputPtr[4] = 8;
364 inputPtr[5] = 7;
365 inputPtr[6] = 3;
366 inputPtr[7] = 6;
367 inputPtr[8] = 3;
368 inputPtr[9] = 3;
369 inputPtr[10] = 9;
370 inputPtr[11] = 1;
371
372
373 auto outputData = std::make_unique<uint8_t[]>(space);
374 void* alignedOutputPtr = outputData.get();
375 CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
376 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
377 std::fill_n(outputPtr, numElements, -10.0f);
378
379 TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
380 inputTensorInfo.SetConstant(true);
381 InputTensors inputTensors
382 {
383 {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
384 };
385 OutputTensors outputTensors
386 {
387 {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
388 };
389
390 runtime->GetProfiler(netId)->EnableProfiling(true);
391
392 INFO("Run ImportInputs");
393 std::vector<ImportedInputId> importedInputIds =
394 runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
395 std::vector<ImportedOutputId> importedOutputIds =
396 runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
397
398 // Do the inference
399 runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
400
401 // Retrieve the Profiler.Print() output to get the workload execution
402 ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
403 std::stringstream ss;
404 profilerManager.GetProfiler()->Print(ss);;
405 std::string dump = ss.str();
406
407 // Contains Convolution2dWorkload
408 std::size_t found = dump.find("Convolution2dWorkload");
409 CHECK(found != std::string::npos);
410
411 // Contains SyncMemGeneric
412 found = dump.find("SyncMemGeneric");
413 CHECK(found != std::string::npos);
414
415 // Does not contain CopyMemGeneric
416 found = dump.find("CopyMemGeneric");
417 CHECK(found == std::string::npos);
418
419 runtime->UnloadNetwork(netId);
420
421 // Check output is as expected
422 // Validate result by checking that the output has no negative values
423 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
424 CHECK(outputResult);
425
426 // Check the output is correct
427 CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
428}

}