//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <arm_compute/runtime/CL/functions/CLActivationLayer.h>

#include <cl/ClImportTensorHandle.hpp>
#include <cl/ClImportTensorHandleFactory.hpp>
#include <cl/test/ClContextControlFixture.hpp>

#include <doctest/doctest.h>

#include <armnn/IRuntime.hpp>
#include <armnn/INetwork.hpp>

using namespace armnn;

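// These tests exercise ClImportTensorHandle: importing user-allocated (malloc'd) host
// memory directly into OpenCL tensors so that workloads read and write the user's buffer
// without an intermediate copy. The end-to-end cases also inspect the profiler output,
// where SyncMemGeneric indicates the zero-copy import path and CopyMemGeneric indicates
// a fallback copy.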
TEST_SUITE("ClImportTensorHandleTests")
{
TEST_CASE_FIXTURE(ClContextControlFixture, "ClMallocImport")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
    unsigned int numElements = info.GetNumElements();

    // create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Get CLtensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Create and configure activation function
    const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
    arm_compute::CLActivationLayer act_func;
    act_func.configure(&tensor, nullptr, act_info);

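    // Passing nullptr as the destination makes the activation run in place on the
    // imported tensor. The buffer below is over-allocated by two alignments and then
    // adjusted with std::align, since zero-copy import of a host pointer generally
    // requires cacheline-aligned memory (hence CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE).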
    // Allocate user memory
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // Import memory
    CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc));
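    // After a successful import the CL tensor is backed directly by alignedPtr: the host
    // writes below are what the GPU kernel consumes, and its output lands in the same
    // buffer rather than being copied.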
David Monahane4a41dc2021-04-14 16:55:36 +010052
53 // Input with negative values
54 auto* typedPtr = reinterpret_cast<float*>(alignedPtr);
55 std::fill_n(typedPtr, numElements, -5.0f);
56
57 // Execute function and sync
58 act_func.run();
59 arm_compute::CLScheduler::get().sync();
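    // sync() blocks until the queued GPU work has completed, so the in-place result can
    // now be read back through typedPtr.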

    // Validate the result: ReLU clamps every -5.0f input to exactly 0
    for(unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(typedPtr[i] == 0);
    }
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClIncorrectMemorySourceImport")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);

    // create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Get CLtensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Allocate user memory
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // Importing from an undefined memory source should throw
    CHECK_THROWS_AS(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClInvalidMemorySourceImport")
{
    MemorySource invalidMemSource = static_cast<MemorySource>(256);
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(invalidMemSource),
                                              static_cast<MemorySourceFlags>(invalidMemSource));

    TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);

    // create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info);

    // Allocate user memory
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Importing from an unsupported memory source should throw
    CHECK_THROWS_AS(handle->Import(inputData.data(), invalidMemSource), MemoryImportException);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0, "Input");

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor, "Activation");

    IConnectableLayer* output = net->AddOutputLayer(0, "Output");

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    TensorInfo tensorInfo = TensorInfo({ 1, 24, 16, 3 }, DataType::Float32);
    unsigned int numElements = tensorInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    activation->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
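    // The three-argument form used here is assumed to be
    // INetworkProperties(asyncEnabled, inputSource, outputSource) for this Arm NN
    // version: async execution stays off while Malloc'd buffers may be imported for
    // both inputs and outputs.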
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    // Input with negative values
    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::fill_n(inputPtr, numElements, -5.0f);

    auto outputData = std::make_unique<uint8_t[]>(space);
    void* alignedOutputPtr = outputData.get();
    CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();
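
    // When the import path is taken the profile should list the workload itself plus a
    // SyncMemGeneric event, and no CopyMemGeneric event (which would indicate a fallback
    // to copying).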
    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    runtime->UnloadNetwork(netId);

    // Check output is as expected: ReLU should leave no negative values
    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
    CHECK(outputResult);
    for(unsigned int i = 0; i < numElements; ++i)
    {
        CHECK(outputResult[i] >= 0);
    }
}
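
// Note: the zero-copy import above needs three things at once: the network optimised
// with OptimizerOptions::m_ImportEnabled, LoadNetwork given Malloc input and output
// memory sources, and user buffers that satisfy the device's alignment requirement.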

TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);

    // create TensorHandle for memory import
    auto handle = handleFactory.CreateTensorHandle(info, DataLayout::NHWC);

    // Get CLtensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Allocate user memory
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // CanBeImported should throw for an undefined memory source
    CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
}

TEST_CASE("ClCanBeImportedAlignedMemory")
{
    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));

    TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);

    // create TensorHandle (Memory Managed status is irrelevant)
    auto handle = handleFactory.CreateTensorHandle(info, DataLayout::NHWC);
    // Get CLtensor
    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();

    // Create an aligned buffer
    const size_t totalBytes = tensor.info()->total_size();
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto testData = std::make_unique<uint8_t[]>(space);
    void* alignedPtr = testData.get();
    CHECK(std::align(alignment, totalBytes, alignedPtr, space));

    // Check aligned buffers return true
    CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);

    // Due to the nature of how GPU memory is mapped, it is entirely possible for memory which is misaligned on the
    // CPU to be successfully imported on the GPU. As such there is no way to create a misaligned pointer that will
    // always fail; rather it will succeed on some devices and fail on others. As long as a correctly aligned buffer
    // returns true we can be confident that it will be successfully imported. All other cases will need to be
    // handled by the user.
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportConv2dEndToEnd")
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr network(INetwork::Create());

    armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
    armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
    armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);

    kernelInfo.SetConstant(true);

    std::vector<float> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    const std::vector<float> expectedOutput =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };
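
    // Worked example for the top-left output element (NHWC input 1x3x4x1, padding 1 on
    // every edge, stride 1, no kernel flip): only the kernel row (3, 2, 1) lands on real
    // data, sitting over the padded input row (0, 8, 7), giving
    //     output[0] = 3*0 + 2*8 + 1*7 = 23
    // which matches expectedOutput[0].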

    unsigned int numElements = inputInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

    IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
    ARMNN_ASSERT(inputLayer);

    armnn::ConstTensor weights(kernelInfo, kernel);

    armnn::Convolution2dDescriptor convDesc2d;
    convDesc2d.m_StrideX = 1;
    convDesc2d.m_StrideY = 1;
    convDesc2d.m_PadLeft = 1;
    convDesc2d.m_PadRight = 1;
    convDesc2d.m_PadTop = 1;
    convDesc2d.m_PadBottom = 1;
    convDesc2d.m_DataLayout = DataLayout::NHWC;
    armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d,
                                                                               weights,
                                                                               armnn::EmptyOptional(),
                                                                               "conv");
    ARMNN_ASSERT(convLayer);

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

    IConnectableLayer* output = network->AddOutputLayer(0, "output");
    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = false;
    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Importing is deliberately left disabled at load time; it is forced per inference below
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    // Fill input with values
    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    inputPtr[0] = 1;
    inputPtr[1] = 5;
    inputPtr[2] = 2;
    inputPtr[3] = 3;
    inputPtr[4] = 8;
    inputPtr[5] = 7;
    inputPtr[6] = 3;
    inputPtr[7] = 6;
    inputPtr[8] = 3;
    inputPtr[9] = 3;
    inputPtr[10] = 9;
    inputPtr[11] = 1;

    auto outputData = std::make_unique<uint8_t[]>(space);
    void* alignedOutputPtr = outputData.get();
    CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    INFO("Run ImportInputs");
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
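    // ImportInputs/ImportOutputs pre-import the user buffers even though import was not
    // requested at load time; handing the returned ids to EnqueueWorkload below is what
    // forces the zero-copy path for this inference.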

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains Convolution2dWorkload
    std::size_t found = dump.find("Convolution2dWorkload");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    runtime->UnloadNetwork(netId);

    // Check the output is correct
    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
    CHECK(outputResult);
    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesEndToEndTest")
{
/*
 * This test checks the Forced Import functionality across repeated inferences that require switching from
 * importing to copying. For the first inference we create aligned pointers and check that they are imported
 * correctly. For the second we use similar pointers but skip pre-importing, forcing a fall back to copying.
 */
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr network(INetwork::Create());

    armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
    armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
    armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);

    kernelInfo.SetConstant(true);

    std::vector<float> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    const std::vector<float> expectedOutput =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    unsigned int numElements = inputInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

    IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
    ARMNN_ASSERT(inputLayer);

    armnn::ConstTensor weights(kernelInfo, kernel);

    armnn::Convolution2dDescriptor convDesc2d;
    convDesc2d.m_StrideX = 1;
    convDesc2d.m_StrideY = 1;
    convDesc2d.m_PadLeft = 1;
    convDesc2d.m_PadRight = 1;
    convDesc2d.m_PadTop = 1;
    convDesc2d.m_PadBottom = 1;
    convDesc2d.m_DataLayout = DataLayout::NHWC;
    armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d,
                                                                               weights,
                                                                               armnn::EmptyOptional(),
                                                                               "conv");
    ARMNN_ASSERT(convLayer);

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

    IConnectableLayer* output = network->AddOutputLayer(0, "output");
    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = false;
    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Importing is deliberately left disabled at load time; it is forced per inference below
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* alignedInputPtr = inputData.get();
    CHECK(std::align(alignment, totalBytes, alignedInputPtr, space));

    // Fill input with values
    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    inputPtr[0] = 1;
    inputPtr[1] = 5;
    inputPtr[2] = 2;
    inputPtr[3] = 3;
    inputPtr[4] = 8;
    inputPtr[5] = 7;
    inputPtr[6] = 3;
    inputPtr[7] = 6;
    inputPtr[8] = 3;
    inputPtr[9] = 3;
    inputPtr[10] = 9;
    inputPtr[11] = 1;

    auto outputData = std::make_unique<uint8_t[]>(space);
    void* alignedOutputPtr = outputData.get();
    CHECK(std::align(alignment, totalBytes, alignedOutputPtr, space));
    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(inputTensorInfo, alignedInputPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputPtr)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    INFO("Run ImportInputs");
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);

    // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
    std::string dump = ss.str();

    // Contains Convolution2dWorkload
    std::size_t found = dump.find("Convolution2dWorkload");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Sync the outputs so we can read the data
    arm_compute::CLScheduler::get().sync();

    // Check output is as expected
    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
    CHECK(outputResult);
    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));

    // Repeat the inference, with new tensors and without using PreImporting to force it to fall back to copying

    // Creates structures for input & output
    auto inputDataCopy = std::make_unique<uint8_t[]>(space);
    void* copyInputPtr = inputDataCopy.get();

    // Fill input with values
    auto* inputCopyPtr = reinterpret_cast<float*>(copyInputPtr);
    inputCopyPtr[0] = 1;
    inputCopyPtr[1] = 5;
    inputCopyPtr[2] = 2;
    inputCopyPtr[3] = 3;
    inputCopyPtr[4] = 8;
    inputCopyPtr[5] = 7;
    inputCopyPtr[6] = 3;
    inputCopyPtr[7] = 6;
    inputCopyPtr[8] = 3;
    inputCopyPtr[9] = 3;
    inputCopyPtr[10] = 9;
    inputCopyPtr[11] = 1;

    // Output pre-filled with -10.0f
    auto outputDataCopy = std::make_unique<uint8_t[]>(space);
    void* copyOutputPtr = outputDataCopy.get();
    auto* outputCopyPtr = reinterpret_cast<float*>(copyOutputPtr);
    std::fill_n(outputCopyPtr, numElements, -10.0f);

    InputTensors inputTensorsCopy
    {
        {0,armnn::ConstTensor(inputTensorInfo, copyInputPtr)},
    };
    OutputTensors outputTensorsCopy
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
    };

    // Do the inference without any pre-imported input/output ids
    runtime->EnqueueWorkload(netId, inputTensorsCopy, outputTensorsCopy);
    // Sync the outputs so we can read the data
    arm_compute::CLScheduler::get().sync();

    // Check the output is correct
    outputResult = reinterpret_cast<float*>(copyOutputPtr);
    CHECK(outputResult);
    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));

    // Query the profiler again, this will contain the results of both inferences
    profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
    dump = ss.str();

    // Contains Convolution2dWorkload
    found = dump.find("Convolution2dWorkload");
    CHECK(found != std::string::npos);

    // Should still contain the SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Should now also contain a CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);
    runtime->UnloadNetwork(netId);
}

TEST_CASE_FIXTURE(ClContextControlFixture, "ClForceImportRepeatedInferencesInvertedEndToEndTest")
{
/*
 * This test is similar to the test above but instead of importing and then copying, we start by copying and then
 * do the import.
 */
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr network(INetwork::Create());

    armnn::TensorInfo inputInfo({ 1, 3, 4, 1 }, DataType::Float32);
    armnn::TensorInfo kernelInfo({ 1, 3, 3, 1 }, DataType::Float32);
    armnn::TensorInfo outputInfo({ 1, 3, 4, 1 }, DataType::Float32);

    kernelInfo.SetConstant(true);

    std::vector<float> kernel =
    {
        4, 5, 6,
        0, 0, 0,
        3, 2, 1
    };

    const std::vector<float> expectedOutput =
    {
        23, 41, 33, 21,
        44, 65, 76, 52,
        82, 85, 79, 42
    };

    unsigned int numElements = inputInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);

    IConnectableLayer* const inputLayer = network->AddInputLayer(0, "input");
    ARMNN_ASSERT(inputLayer);

    armnn::ConstTensor weights(kernelInfo, kernel);

    armnn::Convolution2dDescriptor convDesc2d;
    convDesc2d.m_StrideX = 1;
    convDesc2d.m_StrideY = 1;
    convDesc2d.m_PadLeft = 1;
    convDesc2d.m_PadRight = 1;
    convDesc2d.m_PadTop = 1;
    convDesc2d.m_PadBottom = 1;
    convDesc2d.m_DataLayout = DataLayout::NHWC;
    armnn::IConnectableLayer* const convLayer = network->AddConvolution2dLayer(convDesc2d,
                                                                               weights,
                                                                               armnn::EmptyOptional(),
                                                                               "conv");
    ARMNN_ASSERT(convLayer);

    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);

    IConnectableLayer* output = network->AddOutputLayer(0, "output");
    convLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Optimize the network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = false;
    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Importing is deliberately left disabled at load time; it is forced per inference below
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
    size_t space = totalBytes + alignment + alignment;
    auto inputData = std::make_unique<uint8_t[]>(space);
    void* copyInputPtr = inputData.get();

    // Fill input with values
    auto* inputPtr = reinterpret_cast<float*>(copyInputPtr);
    inputPtr[0] = 1;
    inputPtr[1] = 5;
    inputPtr[2] = 2;
    inputPtr[3] = 3;
    inputPtr[4] = 8;
    inputPtr[5] = 7;
    inputPtr[6] = 3;
    inputPtr[7] = 6;
    inputPtr[8] = 3;
    inputPtr[9] = 3;
    inputPtr[10] = 9;
    inputPtr[11] = 1;

    // Create output buffer and fill it with -10.0f
    auto outputData = std::make_unique<uint8_t[]>(space);
    void* copyOutputPtr = outputData.get();
    auto* outputPtr = reinterpret_cast<float*>(copyOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(netId, 0);
    inputTensorInfo.SetConstant(true);
    InputTensors inputTensors
    {
        {0,armnn::ConstTensor(inputTensorInfo, copyInputPtr)},
    };
    OutputTensors outputTensors
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), copyOutputPtr)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference without any pre-imported inputs/outputs
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
    std::string dump = ss.str();

    // Contains Convolution2dWorkload
    std::size_t found = dump.find("Convolution2dWorkload");
    CHECK(found != std::string::npos);

    // Does not contain SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found == std::string::npos);

    // Does contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);
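    // With no pre-imported ids the runtime stages the tensors through CopyMemGeneric,
    // and no SyncMemGeneric appears because the output is copied back rather than read
    // in place.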

    // Sync the outputs so we can read the data
    arm_compute::CLScheduler::get().sync();

    // Check output is as expected
    auto* outputResult = reinterpret_cast<float*>(copyOutputPtr);
    CHECK(outputResult);
    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));

    // Repeat the inference, with new tensors and while using pre-importing to force it to import

    // Creates structures for input & output
    auto inputDataImport = std::make_unique<uint8_t[]>(space);
    void* alignedInputImportPtr = inputDataImport.get();
    CHECK(std::align(alignment, totalBytes, alignedInputImportPtr, space));

    // Fill input with values
    auto* inputImportPtr = reinterpret_cast<float*>(alignedInputImportPtr);
    inputImportPtr[0] = 1;
    inputImportPtr[1] = 5;
    inputImportPtr[2] = 2;
    inputImportPtr[3] = 3;
    inputImportPtr[4] = 8;
    inputImportPtr[5] = 7;
    inputImportPtr[6] = 3;
    inputImportPtr[7] = 6;
    inputImportPtr[8] = 3;
    inputImportPtr[9] = 3;
    inputImportPtr[10] = 9;
    inputImportPtr[11] = 1;

    // Output pre-filled with -10.0f
    auto outputDataImport = std::make_unique<uint8_t[]>(space);
    void* alignedOutputImportPtr = outputDataImport.get();
    CHECK(std::align(alignment, totalBytes, alignedOutputImportPtr, space));
    auto* outputImportPtr = reinterpret_cast<float*>(alignedOutputImportPtr);
    std::fill_n(outputImportPtr, numElements, -10.0f);

    InputTensors inputTensorsImport
    {
        {0,armnn::ConstTensor(inputTensorInfo, alignedInputImportPtr)},
    };
    OutputTensors outputTensorsImport
    {
        {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputImportPtr)}
    };

    INFO("Run ImportInputs");
    std::vector<ImportedInputId> importedInputIds =
        runtime->ImportInputs(netId, inputTensorsImport, MemorySource::Malloc);
    std::vector<ImportedOutputId> importedOutputIds =
        runtime->ImportOutputs(netId, outputTensorsImport, MemorySource::Malloc);

    // Do the inference with pre-imported inputs/outputs
    runtime->EnqueueWorkload(netId, inputTensorsImport, outputTensorsImport, importedInputIds, importedOutputIds);
    // Sync the outputs so we can read the data
    arm_compute::CLScheduler::get().sync();

    // Check the output is correct
    outputResult = reinterpret_cast<float*>(alignedOutputImportPtr);
    CHECK(outputResult);
    CHECK(std::equal(outputResult, outputResult + numElements, expectedOutput.begin(), expectedOutput.end()));

    // Query the profiler again, this will contain the results of both inferences
    profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
    dump = ss.str();

    // Contains Convolution2dWorkload
    found = dump.find("Convolution2dWorkload");
    CHECK(found != std::string::npos);

    // Should now contain the SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Should still contain a CopyMemGeneric from the first inference
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);
    runtime->UnloadNetwork(netId);
}

}