//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#define LOG_TAG "ArmnnDriver"

#include "ArmnnPreparedModel_1_2.hpp"
#include "Utils.hpp"

#include <boost/format.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>

#include <cassert>
#include <cinttypes>

using namespace android;
using namespace android::hardware;

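// Sentinel timing value. The NN HAL requires UINT64_MAX in both fields when a
// duration was not measured; this driver currently reports it on every path.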
static const Timing g_NoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};

namespace {

using namespace armnn_driver;

void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, ErrorStatus errorStatus,
                            std::string callingFunction)
{
    Return<void> returned = callback->notify(errorStatus);
    // This check is required; if the callback fails and the error is not handled, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
              callingFunction.c_str(), returned.description().c_str());
    }
}

void NotifyCallbackAndCheck(const ::android::sp<V1_2::IExecutionCallback>& callback, ErrorStatus errorStatus,
                            std::string callingFunction)
{
    Return<void> returned = callback->notify(errorStatus);
    // This check is required; if the callback fails and the error is not handled, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
              callingFunction.c_str(), returned.description().c_str());
    }
}

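// An empty requestArg.dimensions vector means the request reuses the dimensions
// from the model, so there is nothing extra to validate in that case.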
bool ValidateRequestArgument(const RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
{
    if (requestArg.dimensions.size() != 0)
    {
        if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
        {
            ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
                  requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
            return false;
        }

        for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
        {
            if (requestArg.dimensions[d] != tensorInfo.GetShape()[d])
            {
                ALOGE("Mismatched size for dimension %u (request argument: %u, expected %u)",
                      d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
                return false;
            }
        }
    }

    return true;
}

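// Returns an empty Tensor (null memory area) if validation fails; callers treat
// GetMemoryArea() == nullptr as the failure signal.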
armnn::Tensor GetTensorForRequestArgument(const RequestArgument& requestArg,
                                          const armnn::TensorInfo& tensorInfo,
                                          const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
{
    if (!ValidateRequestArgument(requestArg, tensorInfo))
    {
        return armnn::Tensor();
    }

    return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
}

inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    return tensorNamePrefix + std::to_string(index);
}

} // anonymous namespace

using namespace android::hardware;

namespace armnn_driver
{

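// A single static request thread is shared by all ArmnnPreparedModel_1_2
// instances of a given HAL version, so asynchronous requests from different
// models are serviced by one worker.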
template<typename HalVersion>
RequestThread<ArmnnPreparedModel_1_2, HalVersion> ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;

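// Dumps each tensor to "<networkId>_<requestCount>.dump" in the configured
// dump directory; a no-op when no dump directory was requested.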
template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                               const TensorBindingCollection& tensorBindings)
{
    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        const std::string requestName = boost::str(boost::format("%1%_%2%.dump") % m_NetworkId % m_RequestCount);
        for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
        {
            DumpTensor(m_RequestInputsAndOutputsDumpDir,
                       requestName,
                       BuildTensorName(tensorNamePrefix, i),
                       tensorBindings[i].second);
        }
    }
}

template<typename HalVersion>
ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
                                                           armnn::IRuntime* runtime,
                                                           const V1_2::Model& model,
                                                           const std::string& requestInputsAndOutputsDumpDir,
                                                           const bool gpuProfilingEnabled)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_Model(model)
    , m_RequestCount(0)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
{
    // Enable profiling if required.
    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
}

template<typename HalVersion>
ArmnnPreparedModel_1_2<HalVersion>::~ArmnnPreparedModel_1_2()
{
    // Get a hold of the profiler used by this model.
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

    // Unload the network associated with this model.
    m_Runtime->UnloadNetwork(m_NetworkId);

    // Dump the profiling info to a file if required.
    DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
}

template<typename HalVersion>
Return<ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute(const Request& request,
                                                                const ::android::sp<V1_0::IExecutionCallback>& callback)
{
    return Execute<V1_0::IExecutionCallback>(request, callback);
}

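// The unnamed MeasureTiming parameter is deliberately ignored: this driver does
// not measure execution timings yet, so no timing information is reported.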
template<typename HalVersion>
Return<ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute_1_2(const Request& request,
                                                                    MeasureTiming,
                                                                    const sp<V1_2::IExecutionCallback>& callback)
{
    return Execute<V1_2::IExecutionCallback>(request, callback);
}

template<typename HalVersion>
Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Request& request,
                                                                      MeasureTiming,
                                                                      V1_2::IPreparedModel::executeSynchronously_cb cb)
{
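    // Unlike the asynchronous execute() path, this method runs EnqueueWorkload on
    // the calling thread and invokes the result callback before returning.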
    ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
    m_RequestCount++;

    if (cb == nullptr)
    {
        ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid callback passed");
        return Void();
    }

    if (!android::nn::validateRequest(request, m_Model))
    {
        cb(ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming);
        return Void();
    }

    // Allocate the input and output tensors on the heap, mirroring the asynchronous path.
    auto pInputTensors = std::make_shared<armnn::InputTensors>();
    auto pOutputTensors = std::make_shared<armnn::OutputTensors>();

    // Map the request's memory pools into a heap-allocated vector of RunTimePoolInfos.
    auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
    {
        cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
        return Void();
    }

    // Add the inputs and outputs with their data.
    try
    {
        pInputTensors->reserve(request.inputs.size());
        for (unsigned int i = 0; i < request.inputs.size(); i++)
        {
            const auto& inputArg = request.inputs[i];

            const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
            const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);

            if (inputTensor.GetMemoryArea() == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
                cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
                return Void();
            }

            pInputTensors->emplace_back(i, inputTensor);
        }

        pOutputTensors->reserve(request.outputs.size());
        for (unsigned int i = 0; i < request.outputs.size(); i++)
        {
            const auto& outputArg = request.outputs[i];

            const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
            const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);

            if (outputTensor.GetMemoryArea() == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
                cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
                return Void();
            }

            pOutputTensors->emplace_back(i, outputTensor);
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
        cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
        return Void();
    }
    ALOGV("ArmnnPreparedModel_1_2::executeSynchronously() before Execution");

    DumpTensorsIfRequired("Input", *pInputTensors);

    // Run the inference.
    try
    {
        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors);

        if (status != armnn::Status::Success)
        {
            ALOGW("EnqueueWorkload failed");
            cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
            return Void();
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
        cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming);
        return Void();
    }

    DumpTensorsIfRequired("Output", *pOutputTensors);

    // Commit output buffers.
    // Note that we update *all* pools, even if they aren't actually used as outputs -
    // this is simpler and is what the CpuExecutor does.
    for (android::nn::RunTimePoolInfo& pool : *pMemPools)
    {
        pool.update();
    }
    ALOGV("ArmnnPreparedModel_1_2::executeSynchronously() after Execution");
    cb(ErrorStatus::NONE, {}, g_NoTiming);
    return Void();
}

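// The burst server created here services FMQ-based burst requests; when given an
// IPreparedModel*, the utility library's default behaviour is to execute each
// burst request through this model's executeSynchronously().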
template<typename HalVersion>
Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst(
        const sp<V1_2::IBurstCallback>& callback,
        const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
        const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
        V1_2::IPreparedModel::configureExecutionBurst_cb cb)
{
    ALOGV("ArmnnPreparedModel_1_2::configureExecutionBurst");
    const sp<V1_2::IBurstContext> burst =
        ExecutionBurstServer::create(callback, requestChannel, resultChannel, this);

    if (burst == nullptr)
    {
        cb(ErrorStatus::GENERAL_FAILURE, {});
    }
    else
    {
        cb(ErrorStatus::NONE, burst);
    }
    return Void();
}

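// Runs on m_RequestThread. The shared_ptr arguments keep the memory pools and
// tensors alive until this asynchronous execution has completed.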
template<typename HalVersion>
void ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
        std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
        std::shared_ptr<armnn::InputTensors>& pInputTensors,
        std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
        const ::android::sp<V1_0::IExecutionCallback>& callback)
{
    ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");

    DumpTensorsIfRequired("Input", *pInputTensors);

    // Run the inference.
    try
    {
        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors);
        if (status != armnn::Status::Success)
        {
            ALOGW("EnqueueWorkload failed");
            NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph");
            return;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph");
        return;
    }

    DumpTensorsIfRequired("Output", *pOutputTensors);

    // Commit output buffers.
    // Note that we update *all* pools, even if they aren't actually used as outputs -
    // this is simpler and is what the CpuExecutor does.
    for (android::nn::RunTimePoolInfo& pool : *pMemPools)
    {
        pool.update();
    }

    NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ArmnnPreparedModel_1_2::ExecuteGraph");
}

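// Runs the network once with zero-filled dummy buffers. Typically invoked at
// prepare time so that backend kernels (e.g. OpenCL) are compiled before the
// first real inference; returns false if the execution fails.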
template<typename HalVersion>
bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
{
    std::vector<std::vector<char>> storage;
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < m_Model.inputIndexes.size(); i++)
    {
        const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        storage.emplace_back(inputTensorInfo.GetNumBytes());
        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());

        inputTensors.emplace_back(i, inputTensor);
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < m_Model.outputIndexes.size(); i++)
    {
        const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());

        outputTensors.emplace_back(i, outputTensor);
    }

    try
    {
        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
        if (status != armnn::Status::Success)
        {
            ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
            return false;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
        return false;
    }
    return true;
}

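// Asynchronous entry point shared by execute() and execute_1_2(). Returns NONE
// once the request has been queued on m_RequestThread; the real execution
// status is delivered later through the supplied callback.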
template<typename HalVersion>
template<typename ExecutionCallback>
Return<ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& request,
                                                                const sp<ExecutionCallback>& callback)
{
    ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
    m_RequestCount++;

    if (callback.get() == nullptr)
    {
        ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed");
        return ErrorStatus::INVALID_ARGUMENT;
    }

    if (!android::nn::validateRequest(request, m_Model))
    {
        NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel_1_2::execute");
        return ErrorStatus::INVALID_ARGUMENT;
    }

    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
    }

    // Allocate the tensors on the heap, as they are passed to the request thread.
    auto pInputTensors = std::make_shared<armnn::InputTensors>();
    auto pOutputTensors = std::make_shared<armnn::OutputTensors>();

    // Map the memory pools into a heap-allocated vector of RunTimePoolInfos,
    // as it is also passed to the request thread.
    auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
    {
        NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute");
        return ErrorStatus::GENERAL_FAILURE;
    }

    // Add the inputs and outputs with their data.
    try
    {
        pInputTensors->reserve(request.inputs.size());
        for (unsigned int i = 0; i < request.inputs.size(); i++)
        {
            const auto& inputArg = request.inputs[i];

            const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
            const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);

            if (inputTensor.GetMemoryArea() == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
                NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE,
                                       "ArmnnPreparedModel_1_2::execute");
                return ErrorStatus::GENERAL_FAILURE;
            }

            pInputTensors->emplace_back(i, inputTensor);
        }

        pOutputTensors->reserve(request.outputs.size());
        for (unsigned int i = 0; i < request.outputs.size(); i++)
        {
            const auto& outputArg = request.outputs[i];

            const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
            const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
            if (outputTensor.GetMemoryArea() == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
                NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE,
                                       "ArmnnPreparedModel_1_2::execute");
                return ErrorStatus::GENERAL_FAILURE;
            }

            pOutputTensors->emplace_back(i, outputTensor);
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute");
        return ErrorStatus::GENERAL_FAILURE;
    }

    ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
    // Post the request for asynchronous execution on m_RequestThread.
    m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback);
    ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg");

    return ErrorStatus::NONE;
}

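// Explicit instantiation: only the hal_1_2::HalPolicy specialisation is needed
// when the driver is built against Android NN HAL 1.2.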
#ifdef ARMNN_ANDROID_NN_V1_2
template class ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>;
#endif

} // namespace armnn_driver