blob: d338fdc8ac4556b915d60e0cb646cecb89c9a928 [file] [log] [blame]
telsoa015307bc12018-03-09 13:51:08 +00001//
2// Copyright © 2017 Arm Ltd. All rights reserved.
3// See LICENSE file in the project root for full license information.
4//
5
6#define LOG_TAG "ArmnnDriver"
7
8#include "ArmnnPreparedModel.hpp"
9#include "Utils.hpp"
10
11#include <boost/format.hpp>
12#include <log/log.h>
13#include <OperationsUtils.h>
14
surmeh01deb3bdb2018-07-05 12:06:04 +010015#if defined(ARMNN_ANDROID_P)
16// The headers of the ML framework have changed between Android O and Android P.
17// The validation functions have been moved into their own header, ValidateHal.h.
18#include <ValidateHal.h>
19#endif
20
telsoa015307bc12018-03-09 13:51:08 +000021#include <cassert>
22#include <cinttypes>
23
24using namespace android;
25
26namespace
27{
28using namespace armnn_driver;
29
30void NotifyCallbackAndCheck(const ::android::sp<IExecutionCallback>& callback, ErrorStatus errorStatus,
31 std::string callingFunction)
32{
33 Return<void> returned = callback->notify(errorStatus);
34 // This check is required, if the callback fails and it isn't checked it will bring down the service
35 if (!returned.isOk())
36 {
37 ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
38 callingFunction.c_str(), returned.description().c_str());
39 }
40}
41
42bool ValidateRequestArgument(const RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
43{
44 if (requestArg.dimensions.size() != 0)
45 {
46 if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
47 {
48 ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
49 requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
50 return false;
51 }
52
53 for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
54 {
55 if (requestArg.dimensions[d] != tensorInfo.GetShape()[d])
56 {
57 ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)",
58 d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
59 return false;
60 }
61 }
62 }
63
64 return true;
65}
66
67armnn::Tensor GetTensorForRequestArgument(const RequestArgument& requestArg,
68 const armnn::TensorInfo& tensorInfo,
69 const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
70{
71 if (!ValidateRequestArgument(requestArg, tensorInfo))
72 {
73 return armnn::Tensor();
74 }
75
76 return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
77}
78
// Returns tensorNamePrefix immediately followed by the decimal representation of index,
// e.g. ("Input", 3) gives "Input3".
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    std::string tensorName(tensorNamePrefix);
    tensorName += std::to_string(index);
    return tensorName;
}
83
84}
85
telsoa01ce3e84a2018-08-31 09:31:35 +010086using namespace android::hardware;
87
telsoa015307bc12018-03-09 13:51:08 +000088namespace armnn_driver
89{
90
91RequestThread ArmnnPreparedModel::m_RequestThread;
92
93template <typename TensorBindingCollection>
94void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix,
95 const TensorBindingCollection& tensorBindings)
96{
97 if (!m_RequestInputsAndOutputsDumpDir.empty())
98 {
99 const std::string requestName = boost::str(boost::format("%1%_%2%.dump") % m_NetworkId % m_RequestCount);
100 for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
101 {
102 DumpTensor(m_RequestInputsAndOutputsDumpDir,
103 requestName,
104 BuildTensorName(tensorNamePrefix, i),
105 tensorBindings[i].second);
106 }
107 }
108}
109
110ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
telsoa01ce3e84a2018-08-31 09:31:35 +0100111 armnn::IRuntime* runtime,
112 const neuralnetworks::V1_0::Model& model,
113 const std::string& requestInputsAndOutputsDumpDir,
114 const bool gpuProfilingEnabled)
115 : m_NetworkId(networkId)
116 , m_Runtime(runtime)
117 , m_Model(model)
118 , m_RequestCount(0)
119 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
120 , m_GpuProfilingEnabled(gpuProfilingEnabled)
telsoa015307bc12018-03-09 13:51:08 +0000121{
telsoa01ce3e84a2018-08-31 09:31:35 +0100122 // Enable profiling if required.
123 m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
telsoa015307bc12018-03-09 13:51:08 +0000124}
125
126ArmnnPreparedModel::~ArmnnPreparedModel()
127{
telsoa01ce3e84a2018-08-31 09:31:35 +0100128 // Get a hold of the profiler used by this model.
129 std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
130
131 // Unload the network associated with this model.
telsoa015307bc12018-03-09 13:51:08 +0000132 m_Runtime->UnloadNetwork(m_NetworkId);
telsoa01ce3e84a2018-08-31 09:31:35 +0100133
134 // Dump the profiling info to a file if required.
135 DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
telsoa015307bc12018-03-09 13:51:08 +0000136}
137
138Return<ErrorStatus> ArmnnPreparedModel::execute(const Request& request,
139 const ::android::sp<IExecutionCallback>& callback)
140{
141 ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
142 m_RequestCount++;
143
144 if (callback.get() == nullptr) {
145 ALOGE("ArmnnPreparedModel::execute invalid callback passed");
146 return ErrorStatus::INVALID_ARGUMENT;
147 }
148
149 if (!android::nn::validateRequest(request, m_Model))
150 {
151 NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
152 return ErrorStatus::INVALID_ARGUMENT;
153 }
154
155 if (!m_RequestInputsAndOutputsDumpDir.empty())
156 {
157 ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
158 }
159
160 // allocate the tensors on the heap, as they are passed to the request thread
161 auto pInputTensors = std::make_shared<armnn::InputTensors>();
162 auto pOutputTensors = std::make_shared<armnn::OutputTensors>();
163
164 // map the memory pool into shared pointers
165 // use a shared memory pools vector on the heap, as it is passed to the request thread
166 auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
167 if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
168 {
169 NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
170 return ErrorStatus::GENERAL_FAILURE;
171 }
172
173 // add the inputs and outputs with their data
174 try
175 {
176 pInputTensors->reserve(request.inputs.size());
177 for (unsigned int i = 0; i < request.inputs.size(); i++)
178 {
179 const auto& inputArg = request.inputs[i];
180
181 const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
182 const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
183 if (inputTensor.GetMemoryArea() == nullptr)
184 {
185 ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
186 return ErrorStatus::GENERAL_FAILURE;
187 }
188
189 pInputTensors->emplace_back(i, inputTensor);
190 }
191
192 pOutputTensors->reserve(request.outputs.size());
193 for (unsigned int i = 0; i < request.outputs.size(); i++)
194 {
195 const auto& outputArg = request.outputs[i];
196
197 const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
198 const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
199 if (outputTensor.GetMemoryArea() == nullptr)
200 {
201 ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
202 return ErrorStatus::GENERAL_FAILURE;
203 }
204
205 pOutputTensors->emplace_back(i, outputTensor);
206 }
207 }
208 catch (armnn::Exception& e)
209 {
210 ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
211 NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
212 return ErrorStatus::GENERAL_FAILURE;
213 }
214
215 ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
216 // post the request for asynchronous execution
217 m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback);
218 ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
219
220 return ErrorStatus::NONE; // successfully queued
221}
222
223void ArmnnPreparedModel::ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
224 std::shared_ptr<armnn::InputTensors>& pInputTensors,
225 std::shared_ptr<armnn::OutputTensors>& pOutputTensors,
226 const ::android::sp<IExecutionCallback>& callback)
227{
228 ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
229
230 DumpTensorsIfRequired("Input", *pInputTensors);
231
232 // run it
233 try
234 {
235 m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors);
236 }
237 catch (armnn::Exception& e)
238 {
239 ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
240 NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
241 return;
242 }
243
244 DumpTensorsIfRequired("Output", *pOutputTensors);
245
246 // Commit output buffers.
247 // Note that we update *all* pools, even if they aren't actually used as outputs -
248 // this is simpler and is what the CpuExecutor does.
249 for (android::nn::RunTimePoolInfo& pool : *pMemPools)
250 {
251 pool.update();
252 }
253
254 NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph");
255}
256
257void ArmnnPreparedModel::ExecuteWithDummyInputs()
258{
259 std::vector<std::vector<char>> storage;
260 armnn::InputTensors inputTensors;
261 for (unsigned int i = 0; i < m_Model.inputIndexes.size(); i++)
262 {
263 const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
264 storage.emplace_back(inputTensorInfo.GetNumBytes());
265 const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
266
267 inputTensors.emplace_back(i, inputTensor);
268 }
269
270 armnn::OutputTensors outputTensors;
271 for (unsigned int i = 0; i < m_Model.outputIndexes.size(); i++)
272 {
273 const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
274 storage.emplace_back(outputTensorInfo.GetNumBytes());
275 const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
276
277 outputTensors.emplace_back(i, outputTensor);
278 }
279
280 try
281 {
282 m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
283 }
284 catch (armnn::Exception& e)
285 {
286 ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
287 }
288}
289
telsoa01ce3e84a2018-08-31 09:31:35 +0100290} // namespace armnn_driver