blob: 18085edab515814bda5f58bb7a671bc1c0452152 [file] [log] [blame]
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
Nattapat Chaimanowong77140882018-10-17 11:12:19 +01006#include "NeonDepthwiseConvolutionWorkload.hpp"
7
Matthew Benthamd80a7122019-01-08 17:52:37 +00008#include "NeonWorkloadUtils.hpp"
9
10#include <DataLayoutIndexed.hpp>
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000011#include <aclCommon/ArmComputeTensorUtils.hpp>
12#include <neon/NeonLayerSupport.hpp>
13#include <backendsCommon/CpuTensorHandle.hpp>
Matteo Martincigh747ef822018-12-18 09:26:39 +000014#include <backendsCommon/WorkloadUtils.hpp>
telsoa014fcda012018-03-09 14:13:49 +000015
Matthew Benthamd80a7122019-01-08 17:52:37 +000016#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h>
17
18using namespace armnnUtils;
19
telsoa014fcda012018-03-09 14:13:49 +000020namespace armnn
21{
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010022
telsoa014fcda012018-03-09 14:13:49 +000023using namespace armcomputetensorutils;
24
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010025arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
Matteo Martincigh747ef822018-12-18 09:26:39 +000026 const TensorInfo& output,
27 const DepthwiseConvolution2dDescriptor& descriptor,
28 const TensorInfo& weights,
29 const Optional<TensorInfo>& biases)
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010030{
Matteo Martincigh747ef822018-12-18 09:26:39 +000031 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
32 const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
33
34 // ArmNN's weight format is [ M, I, H, W ]
35 const unsigned int aclDepthMultiplier = weights.GetShape()[0];
36
37 // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
38 // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
39 TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
40
41 // Convert the weights into the compute library format
42 const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010043
44 arm_compute::TensorInfo aclBiasesInfo;
45 arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
46
47 if (descriptor.m_BiasEnabled)
48 {
49 BOOST_ASSERT(biases.has_value());
50
51 aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
52 optionalAclBiasesInfo = &aclBiasesInfo;
53 }
54
Pablo Tellof0bd6832019-04-26 17:58:13 +010055 arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
56 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
57 descriptor.m_DilationX,descriptor.m_DilationY);
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010058
59 return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
60 &aclWeightsInfo,
61 optionalAclBiasesInfo,
62 &aclOutputInfo,
63 aclPadStrideInfo,
Pablo Tellof0bd6832019-04-26 17:58:13 +010064 aclDepthMultiplier,
65 arm_compute::ActivationLayerInfo(),
66 aclDilationInfo);
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010067}
68
69NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
telsoa014fcda012018-03-09 14:13:49 +000070 const DepthwiseConvolution2dQueueDescriptor& descriptor,
71 const WorkloadInfo& info)
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010072 : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
telsoa014fcda012018-03-09 14:13:49 +000073{
Matteo Martincigh747ef822018-12-18 09:26:39 +000074 // ArmNN's weight format is [ M, I, H, W ]
75 auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
telsoa014fcda012018-03-09 14:13:49 +000076
Matteo Martincigh747ef822018-12-18 09:26:39 +000077 // Allocate a buffer for the swizzling of the weight tensor
78 std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
79
80 // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
81 // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
82 ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
83 m_Data.m_Parameters.m_DataLayout,
84 permuteBuffer.get());
85
86 // Convert the weights into the compute library format
telsoa01c577f2c2018-08-31 09:22:23 +010087 m_KernelTensor = std::make_unique<arm_compute::Tensor>();
Matteo Martincigh747ef822018-12-18 09:26:39 +000088 BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
Mohamed Nour Abouelseoud7e7261e2018-11-27 17:35:35 +000089
telsoa014fcda012018-03-09 14:13:49 +000090 if (m_Data.m_Parameters.m_BiasEnabled)
91 {
telsoa01c577f2c2018-08-31 09:22:23 +010092 m_BiasTensor = std::make_unique<arm_compute::Tensor>();
Nikhil Rajcec6b652018-10-12 13:51:57 +010093 BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
telsoa014fcda012018-03-09 14:13:49 +000094 }
95
Pablo Tellof0bd6832019-04-26 17:58:13 +010096 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
97 m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
98
Nattapat Chaimanowong77140882018-10-17 11:12:19 +010099 m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
telsoa014fcda012018-03-09 14:13:49 +0000100
Derek Lambertic81855f2019-06-13 17:34:19 +0100101 IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
102 IAclTensorHandle* outputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
Matteo Martincigh747ef822018-12-18 09:26:39 +0000103
Mohamed Nour Abouelseoud7e7261e2018-11-27 17:35:35 +0000104 arm_compute::ITensor& input = inputTensorHandle->GetTensor();
105 arm_compute::ITensor& output = outputTensorHandle->GetTensor();
telsoa014fcda012018-03-09 14:13:49 +0000106
Nikhil Rajcec6b652018-10-12 13:51:57 +0100107 arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
108 input.info()->set_data_layout(aclDataLayout);
109 output.info()->set_data_layout(aclDataLayout);
110
Bruno Goncalves22972f02019-04-26 21:03:24 -0300111 // Get the depth multiplier
Matteo Martincigh747ef822018-12-18 09:26:39 +0000112 const unsigned int depthMultiplier = weightInfo.GetShape()[0];
Mohamed Nour Abouelseoud7e7261e2018-11-27 17:35:35 +0000113
Aron Virginas-Tar6f3785d2019-07-22 15:30:22 +0100114 arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
115
Matthew Jackson5488cfa2019-08-02 14:53:10 +0100116 const arm_compute::ITensorInfo* inputInfo = input.info();
117 const arm_compute::ITensorInfo* kernelInfo = m_KernelTensor->info();
118 const arm_compute::ITensorInfo* biasInfo = m_BiasTensor ? m_BiasTensor->info() : nullptr;
119 const arm_compute::ITensorInfo* outputInfo = output.info();
Pablo Tellob38ed402019-07-26 13:45:48 +0100120
Matthew Jackson5488cfa2019-08-02 14:53:10 +0100121 // Check for optimisation opportunities
122 arm_compute::Status optimizationStatus =
123 arm_compute::NEDepthwiseConvolutionLayerOptimized::validate(inputInfo,
124 kernelInfo,
125 biasInfo,
126 outputInfo,
127 padStrideInfo,
128 depthMultiplier,
129 arm_compute::ActivationLayerInfo(),
130 aclDilationInfo);
131
132 if (optimizationStatus.error_code() == arm_compute::ErrorCode::OK)
telsoa014fcda012018-03-09 14:13:49 +0000133 {
Aron Virginas-Tar974e5b62019-07-11 14:57:01 +0100134 m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayerOptimized>();
135 static_cast<arm_compute::NEDepthwiseConvolutionLayerOptimized*>(
telsoa014fcda012018-03-09 14:13:49 +0000136 m_pDepthwiseConvolutionLayer.get())->configure(&input,
telsoa01c577f2c2018-08-31 09:22:23 +0100137 m_KernelTensor.get(),
138 m_BiasTensor.get(),
telsoa014fcda012018-03-09 14:13:49 +0000139 &output,
Matteo Martincigh747ef822018-12-18 09:26:39 +0000140 padStrideInfo,
Pablo Tellof0bd6832019-04-26 17:58:13 +0100141 depthMultiplier,
142 arm_compute::ActivationLayerInfo(),
143 aclDilationInfo);
telsoa014fcda012018-03-09 14:13:49 +0000144 }
145 else
146 {
147 m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
148 static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
149 m_pDepthwiseConvolutionLayer.get())->configure(&input,
telsoa01c577f2c2018-08-31 09:22:23 +0100150 m_KernelTensor.get(),
151 m_BiasTensor.get(),
telsoa014fcda012018-03-09 14:13:49 +0000152 &output,
Matteo Martincigh747ef822018-12-18 09:26:39 +0000153 padStrideInfo,
Pablo Tellof0bd6832019-04-26 17:58:13 +0100154 depthMultiplier,
155 arm_compute::ActivationLayerInfo(),
156 aclDilationInfo);
telsoa014fcda012018-03-09 14:13:49 +0000157 }
158
159 BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
160
Matteo Martincigh747ef822018-12-18 09:26:39 +0000161 ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
162 InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
telsoa014fcda012018-03-09 14:13:49 +0000163
Mohamed Nour Abouelseoud7e7261e2018-11-27 17:35:35 +0000164 if (m_Data.m_Parameters.m_BiasEnabled)
telsoa014fcda012018-03-09 14:13:49 +0000165 {
Nattapat Chaimanowong177d8d22018-10-16 13:21:27 +0100166 InitializeArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias);
telsoa014fcda012018-03-09 14:13:49 +0000167 }
telsoa01c577f2c2018-08-31 09:22:23 +0100168
169 m_pDepthwiseConvolutionLayer->prepare();
170 FreeUnusedTensors();
telsoa014fcda012018-03-09 14:13:49 +0000171}
172
Nattapat Chaimanowong77140882018-10-17 11:12:19 +0100173void NeonDepthwiseConvolutionWorkload::Execute() const
telsoa014fcda012018-03-09 14:13:49 +0000174{
Nattapat Chaimanowong77140882018-10-17 11:12:19 +0100175 ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute");
telsoa014fcda012018-03-09 14:13:49 +0000176 BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
177
178 m_pDepthwiseConvolutionLayer->run();
179}
180
Nattapat Chaimanowong77140882018-10-17 11:12:19 +0100181void NeonDepthwiseConvolutionWorkload::FreeUnusedTensors()
telsoa01c577f2c2018-08-31 09:22:23 +0100182{
183 FreeTensorIfUnused(m_KernelTensor);
184 FreeTensorIfUnused(m_BiasTensor);
185}
186
telsoa014fcda012018-03-09 14:13:49 +0000187} //namespace armnn