//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClDepthwiseConvolutionWorkload.hpp"

#include <ResolveType.hpp>
#include "ClWorkloadUtils.hpp"

#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <cl/ClTensorHandle.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/WorkloadUtils.hpp>

#include <arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h>

namespace armnn
{

using namespace armcomputetensorutils;

arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
                                                           const TensorInfo& output,
                                                           const DepthwiseConvolution2dDescriptor& descriptor,
                                                           const TensorInfo& weights,
                                                           const Optional<TensorInfo>& biases)
{
    const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);

    // ArmNN's weight format is [ M, I, H, W ]
    const unsigned int aclDepthMultiplier = weights.GetShape()[0];

    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
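    // For example (illustrative shapes only, not values taken from this workload): a weight tensor of
    // [ M=2, I=16, H=3, W=3 ] becomes [ 1, 3, 3, 32 ] for NHWC or [ 1, 32, 3, 3 ] for NCHW.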
    TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);

    // Convert the weights into the compute library format
    const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);

    arm_compute::TensorInfo aclBiasesInfo;
    arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;

    if (descriptor.m_BiasEnabled)
    {
        BOOST_ASSERT(biases.has_value());

        aclBiasesInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
        optionalAclBiasesInfo = &aclBiasesInfo;
    }

    const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
            descriptor.m_DilationX,
            descriptor.m_DilationY);

    return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
                                                              &aclWeightsInfo,
                                                              optionalAclBiasesInfo,
                                                              &aclOutputInfo,
                                                              aclPadStrideInfo,
                                                              aclDepthMultiplier,
                                                              arm_compute::ActivationLayerInfo(),
                                                              aclDilationInfo);
}

ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
    const DepthwiseConvolution2dQueueDescriptor& descriptor,
    const WorkloadInfo& info)
    : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
    // Allocate a buffer for the swizzling of the weight tensor
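    // (The permuted weights are staged in this CPU-side buffer and uploaded into the CL kernel tensor
    // further down, once the Compute Library function has been configured.)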
    std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);

    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
    ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
                                                                   m_Data.m_Parameters.m_DataLayout,
                                                                   permuteBuffer.get());

    // Convert the weights into the compute library format
    m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
    BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);

    if (m_Data.m_Parameters.m_BiasEnabled)
    {
        m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
        BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), m_Data.m_Parameters.m_DataLayout);
    }

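    // Stride and padding come from the ArmNN descriptor; FLOOR is the rounding the Compute Library
    // applies when deriving the output dimensions from these values.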
    const arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
                                                   m_Data.m_Parameters.m_StrideY,
                                                   m_Data.m_Parameters.m_PadLeft,
                                                   m_Data.m_Parameters.m_PadRight,
                                                   m_Data.m_Parameters.m_PadTop,
                                                   m_Data.m_Parameters.m_PadBottom,
                                                   arm_compute::DimensionRoundingType::FLOOR);

    const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
            m_Data.m_Parameters.m_DilationX,
            m_Data.m_Parameters.m_DilationY);

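    // A depthwise convolution workload has exactly one input tensor and one output tensor.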
    std::string name = std::string("ClDepthwiseConvolutionWorkload");
    m_Data.ValidateInputsOutputs(name, 1, 1);

    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
    input.info()->set_data_layout(aclDataLayout);
    output.info()->set_data_layout(aclDataLayout);

    // ArmNN's weight format is [ M, I, H, W ]
    auto& weightInfo = m_Data.m_Weight->GetTensorInfo();

    // Get the depth multiplier
    const unsigned int depthMultiplier = weightInfo.GetShape()[0];

    // Check for optimisation opportunities.
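    // In ArmNN's [ M, I, H, W ] weight layout, indices 2 and 3 are the filter height and width, so this
    // selects the Compute Library's specialised 3x3 depthwise kernel whenever the filter is 3x3.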
    bool use3x3Optimisation = (weightInfo.GetShape()[2] == 3) && (weightInfo.GetShape()[3] == 3);
    if (use3x3Optimisation)
    {
        m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
        static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure(
            &input,
            m_KernelTensor.get(),
            m_BiasTensor.get(),
            &output,
            padStrideInfo,
            depthMultiplier,
            arm_compute::ActivationLayerInfo(),
            aclDilationInfo);
    }
    else
    {
        m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
        static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
            &input,
            m_KernelTensor.get(),
            m_BiasTensor.get(),
            &output,
            padStrideInfo,
            depthMultiplier,
            arm_compute::ActivationLayerInfo(),
            aclDilationInfo);
    }

    BOOST_ASSERT(m_DepthwiseConvolutionLayer);

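    // Upload the permuted weights (and the biases, if present) into the CL tensors now that the function
    // has been configured.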
    ScopedCpuTensorHandle weightsPermutedHandle(weightPermuted);
    InitializeArmComputeClTensorData(*m_KernelTensor, &weightsPermutedHandle);

    if (m_BiasTensor)
    {
        InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
    }

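    // prepare() lets the Compute Library do its one-off preparation work up front, so that any tensors it
    // no longer needs can be released immediately afterwards.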
    m_DepthwiseConvolutionLayer->prepare();
    FreeUnusedTensors();
}

void ClDepthwiseConvolutionWorkload::FreeUnusedTensors()
{
    FreeTensorIfUnused(m_KernelTensor);
    FreeTensorIfUnused(m_BiasTensor);
}

void ClDepthwiseConvolutionWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionWorkload_Execute");
    BOOST_ASSERT(m_DepthwiseConvolutionLayer);

    RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
}

} // namespace armnn