//
// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
6#include "GpuFsaDepthwiseConvolution2d.hpp"
Tianle Chengfbfa49e2024-01-23 11:21:48 +00007#include <backendsCommon/WorkloadUtils.hpp>
Tianle Chengfbfa49e2024-01-23 11:21:48 +00008
Colm Donelanf2f99ae2024-01-31 16:45:41 +00009#include <aclCommon/ArmComputeTensorUtils.hpp>
Tianle Chengfbfa49e2024-01-23 11:21:48 +000010
11#include <arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h>
12#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
Tianle Chengfbfa49e2024-01-23 11:21:48 +000013#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h>
14#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
15
16#include <vector>
17
18namespace armnn
19{
20
21using namespace armcomputetensorutils;
22
23arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo& input,
24 const DepthwiseConvolution2dDescriptor& descriptor,
25 const TensorInfo& weights,
26 const Optional<TensorInfo>& biases)
27{
28 // Create a new workload sketch, for validation purposes
29 auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
30 auto workloadContext = GpuWorkloadContext(&compileCtx);
31 GpuWorkloadSketch sketch{ &workloadContext };
32
33 // Build and create tensor infos using the sketch
34 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
35
36 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
37 //
38 // ACL format for weights for depthwise is:
39 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
40 // - [1, C, H, W] for [N, C, H, W] input/output layout
41 //
42 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
43 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
44 // so we do the permute here for the TensorInfo weights.
45 unsigned int aclDepthMultiplier;
46 TensorInfo weightsPermuted;
47 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
48 auto weightsShape = weightsPermuted.GetShape();
49 weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
50
51 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
52 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
53
54 auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
55 auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
56
57 // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
58 arm_compute::TensorInfo aclBiasInfo;
59 arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
60
61 if (descriptor.m_BiasEnabled)
62 {
63 if(!biases.has_value())
64 {
65 throw InvalidArgumentException(
66 "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
67 }
68 aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
69 aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
70
71 biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
72 }
73
74 // Set DepthwiseConv2d attributes using descriptor
75 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
76 descriptor.m_DilationY);
77 const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor);
78 const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
79
80 DepthwiseConv2dAttributes depthwiseConv2dAttributes{};
81 depthwiseConv2dAttributes.pad(aclPadInfo);
82 depthwiseConv2dAttributes.stride(aclStrideInfo);
83 depthwiseConv2dAttributes.dilation(aclDilationInfo);
84 depthwiseConv2dAttributes.depth_multiplier(aclDepthMultiplier);
85
86 // Validate operator, check status and update reasonIfUnsupported
87 arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
88 inputInfo,
89 weightInfo,
90 biasSketchInfoPtr,
91 depthwiseConv2dAttributes);
92
93 return aclStatus;
94}
95
96void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob,
97 const TensorInfo& input,
98 const DepthwiseConvolution2dDescriptor& descriptor,
99 const TensorInfo& weights,
100 const Optional<TensorInfo>& biases)
101{
102/*
103* Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
104* in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
105* GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
106* Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
107* using a single sketch.
108* The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
109* They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
110* as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
111* doesn't know which Tensors to use.
112*/
113 using namespace arm_compute::experimental::dynamic_fusion;
114 GpuWorkloadSketch* sketch = blob->sketch.get();
115 GpuWorkloadContext* workloadContext = blob->workloadContext.get();
116 std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
117 std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
118
119 // Build and create tensor infos using the sketch
120 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
121
122 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
123 //
124 // ACL format for weights for depthwise is:
125 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
126 // - [1, C, H, W] for [N, C, H, W] input/output layout
127 //
128 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
129 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
130 // so we do the permute here for the TensorInfo weights.
131 unsigned int aclDepthMultiplier;
132 TensorInfo weightsPermuted;
133 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
134 auto weightsShape = weightsPermuted.GetShape();
135 weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
136
137 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
138 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
139
140 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
141 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
142
143 // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
144 arm_compute::TensorInfo aclBiasInfo;
145 arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
146
147 if (descriptor.m_BiasEnabled)
148 {
149 if(!biases.has_value())
150 {
151 throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
152 }
153 aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
154 aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
155
156 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
157 biasSketchInfoPtr = inputTensorInfos[2];
158 }
159
160 // Set DepthwiseConv2d attributes using descriptor
161 const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(descriptor.m_DilationX,
162 descriptor.m_DilationY);
163 const arm_compute::Padding2D aclPadInfo = BuildArmComputePaddingInfo(descriptor);
164 const arm_compute::Size2D aclStrideInfo = BuildArmComputeSize2D(descriptor.m_StrideX, descriptor.m_StrideY);
165
166 DepthwiseConv2dAttributes depthwiseConv2dAttributes{};
167 depthwiseConv2dAttributes.pad(aclPadInfo);
168 depthwiseConv2dAttributes.stride(aclStrideInfo);
169 depthwiseConv2dAttributes.dilation(aclDilationInfo);
170 depthwiseConv2dAttributes.depth_multiplier(aclDepthMultiplier);
171
172 // Validate operator, check status and update reasonIfUnsupported
173 arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
174 inputTensorInfos[0],
175 inputTensorInfos[1],
176 biasSketchInfoPtr,
177 depthwiseConv2dAttributes);
178
179 const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
180 if (!supported)
181 {
182 throw BackendCapabilityException(
183 "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
184 }
185
186 // Create the Op within the Sketch using the TensorInfos we have stored
187 arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
188 inputTensorInfos[0],
189 inputTensorInfos[1],
190 biasSketchInfoPtr,
191 depthwiseConv2dAttributes);
192
193 outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
194 GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
195
196 // Store the TensorInfos within the blob as unique_ptrs to be used later
197 blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
198 blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
199}
200
201} // namespace armnn