blob: 21077afbfbdc7fecd6fbf0da6ee6fb8d311cc395 [file] [log] [blame]
Tianle Chengfbfa49e2024-01-23 11:21:48 +00001//
2// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "GpuFsaDepthwiseConvolution2d.hpp"
Teresa Charlina52bca22024-02-01 17:36:48 +00007#include "UtilsGpuFsa.hpp"
8
Tianle Chengfbfa49e2024-01-23 11:21:48 +00009#include <backendsCommon/WorkloadUtils.hpp>
Tianle Chengfbfa49e2024-01-23 11:21:48 +000010
Colm Donelanf2f99ae2024-01-31 16:45:41 +000011#include <aclCommon/ArmComputeTensorUtils.hpp>
Tianle Chengfbfa49e2024-01-23 11:21:48 +000012
Tianle Chengfbfa49e2024-01-23 11:21:48 +000013#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
Teresa Charlina52bca22024-02-01 17:36:48 +000014#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
Tianle Chengfbfa49e2024-01-23 11:21:48 +000015#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h>
16#include <arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h>
17
18#include <vector>
19
Teresa Charlina52bca22024-02-01 17:36:48 +000020using namespace arm_compute::experimental::dynamic_fusion;
21using namespace armnn::armcomputetensorutils;
22
Tianle Chengfbfa49e2024-01-23 11:21:48 +000023namespace armnn
24{
25
Tianle Chengfbfa49e2024-01-23 11:21:48 +000026arm_compute::Status GpuFsaDepthwiseConvolution2dValidate(const TensorInfo& input,
27 const DepthwiseConvolution2dDescriptor& descriptor,
28 const TensorInfo& weights,
29 const Optional<TensorInfo>& biases)
30{
31 // Create a new workload sketch, for validation purposes
32 auto compileCtx = arm_compute::CLKernelLibrary::get().get_compile_context();
33 auto workloadContext = GpuWorkloadContext(&compileCtx);
34 GpuWorkloadSketch sketch{ &workloadContext };
35
36 // Build and create tensor infos using the sketch
37 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
38
39 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
40 //
41 // ACL format for weights for depthwise is:
42 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
43 // - [1, C, H, W] for [N, C, H, W] input/output layout
44 //
45 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
46 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
47 // so we do the permute here for the TensorInfo weights.
48 unsigned int aclDepthMultiplier;
49 TensorInfo weightsPermuted;
50 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
51 auto weightsShape = weightsPermuted.GetShape();
52 weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
53
54 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
55 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
56
57 auto inputInfo = workloadContext.create_tensor_info(aclInputInfo);
58 auto weightInfo = workloadContext.create_tensor_info(aclWeightsInfo);
59
60 // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
61 arm_compute::TensorInfo aclBiasInfo;
62 arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
63
64 if (descriptor.m_BiasEnabled)
65 {
66 if(!biases.has_value())
67 {
68 throw InvalidArgumentException(
69 "GpuFsaDepthwiseConvolution2dValidate: No biases set when biases are enabled");
70 }
71 aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
72 aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
73
74 biasSketchInfoPtr = workloadContext.create_tensor_info(aclBiasInfo);
75 }
76
Teresa Charlina52bca22024-02-01 17:36:48 +000077 DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
Tianle Chengfbfa49e2024-01-23 11:21:48 +000078
79 // Validate operator, check status and update reasonIfUnsupported
80 arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(sketch,
81 inputInfo,
82 weightInfo,
83 biasSketchInfoPtr,
84 depthwiseConv2dAttributes);
85
86 return aclStatus;
87}
88
89void GpuFsaDepthwiseConvolution2dCreateOp(GpuFsaPreCompiledBlob* blob,
90 const TensorInfo& input,
91 const DepthwiseConvolution2dDescriptor& descriptor,
92 const TensorInfo& weights,
93 const Optional<TensorInfo>& biases)
94{
95/*
96* Creating an Op for the GpuFsa backend requires us to create and maintain quite a bit of data, which is then stored
97* in a GpuFsaPreCompiledBlob for execution later. Specifically we need:
98* GpuWorkloadContext, this contains the TensorInfos and is unique to the Graph being executed
99* Sketch, this is similar to a subgraph and can contain one or more operations. Multiple ops can be "fused" together
100* using a single sketch.
101* The inputTensorinfos / outputTensorInfos, these are pointers to the TensorInfos used when creating the sketch.
102* They refer to the TensorInfos stored within the GpuWorkloadContext and are needed when executing the sketch
103* as the TensorInfos used when creating the Tensors must match those used to create the Sketch. Otherwise the runtime
104* doesn't know which Tensors to use.
105*/
Tianle Chengfbfa49e2024-01-23 11:21:48 +0000106 GpuWorkloadSketch* sketch = blob->sketch.get();
107 GpuWorkloadContext* workloadContext = blob->workloadContext.get();
108 std::vector<arm_compute::ITensorInfo*> inputTensorInfos = {};
109 std::vector<arm_compute::ITensorInfo*> outputTensorInfos = {};
110
111 // Build and create tensor infos using the sketch
112 const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
113
114 // ArmNN format for weights for depthwise is [1, H, W, C] independently of the input/output layout
115 //
116 // ACL format for weights for depthwise is:
117 // - [1, H, W, C] for [N, H, W, C] input/output layout (matches with ArmNN)
118 // - [1, C, H, W] for [N, C, H, W] input/output layout
119 //
120 // Therefore ArmNN weights have to be permuted when input/output layout is [N, C, H, W] to pass them to ACL.
121 // The PermuteDepthwiseConv2dWeights backend optimization takes care of this, but it has not been performed yet,
122 // so we do the permute here for the TensorInfo weights.
123 unsigned int aclDepthMultiplier;
124 TensorInfo weightsPermuted;
125 std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
126 auto weightsShape = weightsPermuted.GetShape();
127 weightsPermuted.SetShape({weightsShape[1], weightsShape[2], weightsShape[3]});
128
129 arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
130 aclWeightsInfo.set_are_values_constant(weights.IsConstant());
131
132 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclInputInfo));
133 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclWeightsInfo));
134
135 // Only create the bias tensor info if enabled, otherwise pass nullptr to validate_op
136 arm_compute::TensorInfo aclBiasInfo;
137 arm_compute::ITensorInfo* biasSketchInfoPtr = nullptr;
138
139 if (descriptor.m_BiasEnabled)
140 {
141 if(!biases.has_value())
142 {
143 throw InvalidArgumentException("GpuFsaConvolution2dValidate: No biases set when biases are enabled");
144 }
145 aclBiasInfo = BuildArmComputeTensorInfo(biases.value(), descriptor.m_DataLayout);
146 aclBiasInfo.set_are_values_constant(biases.value().IsConstant());
147
148 inputTensorInfos.emplace_back(workloadContext->create_tensor_info(aclBiasInfo));
149 biasSketchInfoPtr = inputTensorInfos[2];
150 }
151
Teresa Charlina52bca22024-02-01 17:36:48 +0000152 DepthwiseConv2dAttributes depthwiseConv2dAttributes = CreateDWConv2dAttributes(descriptor, aclDepthMultiplier);
Tianle Chengfbfa49e2024-01-23 11:21:48 +0000153
154 // Validate operator, check status and update reasonIfUnsupported
155 arm_compute::Status aclStatus = GpuDepthwiseConv2d::validate_op(*sketch,
156 inputTensorInfos[0],
157 inputTensorInfos[1],
158 biasSketchInfoPtr,
159 depthwiseConv2dAttributes);
160
161 const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
162 if (!supported)
163 {
164 throw BackendCapabilityException(
165 "\"GpuFsa\" backend failed during DepthwiseConvolution2D operation validation");
166 }
167
168 // Create the Op within the Sketch using the TensorInfos we have stored
169 arm_compute::ITensorInfo* convOutInfo = GpuDepthwiseConv2d::create_op(*sketch,
170 inputTensorInfos[0],
171 inputTensorInfos[1],
172 biasSketchInfoPtr,
173 depthwiseConv2dAttributes);
174
175 outputTensorInfos.emplace_back(workloadContext->create_tensor_info());
176 GpuOutput::create_op(*sketch, convOutInfo, outputTensorInfos[0]);
177
178 // Store the TensorInfos within the blob as unique_ptrs to be used later
179 blob->inputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(inputTensorInfos);
180 blob->outputTensorInfos = std::make_unique<std::vector<arm_compute::ITensorInfo*>>(outputTensorInfos);
181}
182
183} // namespace armnn