//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClGatherNdWorkload.hpp"
#include "ClWorkloadUtils.hpp"
#include "backendsCommon/WorkloadUtils.hpp"
#include <aclCommon/ArmComputeUtils.hpp>
#include <cl/ClTensorHandle.hpp>

using namespace armnn::armcomputetensorutils;

namespace armnn
{
arm_compute::Status ClGatherNdWorkloadValidate(const TensorInfo& paramsInfo,
                                               const TensorInfo& indicesInfo,
                                               const TensorInfo& outputInfo)
{
    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);
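    // ND is the length of each index tuple (the last dimension of the indices shape), W the number of
    // index tuples, K the flattened extent of the first ND params dimensions, and C the size of each
    // gathered slice (the product of the remaining params dimensions); see CalculateGatherNdKeyIndices.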

    /// Validate Mul
    // Indices with shape { W, ND }
    armnn::TensorInfo indices_W_ND_Info = indicesInfo;
    indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    // Flattened coefficients with shape { ND }
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);

    // Output of Mul with shape { W, ND }
    const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
                                                                      &aclFlattenedCoeffInfo,
                                                                      &aclOutputMulInfo,
                                                                      1.0f,
                                                                      arm_compute::ConvertPolicy::WRAP,
                                                                      arm_compute::RoundingPolicy::TO_ZERO,
                                                                      arm_compute::ActivationLayerInfo());

    /// Validate ReduceSum
    // Flattened indices with shape { W }
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);

    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
                                                                          indices_W_ND_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);

    auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
                                                                       &aclFlattenedIndicesInfo,
                                                                       static_cast<unsigned int>(coords[0]),
                                                                       arm_compute::ReductionOperation::SUM,
                                                                       false);
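    // The Mul and ReduceSum stages validated above mirror the index-flattening that the workload
    // constructor below configures.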

    /// Validate Gather
    // Params with shape { K, C }
    armnn::TensorInfo params_K_C_Info = paramsInfo;
    params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
    const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);

    // Output of gather with shape { W, C }
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);

    auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
    auto statusGather =
        arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);

    /// Validate Reshape
    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);

    auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);

    /// Return OK if all the layers are valid
    auto okCode = arm_compute::ErrorCode::OK;
    if (statusMul.error_code() == okCode &&
        statusReduceSum.error_code() == okCode &&
        statusGather.error_code() == okCode &&
        statusReshape.error_code() == okCode)
    {
        return arm_compute::Status(arm_compute::ErrorCode::OK,
                                   "All GatherND layers validate status OK.");
    }
    else
    {
        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "GatherND layer validate status failed.");
    }
}

ClGatherNdWorkload::ClGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
                                       const WorkloadInfo& info,
                                       const arm_compute::CLCompileContext& clCompileContext)
    : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
{
    m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);

    TensorInfo paramsInfo = info.m_InputTensorInfos[0];
    TensorInfo indicesInfo = info.m_InputTensorInfos[1];
    TensorInfo outputInfo = info.m_OutputTensorInfos[0];

    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

    /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
    /// This could be done with a MatMul instead of a multiplication followed by a reduce-sum,
    /// but GEMM does not support s32 at the moment.
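    ///
    /// Multiplying each { W, ND } index tuple by the row-major strides of the first ND params
    /// dimensions and summing along the ND axis yields one flat row index per tuple.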

    // Prepare the tensor to store the output of the reduce_sum operation
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

    // Reshape indices into { W, ND }
    indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));
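    // Note: set_tensor_shape only changes the ACL tensor metadata, so the kernels see a 2-D
    // { W, ND } view of the indices; the underlying CL buffer is left untouched.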

    // Calculate the m_FlattenedCoeff
    TensorShape paramsShape = paramsInfo.GetShape();
    std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
    for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
    {
        flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
    }
    for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
    {
        flattenedCoeff[i - 1] *= flattenedCoeff[i];
    }
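    // flattenedCoeff now holds the row-major strides of the first ND params dimensions.
    // Illustrative example: params shape { 5, 4, 2 } with ND = 2 gives flattenedCoeff = { 4, 1 },
    // so an index tuple { i, j } maps to flat row i * 4 + j of the params viewed as { K, C } = { 20, 2 }.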
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
    CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());

    // Prepare the tensor to store the output of the multiplication
    armnn::TensorInfo outputMul_Info = indicesInfo;
    outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    BuildArmComputeTensor(m_OutputMul, outputMul_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);

    // Multiply
    m_MulLayer.configure(clCompileContext,
                         &indices,
                         &m_FlattenedCoeff,
                         &m_OutputMul,
                         1.0f,
                         arm_compute::ConvertPolicy::WRAP,
                         arm_compute::RoundingPolicy::TO_ZERO,
                         arm_compute::ActivationLayerInfo());

    // Reduce Sum
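    // Sum the { W, ND } element-wise products along the ND axis to obtain one flat index per index tuple.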
    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
                                                                          outputMul_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);
    m_ReduceSumLayer.configure(clCompileContext,
                               &m_OutputMul,
                               &m_FlattenedIndices,
                               static_cast<unsigned int>(coords[0]),
                               arm_compute::ReductionOperation::SUM,
                               false);

    /// Call Gather with the appropriate shapes
    // Reshape params into { K, C }
    paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
    input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));
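    // With params viewed as { K, C }, CLGather along axis 0 selects one C-element row per flattened index.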

    // Reshape output to have the shape given by gather { W, C }
    // (the original outputInfo has the shape given by gatherNd)
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    BuildArmComputeTensor(m_OutputGather, outputGather_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);
    {
        ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_configure");
        auto aclAxis = ComputeAclAxis(0, paramsInfo);
        m_GatherLayer.configure(clCompileContext, &input, &m_FlattenedIndices, &m_OutputGather, aclAxis);
    }

    // Reshape output to the original output shape
    m_ReshapeLayer.configure(clCompileContext, &m_OutputGather, &output);
}

void ClGatherNdWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_Execute");
    RunClFunction(m_MulLayer, CHECK_LOCATION());
    RunClFunction(m_ReduceSumLayer, CHECK_LOCATION());
    RunClFunction(m_GatherLayer, CHECK_LOCATION());
    RunClFunction(m_ReshapeLayer, CHECK_LOCATION());
}
} // namespace armnn