//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClGatherNdWorkload.hpp"
#include "ClWorkloadUtils.hpp"
#include "backendsCommon/WorkloadUtils.hpp"
#include <aclCommon/ArmComputeUtils.hpp>
#include <cl/ClTensorHandle.hpp>

using namespace armnn::armcomputetensorutils;

namespace armnn
{
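// GatherNd has no single ACL function; both validation and the workload decompose it into
// four stages that run back to back:
//   1) Mul:       multiply the indices by per-dimension coefficients (strides of params)
//   2) ReduceSum: sum over the last axis so each N-dimensional index becomes one flat index
//   3) Gather:    gather rows from params viewed as a 2D { K, C } tensor
//   4) Reshape:   restore the expected GatherNd output shape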
arm_compute::Status ClGatherNdWorkloadValidate(const TensorInfo& paramsInfo,
                                               const TensorInfo& indicesInfo,
                                               const TensorInfo& outputInfo)
{
    // Calculate ND, K, W, C.
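    // ND: length of each index tuple (number of params dimensions being indexed)
    // W:  number of index tuples (product of all indices dimensions except the last)
    // K:  product of the first ND dimensions of params (rows of the flattened view)
    // C:  product of the remaining params dimensions (elements per gathered slice)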
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

    /// Validate Mul
    // Indices with shape { W, ND }
    armnn::TensorInfo indices_W_ND_Info = indicesInfo;
    indices_W_ND_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    const arm_compute::TensorInfo aclIndicesInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    // Flattened coefficients with shape { ND }
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    const arm_compute::TensorInfo aclFlattenedCoeffInfo = BuildArmComputeTensorInfo(flattenedCoeff_Info);

    // Output of Mul with shape { W, ND }
    const arm_compute::TensorInfo aclOutputMulInfo = BuildArmComputeTensorInfo(indices_W_ND_Info);

    auto statusMul = arm_compute::CLPixelWiseMultiplication::validate(&aclIndicesInfo,
                                                                      &aclFlattenedCoeffInfo,
                                                                      &aclOutputMulInfo,
                                                                      1.0f,
                                                                      arm_compute::ConvertPolicy::WRAP,
                                                                      arm_compute::RoundingPolicy::TO_ZERO,
                                                                      arm_compute::ActivationLayerInfo());

    /// Validate ReduceSum
    // Flattened indices with shape { W }
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    const arm_compute::TensorInfo aclFlattenedIndicesInfo = BuildArmComputeTensorInfo(flattenedIndices_Info);

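    // Reduce over axis 1 (the ND axis) of the { W, ND } multiplication result,
    // leaving one flattened index per row.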
    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclOutputMulInfo.num_dimensions(),
                                                                          indices_W_ND_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);

    auto statusReduceSum = arm_compute::CLReductionOperation::validate(&aclOutputMulInfo,
                                                                       &aclFlattenedIndicesInfo,
                                                                       static_cast<unsigned int>(coords[0]),
                                                                       arm_compute::ReductionOperation::SUM,
                                                                       false);

    /// Validate Gather
    // Params with shape { K, C }
    armnn::TensorInfo params_K_C_Info = paramsInfo;
    params_K_C_Info.SetShape({ keyIndices["K"], keyIndices["C"] });
    const arm_compute::TensorInfo aclParamsInfo = BuildArmComputeTensorInfo(params_K_C_Info);

    // Output of gather with shape { W, C }
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    const arm_compute::TensorInfo aclOutputGatherInfo = BuildArmComputeTensorInfo(outputGather_Info);

    auto aclAxis = ComputeAclAxis(0, params_K_C_Info);
    auto statusGather =
        arm_compute::CLGather::validate(&aclParamsInfo, &aclFlattenedIndicesInfo, &aclOutputGatherInfo, aclAxis);

    /// Validate Reshape
    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(outputInfo);

    auto statusReshape = arm_compute::CLReshapeLayer::validate(&aclOutputGatherInfo, &aclOutputInfo);

    /// Return OK if all the layers are valid
    auto okCode = arm_compute::ErrorCode::OK;
    if (statusMul.error_code() == okCode &&
        statusReduceSum.error_code() == okCode &&
        statusGather.error_code() == okCode &&
        statusReshape.error_code() == okCode)
    {
        return arm_compute::Status(arm_compute::ErrorCode::OK,
                                   "All GatherND layers validate status OK.");
    }
    else
    {
        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
                                   "GatherND layer validate status failed.");
    }
}

ClGatherNdWorkload::ClGatherNdWorkload(const GatherNdQueueDescriptor& descriptor,
                                       const WorkloadInfo& info,
                                       const arm_compute::CLCompileContext& clCompileContext)
    : ClBaseWorkload<GatherNdQueueDescriptor>(descriptor, info)
{
    m_Data.ValidateInputsOutputs("ClGatherNdWorkload", 2, 1);

    TensorInfo paramsInfo = info.m_InputTensorInfos[0];
    TensorInfo indicesInfo = info.m_InputTensorInfos[1];
    TensorInfo outputInfo = info.m_OutputTensorInfos[0];

    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ICLTensor& indices = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    // Calculate ND, K, W, C.
    std::map<std::string, unsigned int> keyIndices = CalculateGatherNdKeyIndices(paramsInfo, indicesInfo);

    /// Calculate flattened indices: m_FlattenedIndices = indices * m_FlattenedCoeff.
    /// This could be done using MatMul instead of multiplication followed by reduce sum operation,
    /// but GeMM does not support s32 at the moment.

    // Prepare the tensor to store the output of the reduce_sum operation
    armnn::TensorInfo flattenedIndices_Info = indicesInfo;
    flattenedIndices_Info.SetShape({ keyIndices["W"] });
    BuildArmComputeTensor(m_FlattenedIndices, flattenedIndices_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedIndices);

    // Reshape indices into { W, ND }
    indices.info()->set_tensor_shape(BuildArmComputeTensorShape({ keyIndices["W"], keyIndices["ND"] }));

    // Calculate the m_FlattenedCoeff
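    // The coefficients are the row-major strides of the first ND dimensions of params:
    // flattenedCoeff[i] = paramsShape[i+1] * ... * paramsShape[ND-1], with the last entry equal to 1.
    // Dotting an index tuple with these strides yields its row offset in the { K, C } view of params.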
    TensorShape paramsShape = paramsInfo.GetShape();
    std::vector<int32_t> flattenedCoeff(keyIndices["ND"], 1);
    for (unsigned int i = 1; i < keyIndices["ND"]; ++i)
    {
        flattenedCoeff[i - 1] = static_cast<int32_t>(paramsShape[i]);
    }
    for (unsigned int i = keyIndices["ND"] - 1; i > 0; --i)
    {
        flattenedCoeff[i - 1] *= flattenedCoeff[i];
    }
    armnn::TensorInfo flattenedCoeff_Info = indicesInfo;
    flattenedCoeff_Info.SetShape({ keyIndices["ND"] });
    BuildArmComputeTensor(m_FlattenedCoeff, flattenedCoeff_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_FlattenedCoeff);
    ARMNN_ASSERT_MSG(indicesInfo.GetDataType() == DataType::Signed32,
                     "indices must be Signed32 so the int32_t flattenedCoeff data matches m_FlattenedCoeff");
    CopyArmComputeClTensorData<int32_t>(m_FlattenedCoeff, flattenedCoeff.data());

    // Prepare the tensor to store the output of the multiplication
    armnn::TensorInfo outputMul_Info = indicesInfo;
    outputMul_Info.SetShape({ keyIndices["W"], keyIndices["ND"] });
    BuildArmComputeTensor(m_OutputMul, outputMul_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputMul);

    // Multiply
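    // Multiply each { W, ND } index row element-wise by the ND stride coefficients
    // (m_FlattenedCoeff is broadcast across the W rows).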
    m_MulLayer.configure(clCompileContext,
                         &indices,
                         &m_FlattenedCoeff,
                         &m_OutputMul,
                         1.0f,
                         arm_compute::ConvertPolicy::WRAP,
                         arm_compute::RoundingPolicy::TO_ZERO,
                         arm_compute::ActivationLayerInfo());

    // Reduce Sum
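    // Sum over the ND axis so that each { W, ND } row collapses into a single linear index
    // into the { K, C } params view, giving m_FlattenedIndices of shape { W }.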
    const std::vector<unsigned int> armnnReduceAxes(1, 1);
    arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(m_OutputMul.info()->num_dimensions(),
                                                                          outputMul_Info.GetNumDimensions(),
                                                                          armnnReduceAxes);
    m_ReduceSumLayer.configure(clCompileContext,
                               &m_OutputMul,
                               &m_FlattenedIndices,
                               static_cast<unsigned int>(coords[0]),
                               arm_compute::ReductionOperation::SUM,
                               false);

    /// Call Gather with adequate shapes
    // Reshape params into { K, C }
    paramsInfo.SetShape({ keyIndices["K"], keyIndices["C"] });
    input.info()->set_tensor_shape(BuildArmComputeTensorShape(paramsInfo.GetShape()));

    // Reshape output to have the shape given by gather { W, C }
    // (the original outputInfo has the shape given by gatherNd)
    armnn::TensorInfo outputGather_Info = outputInfo;
    outputGather_Info.SetShape({ keyIndices["W"], keyIndices["C"] });
    BuildArmComputeTensor(m_OutputGather, outputGather_Info);
    armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_OutputGather);
    {
        ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_configure");
        auto aclAxis = ComputeAclAxis(0, paramsInfo);
        m_GatherLayer.configure(clCompileContext, &input, &m_FlattenedIndices, &m_OutputGather, aclAxis);
    }

    // Reshape output to the original output shape
    m_ReshapeLayer.configure(clCompileContext, &m_OutputGather, &output);
}

void ClGatherNdWorkload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_CL_NAME_GUID("ClGatherNdWorkload_Execute");
    RunClFunction(m_MulLayer, CHECK_LOCATION());
    RunClFunction(m_ReduceSumLayer, CHECK_LOCATION());
    RunClFunction(m_GatherLayer, CHECK_LOCATION());
    RunClFunction(m_ReshapeLayer, CHECK_LOCATION());
}
} // namespace armnn