blob: 796345a9237fb9e72d5ac797669b5acdc5131a83 [file] [log] [blame]
/*
 * Copyright (c) 2017-2021, 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/kernels/ClSoftmaxKernel.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010025
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010026#include "arm_compute/core/CL/CLCompileContext.h"
27#include "arm_compute/core/CL/CLHelpers.h"
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000028#include "arm_compute/core/CL/ICLTensor.h"
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010029#include "arm_compute/core/CL/OpenCL.h"
30#include "arm_compute/core/CoreTypes.h"
31#include "arm_compute/core/Dimensions.h"
32#include "arm_compute/core/Error.h"
33#include "arm_compute/core/Helpers.h"
34#include "arm_compute/core/ITensorInfo.h"
35#include "arm_compute/core/ITensorPack.h"
36#include "arm_compute/core/KernelDescriptors.h"
37#include "arm_compute/core/Steps.h"
38#include "arm_compute/core/TensorShape.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010039#include "arm_compute/core/Utils.h"
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010040#include "arm_compute/core/utils/DataTypeUtils.h"
Matthew Bentham314d3e22023-06-23 10:53:52 +000041#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
Matthew Bentham314d3e22023-06-23 10:53:52 +000042#include "arm_compute/core/utils/StringUtils.h"
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010043#include "arm_compute/core/Validate.h"
44#include "arm_compute/core/Window.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010045
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010046#include "src/core/helpers/WindowHelpers.h"
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000047#include "support/Cast.h"
Matthew Bentham758b5ba2020-03-05 23:37:48 +000048#include "support/StringSupport.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010049
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010050#include <string>
51
Giorgio Arena2d1a8352020-10-26 15:04:08 +000052namespace arm_compute
53{
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000054namespace opencl
55{
56namespace kernels
57{
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010058
59ClSoftmaxKernel::ClSoftmaxKernel()
Chunosovf450caa2017-11-08 16:09:35 +070060{
Chunosovf450caa2017-11-08 16:09:35 +070061}
Anthony Barbier6ff3b192017-09-04 18:44:23 +010062
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010063Status ClSoftmaxKernel::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
Georgios Pinitas30902ed2017-11-14 15:32:57 +000064{
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010065 ARM_COMPUTE_UNUSED(src, dst, info);
Georgios Pinitas30902ed2017-11-14 15:32:57 +000066
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010067 ARM_COMPUTE_RETURN_ERROR_ON(src.num_dimensions() > 4);
Georgios Pinitas30902ed2017-11-14 15:32:57 +000068
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010069 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &dst);
70
71 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN( //
72 &src, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
73 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
74
75 ARM_COMPUTE_RETURN_ERROR_ON(info.input_data_type != src.data_type());
76 ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) ||
77 static_cast<int32_t>(src.num_dimensions()) <= info.axis);
78
79 if (is_data_type_quantized_asymmetric(src.data_type()))
Georgios Pinitas30902ed2017-11-14 15:32:57 +000080 {
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010081 ARM_COMPUTE_RETURN_ERROR_ON(src.quantization_info().uniform().scale < 0);
Georgios Pinitas30902ed2017-11-14 15:32:57 +000082
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010083 ARM_COMPUTE_RETURN_ERROR_ON(dst.quantization_info() !=
84 get_softmax_output_quantization_info(src.data_type(), info.is_log));
Georgios Pinitas30902ed2017-11-14 15:32:57 +000085 }
86
Georgios Pinitas631c41a2017-12-06 11:53:03 +000087 return Status{};
Georgios Pinitas30902ed2017-11-14 15:32:57 +000088}
89
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010090void ClSoftmaxKernel::configure(const CLCompileContext &compile_context,
91 const ITensorInfo &src,
92 ITensorInfo &dst,
93 const SoftmaxKernelInfo &info)
Giorgio Arenab8ab9972017-11-29 15:09:39 +000094{
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010095 ARM_COMPUTE_UNUSED(compile_context, src, dst, info);
Giorgio Arenab8ab9972017-11-29 15:09:39 +000096
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010097 const auto &dst_shape = dst.tensor_shape();
Giorgio Arenab8ab9972017-11-29 15:09:39 +000098
Viet-Hoa Do29254ae2023-10-13 17:40:32 +010099 const auto data_type = src.data_type();
100 const auto element_size = src.element_size();
101
102 const auto is_quantized = data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED;
103 const auto src_qinfo = src.quantization_info().uniform();
104 const auto dst_qinfo = dst.quantization_info().uniform();
105
106 const auto axis = wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions()));
107 const auto length = dst_shape[axis];
108
109 const auto tmp_data_type = is_quantized ? DataType::F32 : data_type;
110
111 const auto vec_size = adjust_vec_size(16 / element_size, dst_shape[0]);
112 const auto vec_size_leftover = dst_shape[0] % vec_size;
113
114 std::string kernel_name("softmax");
115 CLBuildOptions build_opts;
116
117 build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
118 build_opts.add_option("-DTMP_DATA_TYPE=" + get_cl_type_from_data_type(tmp_data_type));
119 build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size));
120 build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
121 build_opts.add_option("-DLENGTH=" + support::cpp11::to_string(length));
122 build_opts.add_option_if(info.is_log, "-DIS_LOG");
123 build_opts.add_option("-DBETA=" + float_to_string_with_full_precision(info.beta));
124
125 build_opts.add_option_if(is_quantized, "-DIS_QUANTIZED");
126 build_opts.add_option_if(is_quantized, "-DSRC_OFFSET=" + float_to_string_with_full_precision(src_qinfo.offset));
127 build_opts.add_option_if(is_quantized, "-DSRC_SCALE=" + float_to_string_with_full_precision(src_qinfo.scale));
128 build_opts.add_option_if(is_quantized, "-DDST_OFFSET=" + float_to_string_with_full_precision(dst_qinfo.offset));
129 build_opts.add_option_if(is_quantized, "-DDST_SCALE=" + float_to_string_with_full_precision(dst_qinfo.scale));
130
131 if (axis == 0)
Giorgio Arenab8ab9972017-11-29 15:09:39 +0000132 {
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100133 kernel_name += "_x";
134 build_opts.add_option("-DSOFTMAX_X");
135
136 if (is_quantized)
Giorgio Arenab8ab9972017-11-29 15:09:39 +0000137 {
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100138 _tmp_info = TensorInfo(dst_shape, 1, tmp_data_type);
Giorgio Arenab8ab9972017-11-29 15:09:39 +0000139 }
140 }
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100141 else
142 {
143 kernel_name += "_non_x";
144 build_opts.add_option("-DSOFTMAX_NON_X");
Giorgio Arenab8ab9972017-11-29 15:09:39 +0000145
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100146 TensorShape tmp_shape;
Giorgio Arenab8ab9972017-11-29 15:09:39 +0000147
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100148 tmp_shape.set(0, length * vec_size, false);
149 tmp_shape.set(1, dst_shape[0] + (vec_size - vec_size_leftover) % vec_size, false);
Chunosovd6afedc2017-11-06 22:09:45 +0700150
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100151 for (size_t i = 2; i <= static_cast<size_t>(axis); ++i)
152 {
153 tmp_shape.set(i, dst_shape[i - 1], false);
154 }
Giorgio Arena4a95bba2021-06-28 11:00:27 +0100155
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100156 for (size_t i = axis + 1; i < dst_shape.num_dimensions(); ++i)
157 {
158 tmp_shape.set(i, dst_shape[i], false);
159 }
Giorgio Arena2d1a8352020-10-26 15:04:08 +0000160
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100161 _tmp_info = TensorInfo(tmp_shape, 1, tmp_data_type);
162 }
Chunosovd6afedc2017-11-06 22:09:45 +0700163
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100164 _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
Chunosovd6afedc2017-11-06 22:09:45 +0700165
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100166 // Configure kernel window and kernel arguments.
167 Window win = calculate_max_window(src, Steps(vec_size));
Giorgio Arena2d1a8352020-10-26 15:04:08 +0000168
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100169 bool has_collapsed = true;
Chunosovd6afedc2017-11-06 22:09:45 +0700170
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100171 win = win.shift_dimensions(1, axis); // Remove this axis from the window/GWS.
172 win = win.collapse_if_possible(win, 2, has_collapsed);
173 ARM_COMPUTE_ERROR_ON(!has_collapsed);
Georgios Pinitas30902ed2017-11-14 15:32:57 +0000174
Giorgio Arena2d1a8352020-10-26 15:04:08 +0000175 ICLKernel::configure_internal(win);
176
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100177 _axis = axis;
178
179 _config_id = "softmax_" + lower_string(string_from_data_type(data_type));
180 _config_id += "_" + std::to_string(axis);
181 _config_id += "_" + std::to_string(length);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100182}
183
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100184void ClSoftmaxKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100185{
186 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100187 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100188
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100189 const auto src =
190 utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
191 auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
192 ICLTensor *tmp = (_tmp_info.total_size() > 0)
193 ? utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_INT_0))
194 : nullptr;
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000195
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100196 if (!_prepared)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100197 {
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100198 _prepared = true;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100199
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100200 const auto *src_info = src->info();
201 const auto *dst_info = dst->info();
202 auto src_strides = src_info->strides_in_bytes();
203 auto dst_strides = dst_info->strides_in_bytes();
204
205 const auto src_stride_axis = src_strides[_axis];
206 const auto dst_stride_axis = dst_strides[_axis];
207
208 // This axis has been removed from execution window, hence we remove it from the list of strides
209 // provided to the kernel.
210 // In case axis > 0, src/dst_stride_axis will be provided in dedicated argument independent from global ID.
211 src_strides.remove(_axis);
212 dst_strides.remove(_axis);
213
214 // Argument 0: src_ptr.
215 _kernel.setArg<cl_uint>(1, src_strides[0]);
216 _kernel.setArg<cl_uint>(2, src_strides[1]);
217 _kernel.setArg<cl_uint>(3, src_strides[2]);
218 _kernel.setArg<cl_uint>(4, src_info->offset_first_element_in_bytes());
219
220 // Argument 5: dst_ptr.
221 _kernel.setArg<cl_uint>(6, dst_strides[0]);
222 _kernel.setArg<cl_uint>(7, dst_strides[1]);
223 _kernel.setArg<cl_uint>(8, dst_strides[2]);
224 _kernel.setArg<cl_uint>(9, dst_info->offset_first_element_in_bytes());
225
226 if (tmp != nullptr)
227 {
228 const auto *tmp_info = tmp->info();
229 const auto &tmp_strides = tmp_info->strides_in_bytes();
230
231 // Argument 10: tmp_ptr.
232 _kernel.setArg<cl_uint>(11, tmp_strides[1]);
233 _kernel.setArg<cl_uint>(12, tmp_strides[2]);
234 _kernel.setArg<cl_uint>(13, tmp_strides[3]);
235 _kernel.setArg<cl_uint>(14, 0);
236 }
237
238 if (_axis > 0)
239 {
240 _kernel.setArg<cl_uint>(15, src_stride_axis);
241 _kernel.setArg<cl_uint>(16, dst_stride_axis);
242 }
243 }
244
245 _kernel.setArg(0, src->cl_buffer());
246 _kernel.setArg(5, dst->cl_buffer());
247
248 if (tmp != nullptr)
249 {
250 _kernel.setArg(10, tmp->cl_buffer());
251 }
252
253 enqueue(queue, *this, window, lws_hint());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100254}
Viet-Hoa Do29254ae2023-10-13 17:40:32 +0100255
256const TensorInfo &ClSoftmaxKernel::tmp_tensor_info() const
257{
258 return _tmp_info;
259}
260
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000261} // namespace kernels
262} // namespace opencl
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100263} // namespace arm_compute