Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 1 | /* |
Matthew Bentham | 314d3e2 | 2023-06-23 10:53:52 +0000 | [diff] [blame] | 2 | * Copyright (c) 2017-2021, 2023 Arm Limited. |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 3 | * |
| 4 | * SPDX-License-Identifier: MIT |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to |
| 8 | * deal in the Software without restriction, including without limitation the |
| 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 10 | * sell copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all |
| 14 | * copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | * SOFTWARE. |
| 23 | */ |
Georgios Pinitas | 7891a73 | 2021-08-20 21:39:25 +0100 | [diff] [blame] | 24 | #include "src/gpu/cl/kernels/ClSoftmaxKernel.h" |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 25 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 26 | #include "arm_compute/core/CL/CLCompileContext.h" |
| 27 | #include "arm_compute/core/CL/CLHelpers.h" |
Sang-Hoon Park | 201e0fe | 2021-01-27 13:14:56 +0000 | [diff] [blame] | 28 | #include "arm_compute/core/CL/ICLTensor.h" |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 29 | #include "arm_compute/core/CL/OpenCL.h" |
| 30 | #include "arm_compute/core/CoreTypes.h" |
| 31 | #include "arm_compute/core/Dimensions.h" |
| 32 | #include "arm_compute/core/Error.h" |
| 33 | #include "arm_compute/core/Helpers.h" |
| 34 | #include "arm_compute/core/ITensorInfo.h" |
| 35 | #include "arm_compute/core/ITensorPack.h" |
| 36 | #include "arm_compute/core/KernelDescriptors.h" |
| 37 | #include "arm_compute/core/Steps.h" |
| 38 | #include "arm_compute/core/TensorShape.h" |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 39 | #include "arm_compute/core/Utils.h" |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 40 | #include "arm_compute/core/utils/DataTypeUtils.h" |
Matthew Bentham | 314d3e2 | 2023-06-23 10:53:52 +0000 | [diff] [blame] | 41 | #include "arm_compute/core/utils/helpers/AdjustVecSize.h" |
Matthew Bentham | 314d3e2 | 2023-06-23 10:53:52 +0000 | [diff] [blame] | 42 | #include "arm_compute/core/utils/StringUtils.h" |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 43 | #include "arm_compute/core/Validate.h" |
| 44 | #include "arm_compute/core/Window.h" |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 45 | |
Sang-Hoon Park | 68dd25f | 2020-10-19 16:00:11 +0100 | [diff] [blame] | 46 | #include "src/core/helpers/WindowHelpers.h" |
Sang-Hoon Park | 201e0fe | 2021-01-27 13:14:56 +0000 | [diff] [blame] | 47 | #include "support/Cast.h" |
Matthew Bentham | 758b5ba | 2020-03-05 23:37:48 +0000 | [diff] [blame] | 48 | #include "support/StringSupport.h" |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 49 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 50 | #include <string> |
| 51 | |
Giorgio Arena | 2d1a835 | 2020-10-26 15:04:08 +0000 | [diff] [blame] | 52 | namespace arm_compute |
| 53 | { |
Sang-Hoon Park | 201e0fe | 2021-01-27 13:14:56 +0000 | [diff] [blame] | 54 | namespace opencl |
| 55 | { |
| 56 | namespace kernels |
| 57 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 58 | |
| 59 | ClSoftmaxKernel::ClSoftmaxKernel() |
Chunosov | f450caa | 2017-11-08 16:09:35 +0700 | [diff] [blame] | 60 | { |
Chunosov | f450caa | 2017-11-08 16:09:35 +0700 | [diff] [blame] | 61 | } |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 62 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 63 | Status ClSoftmaxKernel::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info) |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 64 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 65 | ARM_COMPUTE_UNUSED(src, dst, info); |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 66 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 67 | ARM_COMPUTE_RETURN_ERROR_ON(src.num_dimensions() > 4); |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 68 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 69 | ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&src, &dst); |
| 70 | |
| 71 | ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN( // |
| 72 | &src, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED); |
| 73 | ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst); |
| 74 | |
| 75 | ARM_COMPUTE_RETURN_ERROR_ON(info.input_data_type != src.data_type()); |
| 76 | ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) || |
| 77 | static_cast<int32_t>(src.num_dimensions()) <= info.axis); |
| 78 | |
| 79 | if (is_data_type_quantized_asymmetric(src.data_type())) |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 80 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 81 | ARM_COMPUTE_RETURN_ERROR_ON(src.quantization_info().uniform().scale < 0); |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 82 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 83 | ARM_COMPUTE_RETURN_ERROR_ON(dst.quantization_info() != |
| 84 | get_softmax_output_quantization_info(src.data_type(), info.is_log)); |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 85 | } |
| 86 | |
Georgios Pinitas | 631c41a | 2017-12-06 11:53:03 +0000 | [diff] [blame] | 87 | return Status{}; |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 88 | } |
| 89 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 90 | void ClSoftmaxKernel::configure(const CLCompileContext &compile_context, |
| 91 | const ITensorInfo &src, |
| 92 | ITensorInfo &dst, |
| 93 | const SoftmaxKernelInfo &info) |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 94 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 95 | ARM_COMPUTE_UNUSED(compile_context, src, dst, info); |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 96 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 97 | const auto &dst_shape = dst.tensor_shape(); |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 98 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 99 | const auto data_type = src.data_type(); |
| 100 | const auto element_size = src.element_size(); |
| 101 | |
| 102 | const auto is_quantized = data_type == DataType::QASYMM8 || data_type == DataType::QASYMM8_SIGNED; |
| 103 | const auto src_qinfo = src.quantization_info().uniform(); |
| 104 | const auto dst_qinfo = dst.quantization_info().uniform(); |
| 105 | |
| 106 | const auto axis = wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())); |
| 107 | const auto length = dst_shape[axis]; |
| 108 | |
| 109 | const auto tmp_data_type = is_quantized ? DataType::F32 : data_type; |
| 110 | |
| 111 | const auto vec_size = adjust_vec_size(16 / element_size, dst_shape[0]); |
| 112 | const auto vec_size_leftover = dst_shape[0] % vec_size; |
| 113 | |
| 114 | std::string kernel_name("softmax"); |
| 115 | CLBuildOptions build_opts; |
| 116 | |
| 117 | build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); |
| 118 | build_opts.add_option("-DTMP_DATA_TYPE=" + get_cl_type_from_data_type(tmp_data_type)); |
| 119 | build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size)); |
| 120 | build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover)); |
| 121 | build_opts.add_option("-DLENGTH=" + support::cpp11::to_string(length)); |
| 122 | build_opts.add_option_if(info.is_log, "-DIS_LOG"); |
| 123 | build_opts.add_option("-DBETA=" + float_to_string_with_full_precision(info.beta)); |
| 124 | |
| 125 | build_opts.add_option_if(is_quantized, "-DIS_QUANTIZED"); |
| 126 | build_opts.add_option_if(is_quantized, "-DSRC_OFFSET=" + float_to_string_with_full_precision(src_qinfo.offset)); |
| 127 | build_opts.add_option_if(is_quantized, "-DSRC_SCALE=" + float_to_string_with_full_precision(src_qinfo.scale)); |
| 128 | build_opts.add_option_if(is_quantized, "-DDST_OFFSET=" + float_to_string_with_full_precision(dst_qinfo.offset)); |
| 129 | build_opts.add_option_if(is_quantized, "-DDST_SCALE=" + float_to_string_with_full_precision(dst_qinfo.scale)); |
| 130 | |
| 131 | if (axis == 0) |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 132 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 133 | kernel_name += "_x"; |
| 134 | build_opts.add_option("-DSOFTMAX_X"); |
| 135 | |
| 136 | if (is_quantized) |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 137 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 138 | _tmp_info = TensorInfo(dst_shape, 1, tmp_data_type); |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 139 | } |
| 140 | } |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 141 | else |
| 142 | { |
| 143 | kernel_name += "_non_x"; |
| 144 | build_opts.add_option("-DSOFTMAX_NON_X"); |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 145 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 146 | TensorShape tmp_shape; |
Giorgio Arena | b8ab997 | 2017-11-29 15:09:39 +0000 | [diff] [blame] | 147 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 148 | tmp_shape.set(0, length * vec_size, false); |
| 149 | tmp_shape.set(1, dst_shape[0] + (vec_size - vec_size_leftover) % vec_size, false); |
Chunosov | d6afedc | 2017-11-06 22:09:45 +0700 | [diff] [blame] | 150 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 151 | for (size_t i = 2; i <= static_cast<size_t>(axis); ++i) |
| 152 | { |
| 153 | tmp_shape.set(i, dst_shape[i - 1], false); |
| 154 | } |
Giorgio Arena | 4a95bba | 2021-06-28 11:00:27 +0100 | [diff] [blame] | 155 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 156 | for (size_t i = axis + 1; i < dst_shape.num_dimensions(); ++i) |
| 157 | { |
| 158 | tmp_shape.set(i, dst_shape[i], false); |
| 159 | } |
Giorgio Arena | 2d1a835 | 2020-10-26 15:04:08 +0000 | [diff] [blame] | 160 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 161 | _tmp_info = TensorInfo(tmp_shape, 1, tmp_data_type); |
| 162 | } |
Chunosov | d6afedc | 2017-11-06 22:09:45 +0700 | [diff] [blame] | 163 | |
Manuel Bottini | 4c6bd51 | 2020-04-08 10:15:51 +0100 | [diff] [blame] | 164 | _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); |
Chunosov | d6afedc | 2017-11-06 22:09:45 +0700 | [diff] [blame] | 165 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 166 | // Configure kernel window and kernel arguments. |
| 167 | Window win = calculate_max_window(src, Steps(vec_size)); |
Giorgio Arena | 2d1a835 | 2020-10-26 15:04:08 +0000 | [diff] [blame] | 168 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 169 | bool has_collapsed = true; |
Chunosov | d6afedc | 2017-11-06 22:09:45 +0700 | [diff] [blame] | 170 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 171 | win = win.shift_dimensions(1, axis); // Remove this axis from the window/GWS. |
| 172 | win = win.collapse_if_possible(win, 2, has_collapsed); |
| 173 | ARM_COMPUTE_ERROR_ON(!has_collapsed); |
Georgios Pinitas | 30902ed | 2017-11-14 15:32:57 +0000 | [diff] [blame] | 174 | |
Giorgio Arena | 2d1a835 | 2020-10-26 15:04:08 +0000 | [diff] [blame] | 175 | ICLKernel::configure_internal(win); |
| 176 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 177 | _axis = axis; |
| 178 | |
| 179 | _config_id = "softmax_" + lower_string(string_from_data_type(data_type)); |
| 180 | _config_id += "_" + std::to_string(axis); |
| 181 | _config_id += "_" + std::to_string(length); |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 182 | } |
| 183 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 184 | void ClSoftmaxKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 185 | { |
| 186 | ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 187 | ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 188 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 189 | const auto src = |
| 190 | utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC)); |
| 191 | auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST)); |
| 192 | ICLTensor *tmp = (_tmp_info.total_size() > 0) |
| 193 | ? utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_INT_0)) |
| 194 | : nullptr; |
Sang-Hoon Park | 201e0fe | 2021-01-27 13:14:56 +0000 | [diff] [blame] | 195 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 196 | if (!_prepared) |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 197 | { |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 198 | _prepared = true; |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 199 | |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 200 | const auto *src_info = src->info(); |
| 201 | const auto *dst_info = dst->info(); |
| 202 | auto src_strides = src_info->strides_in_bytes(); |
| 203 | auto dst_strides = dst_info->strides_in_bytes(); |
| 204 | |
| 205 | const auto src_stride_axis = src_strides[_axis]; |
| 206 | const auto dst_stride_axis = dst_strides[_axis]; |
| 207 | |
| 208 | // This axis has been removed from execution window, hence we remove it from the list of strides |
| 209 | // provided to the kernel. |
| 210 | // In case axis > 0, src/dst_stride_axis will be provided in dedicated argument independent from global ID. |
| 211 | src_strides.remove(_axis); |
| 212 | dst_strides.remove(_axis); |
| 213 | |
| 214 | // Argument 0: src_ptr. |
| 215 | _kernel.setArg<cl_uint>(1, src_strides[0]); |
| 216 | _kernel.setArg<cl_uint>(2, src_strides[1]); |
| 217 | _kernel.setArg<cl_uint>(3, src_strides[2]); |
| 218 | _kernel.setArg<cl_uint>(4, src_info->offset_first_element_in_bytes()); |
| 219 | |
| 220 | // Argument 5: dst_ptr. |
| 221 | _kernel.setArg<cl_uint>(6, dst_strides[0]); |
| 222 | _kernel.setArg<cl_uint>(7, dst_strides[1]); |
| 223 | _kernel.setArg<cl_uint>(8, dst_strides[2]); |
| 224 | _kernel.setArg<cl_uint>(9, dst_info->offset_first_element_in_bytes()); |
| 225 | |
| 226 | if (tmp != nullptr) |
| 227 | { |
| 228 | const auto *tmp_info = tmp->info(); |
| 229 | const auto &tmp_strides = tmp_info->strides_in_bytes(); |
| 230 | |
| 231 | // Argument 10: tmp_ptr. |
| 232 | _kernel.setArg<cl_uint>(11, tmp_strides[1]); |
| 233 | _kernel.setArg<cl_uint>(12, tmp_strides[2]); |
| 234 | _kernel.setArg<cl_uint>(13, tmp_strides[3]); |
| 235 | _kernel.setArg<cl_uint>(14, 0); |
| 236 | } |
| 237 | |
| 238 | if (_axis > 0) |
| 239 | { |
| 240 | _kernel.setArg<cl_uint>(15, src_stride_axis); |
| 241 | _kernel.setArg<cl_uint>(16, dst_stride_axis); |
| 242 | } |
| 243 | } |
| 244 | |
| 245 | _kernel.setArg(0, src->cl_buffer()); |
| 246 | _kernel.setArg(5, dst->cl_buffer()); |
| 247 | |
| 248 | if (tmp != nullptr) |
| 249 | { |
| 250 | _kernel.setArg(10, tmp->cl_buffer()); |
| 251 | } |
| 252 | |
| 253 | enqueue(queue, *this, window, lws_hint()); |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 254 | } |
Viet-Hoa Do | 29254ae | 2023-10-13 17:40:32 +0100 | [diff] [blame] | 255 | |
| 256 | const TensorInfo &ClSoftmaxKernel::tmp_tensor_info() const |
| 257 | { |
| 258 | return _tmp_info; |
| 259 | } |
| 260 | |
Sang-Hoon Park | 201e0fe | 2021-01-27 13:14:56 +0000 | [diff] [blame] | 261 | } // namespace kernels |
| 262 | } // namespace opencl |
Felix Thomasmathibalan | afd38f0 | 2023-09-27 17:46:17 +0100 | [diff] [blame] | 263 | } // namespace arm_compute |