blob: 2bec400597f8d4410fc9ea552e3dac59dbacff77 [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/gpu/cl/operators/ClSoftmax.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010025
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000026#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010027
28#include "src/common/utils/Log.h"
Manuel Bottini94f799e2021-06-09 16:37:32 +010029#include "src/core/helpers/MemoryHelpers.h"
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000030#include "src/core/helpers/SoftmaxHelpers.h"
Georgios Pinitas7891a732021-08-20 21:39:25 +010031#include "src/gpu/cl/kernels/ClSoftmaxKernel.h"
32#include "src/gpu/cl/operators/ClPermute.h"
33#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000034#include "support/Cast.h"
35
Manuel Bottini94f799e2021-06-09 16:37:32 +010036using namespace arm_compute::experimental;
37
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000038namespace arm_compute
39{
40namespace opencl
41{
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000042ClSoftmax::ClSoftmax()
43 : _permute_input(std::make_unique<ClPermute>()),
44 _permute_output(std::make_unique<ClPermute>()),
45 _max_shift_exp_sum_kernel(std::make_unique<kernels::ClLogits1DMaxShiftExpSumKernel>()),
46 _norm_kernel(std::make_unique<kernels::ClLogits1DNormKernel>()),
Manuel Bottini94f799e2021-06-09 16:37:32 +010047 _max_info(),
48 _sum_info(),
49 _tmp_info(),
50 _permuted_src_info(),
51 _permuted_dst_info(),
52 _aux_mem(InternalTensorIdx::COUNT)
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000053{
54}
55
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010056void ClSoftmax::configure(const CLCompileContext &compile_context,
57 const ITensorInfo &src,
58 ITensorInfo &dst,
59 const SoftmaxKernelInfo &info)
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000060{
61 ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info));
ramelg012e53f172021-09-22 10:48:25 +010062 ARM_COMPUTE_LOG_PARAMS(src, dst, info);
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000063
64 const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
65
66 _needs_permute = actual_axis != 0;
67
68 const ITensorInfo &tmp_input_info = _needs_permute ? _permuted_src_info : src;
69 ITensorInfo &tmp_output_info = _needs_permute ? _permuted_dst_info : dst;
70
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010071 if (_needs_permute)
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000072 {
73 const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
74 _permute_input->configure(compile_context, &src, &_permuted_src_info, perm_info);
75 }
76
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010077 DataType tmp_data_type =
78 is_data_type_quantized_asymmetric(tmp_input_info.data_type()) ? DataType::S32 : tmp_input_info.data_type();
79 _tmp_info = tmp_input_info.clone()->set_data_type(tmp_data_type);
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000080
81 TensorShape max_sum_shape = tmp_input_info.tensor_shape();
82 _max_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape);
83 _sum_info = tmp_input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type);
84
85 // Set GPU target to kernels
86 _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
87
88 _max_shift_exp_sum_kernel->configure(compile_context, tmp_input_info, _max_info, _tmp_info, _sum_info, info);
89 _norm_kernel->configure(compile_context, _tmp_info, _sum_info, tmp_output_info, info);
90
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010091 if (_needs_permute)
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +000092 {
93 const auto perm_info = softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
94 _permute_output->configure(compile_context, &_permuted_dst_info, &dst, perm_info);
95 }
Manuel Bottini94f799e2021-06-09 16:37:32 +010096
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010097 _aux_mem[InternalTensorIdx::SUM] =
98 MemoryInfo(offset_int_vec(InternalTensorIdx::SUM), MemoryLifetime::Temporary, _sum_info.total_size());
99 _aux_mem[InternalTensorIdx::TMP] =
100 MemoryInfo(offset_int_vec(InternalTensorIdx::TMP), MemoryLifetime::Temporary, _tmp_info.total_size());
101 _aux_mem[InternalTensorIdx::MAX] =
102 MemoryInfo(offset_int_vec(InternalTensorIdx::MAX), MemoryLifetime::Temporary, _max_info.total_size());
Manuel Bottini94f799e2021-06-09 16:37:32 +0100103
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100104 _aux_mem[InternalTensorIdx::PERMUTED_SRC] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_SRC),
105 MemoryLifetime::Temporary, _permuted_src_info.total_size());
106 _aux_mem[InternalTensorIdx::PERMUTED_DST] = MemoryInfo(offset_int_vec(InternalTensorIdx::PERMUTED_DST),
107 MemoryLifetime::Temporary, _permuted_dst_info.total_size());
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000108}
109
110Status ClSoftmax::validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info)
111{
112 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src.num_dimensions() > 4, "Only up to 4 dimensions are supported");
113 ARM_COMPUTE_UNUSED(info.beta);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100114 ARM_COMPUTE_RETURN_ERROR_ON(info.axis < static_cast<int32_t>(-src.num_dimensions()) ||
115 static_cast<int32_t>(src.num_dimensions()) <= info.axis);
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000116
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100117 const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000118 const bool needs_permute = actual_axis != 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100119 if (needs_permute)
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000120 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100121 const PermutationVector permutation_vector =
122 softmax_helpers::get_permutation_vector_from_softmax_axis(actual_axis);
123 const TensorShape permuted_shape =
124 misc::shape_calculator::compute_permutation_output_shape(src, permutation_vector);
125 TensorInfo input_permuted(src.clone()->set_tensor_shape(permuted_shape));
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000126 ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&src, &input_permuted, permutation_vector));
127 TensorInfo output_permuted(dst.clone()->set_tensor_shape(permuted_shape));
128 ARM_COMPUTE_RETURN_ON_ERROR(ClPermute::validate(&output_permuted, &dst, permutation_vector));
129 }
130
131 // Create intermediate tensor info
132 DataType tmp_data_type = is_data_type_quantized_asymmetric(src.data_type()) ? DataType::S32 : src.data_type();
133 TensorInfo tensor_info_tmp(src.clone()->set_data_type(tmp_data_type).set_is_resizable(true));
134
135 TensorShape max_sum_shape = src.tensor_shape();
136 max_sum_shape.set(0, 1);
137 TensorInfo tensor_info_max(src.clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 TensorInfo tensor_info_sum(src.clone()
139 ->set_tensor_shape(max_sum_shape)
140 .set_data_type(tmp_data_type)
141 .set_quantization_info(QuantizationInfo())
142 .set_is_resizable(true));
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000143
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100144 ARM_COMPUTE_RETURN_ON_ERROR(
145 kernels::ClLogits1DMaxShiftExpSumKernel::validate(src, tensor_info_max, tensor_info_tmp, tensor_info_sum));
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000146 ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClLogits1DNormKernel::validate(tensor_info_tmp, tensor_info_sum, dst, info));
147
148 return Status{};
149}
150
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000151void ClSoftmax::run(ITensorPack &tensors)
152{
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000153 auto src = tensors.get_const_tensor(TensorType::ACL_SRC);
154 auto dst = tensors.get_tensor(TensorType::ACL_DST);
155
Manuel Bottini94f799e2021-06-09 16:37:32 +0100156 CLAuxTensorHandler sum(offset_int_vec(InternalTensorIdx::SUM), _sum_info, tensors, false);
157 CLAuxTensorHandler tmp(offset_int_vec(InternalTensorIdx::TMP), _tmp_info, tensors, false);
158 CLAuxTensorHandler max(offset_int_vec(InternalTensorIdx::MAX), _max_info, tensors, false);
159
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100160 CLAuxTensorHandler permuted_src(offset_int_vec(InternalTensorIdx::PERMUTED_SRC), _permuted_src_info, tensors,
161 false);
162 CLAuxTensorHandler permuted_dst(offset_int_vec(InternalTensorIdx::PERMUTED_DST), _permuted_dst_info, tensors,
163 false);
Manuel Bottini94f799e2021-06-09 16:37:32 +0100164
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100165 if (_needs_permute)
Manuel Bottini94f799e2021-06-09 16:37:32 +0100166 {
167 ITensorPack pack;
168 pack.add_const_tensor(TensorType::ACL_SRC, src);
169 pack.add_tensor(TensorType::ACL_DST, permuted_src.get());
170 _permute_input.get()->run(pack);
171 }
172
173 ITensorPack sum_pack;
174 ITensorPack norm_pack;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100175 if (_needs_permute)
Manuel Bottini94f799e2021-06-09 16:37:32 +0100176 {
177 sum_pack.add_const_tensor(TensorType::ACL_SRC, permuted_src.get());
178 norm_pack.add_tensor(TensorType::ACL_DST, permuted_dst.get());
179 }
180 else
181 {
182 sum_pack.add_const_tensor(TensorType::ACL_SRC, src);
183 norm_pack.add_tensor(TensorType::ACL_DST, dst);
184 }
185 sum_pack.add_tensor(TensorType::ACL_DST, tmp.get());
186 sum_pack.add_tensor(TensorType::ACL_INT_0, max.get());
187 sum_pack.add_tensor(TensorType::ACL_INT_1, sum.get());
188
189 norm_pack.add_const_tensor(TensorType::ACL_SRC, tmp.get());
190 norm_pack.add_tensor(TensorType::ACL_INT_0, sum.get());
191
192 CLScheduler::get().enqueue_op(*_max_shift_exp_sum_kernel.get(), sum_pack, false);
193 CLScheduler::get().enqueue_op(*_norm_kernel.get(), norm_pack, false);
194
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100195 if (_needs_permute)
Manuel Bottini94f799e2021-06-09 16:37:32 +0100196 {
197 ITensorPack pack;
198 pack.add_const_tensor(TensorType::ACL_SRC, permuted_dst.get());
199 pack.add_tensor(TensorType::ACL_DST, dst);
200 _permute_output.get()->run(pack);
201 }
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000202}
203
204experimental::MemoryRequirements ClSoftmax::workspace() const
205{
Manuel Bottini94f799e2021-06-09 16:37:32 +0100206 return _aux_mem;
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +0000207}
208} // namespace opencl
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100209} // namespace arm_compute