blob: e38b7c595aaf39aa44d366b9a301105f4b0d245b [file] [log] [blame]
Sang-Hoon Park201e0fe2021-01-27 13:14:56 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_CL_SOFTMAX_H
25#define ARM_COMPUTE_CL_SOFTMAX_H
26
27#include "arm_compute/runtime/CL/CLTensor.h"
28#include "src/core/gpu/cl/ClCompileContext.h"
29#include "src/runtime/gpu/cl/IClOperator.h"
30
31namespace arm_compute
32{
// Forward declaration: this header only refers to SoftmaxKernelInfo by const reference
// (see ClSoftmax::configure() and ClSoftmax::validate()), so the full definition is not needed here.
33struct SoftmaxKernelInfo;
34
35namespace opencl
36{
// Forward declarations of the operator and kernels that ClSoftmax holds through std::unique_ptr members.
37class ClPermute;
38namespace kernels
39{
40class ClLogits1DMaxShiftExpSumKernel;
41class ClLogits1DNormKernel;
42} // namespace kernels
/** OpenCL softmax operator.
 *
 * According to the parameter documentation below, both Softmax and Log Softmax are supported.
 * The operator owns two @ref ClPermute instances, a @ref kernels::ClLogits1DMaxShiftExpSumKernel,
 * a @ref kernels::ClLogits1DNormKernel and a fixed set of internal tensors indexed by InternalTensorIdx.
 */
43class ClSoftmax : public IClOperator
44{
45public:
46 /** Constructor */
47 ClSoftmax();
48 /** Configure the operator
49 *
50 * @param[in] compile_context The compile context to be used.
51 * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
52 * @param[out] dst Destination tensor info. Data types supported: same as @p src
53 * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
54 *
55 */
56 void configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info);
57 /** Static function to check if the given info will lead to a valid configuration
58 *
59 * @param[in] src Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
60 * @param[in] dst Destination tensor info (taken by const reference, not modified). Data types supported: same as @p src
61 * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
62 *
 * @return a status
63 */
64 static Status validate(const ITensorInfo &src, const ITensorInfo &dst, const SoftmaxKernelInfo &info);
65 // Inherited methods overridden:
66 void run(ITensorPack &tensors) override;
67 experimental::MemoryRequirements workspace() const override;
68
69private:
 /** Index of each internal tensor inside _internal_info and _internal_tensor.
 *
 * COUNT is not a tensor: it is the number of internal tensors and is used as the size of both arrays.
 */
70 enum class InternalTensorIdx
71 {
72 MAX = 0,
73 SUM,
74 TMP,
75 PERMUTED_SRC,
76 PERMUTED_DST,
77 COUNT
78 };
79
80 /** Create a single internal tensor
81 *
82 * @param[in] info The information used to create a tensor
83 * @param[in] idx The index within the internal array the created tensor will be held
84 */
85 void create_internal_tensor(TensorInfo &info, InternalTensorIdx idx);
86 /** Create all required internal tensors */
87 void create_internal_tensor();
88 /** Function to convert from internal tensor index to @ref TensorType used externally */
89 TensorType convert_internal_idx_to_tensor_type(InternalTensorIdx idx) const;
90 /** Function to import workspace memory allocated by the caller into internal tensor instances */
91 void import_workspace_memory(ITensorPack &tensors);
92 /** Function to permute the given source tensor when permutation is required */
93 void run_source_permute(const ITensor *src);
94 /** Function to permute the intermediate tensor to the final destination tensor when permutation is required */
95 void run_destination_permute(ITensor *dst);
96 /** Function to run @ref arm_compute::opencl::kernels::ClLogits1DMaxShiftExpSumKernel */
97 void run_max_sum(const ITensor *src);
98 /** Function to run @ref kernels::ClLogits1DNormKernel */
99 void run_norm(ITensor *dst);
100
 // Owned permute operators and kernels. ClPermute and the kernel classes are only forward-declared in this header.
101 std::unique_ptr<ClPermute> _permute_input;
102 std::unique_ptr<ClPermute> _permute_output;
103 std::unique_ptr<kernels::ClLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
104 std::unique_ptr<kernels::ClLogits1DNormKernel> _norm_kernel;
 // Defaults to false. Presumably set in configure() when permutation is required -- confirm in the implementation file.
105 bool _needs_permute{ false };
106
 // One slot per InternalTensorIdx enumerator (COUNT entries each): the tensor infos and the tensors themselves.
107 std::array<TensorInfo, static_cast<uint32_t>(InternalTensorIdx::COUNT)> _internal_info{};
108 std::array<std::unique_ptr<CLTensor>, static_cast<uint32_t>(InternalTensorIdx::COUNT)> _internal_tensor{};
109
 // Reference members: each must be bound in the constructor's member initializer list.
 // NOTE(review): presumably these alias the matching entries of _internal_info, which is declared (and
 // therefore initialized) before them -- confirm against the constructor definition.
110 TensorInfo &_max_info;
111 TensorInfo &_sum_info;
112 TensorInfo &_tmp_info;
113 TensorInfo &_permuted_src_info;
114 TensorInfo &_permuted_dst_info;
115};
116
117} // opencl
118} // arm_compute
119#endif /* ARM_COMPUTE_CL_SOFTMAX_H */