blob: 47f95b5c4092dcdb23db7f43e6764ce41ede109d [file] [log] [blame]
/*
 * Copyright (c) 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
SiCong Lib63b1192022-01-28 18:24:39 +000024#ifndef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
25#error "This experimental feature must be enabled with -DENABLE_EXPERIMENTAL_DYNAMIC_FUSION"
26#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */
Giorgio Arena232c4522022-03-03 10:09:01 +000027
28#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClElementwiseAddKernelComponent.h"
Gunes Bayir8a879832022-03-10 21:21:01 +000029#include "arm_compute/core/Validate.h"
30#include "src/core/helpers/AutoConfiguration.h"
31#include "src/core/helpers/WindowHelpers.h"
Giorgio Arena232c4522022-03-03 10:09:01 +000032
33namespace arm_compute
34{
35namespace experimental
36{
37namespace dynamic_fusion
38{
39ComponentType ClElementwiseAddKernelComponent::get_component_type() const
40{
41 return ComponentType::Simple;
42}
43
44std::set<std::string> ClElementwiseAddKernelComponent::get_headers_list() const
45{
SiCong Lib63b1192022-01-28 18:24:39 +000046 return std::set<std::string> { "common/experimental/gemm_fused_post_ops/fp_mixed_precision_helpers.h", "tile_helpers.h" };
Giorgio Arena232c4522022-03-03 10:09:01 +000047}
48
Gunes Bayir8a879832022-03-10 21:21:01 +000049Window ClElementwiseAddKernelComponent::get_window() const
50{
51 const ITensorInfo *lhs_info = _blueprint->impl().get_kernel_argument_info(_lhs.arg_id);
52 const ITensorInfo *rhs_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
53 ITensorInfo *dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
54
55 ARM_COMPUTE_ERROR_ON_NULLPTR(lhs_info, rhs_info, dst_info);
56
57 const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs_info, *rhs_info);
58 const TensorShape &out_shape = broadcast_pair.first;
59
60 auto_init_if_empty(*dst_info, out_shape, 1, lhs_info->data_type());
61
62 const unsigned int vector_size_byte_opencl = 16;
63 const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / dst_info->element_size(), dst_info->dimension(0));
64 Window win = calculate_max_window(*dst_info, Steps(num_elems_processed_per_iteration));
65
66 return win;
67}
68
Giorgio Arena232c4522022-03-03 10:09:01 +000069std::string ClElementwiseAddKernelComponent::get_component_code() const
70{
71 std::string code;
SiCong Lib63b1192022-01-28 18:24:39 +000072 const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
Giorgio Arena232c4522022-03-03 10:09:01 +000073
SiCong Lib63b1192022-01-28 18:24:39 +000074 if(is_root)
Giorgio Arena232c4522022-03-03 10:09:01 +000075 {
SiCong Lib63b1192022-01-28 18:24:39 +000076 return R"_(
77 //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
78 // IN_0(LHS) {{lhs}}
79 // IN_1(RHS) {{rhs}}
80 // OUT(dst, accum) {{dst}}
81
82 // dst = lhs + rhs (mix-precision, broadcast, boundary aware)
83 TILE({{DATA_TYPE}}, M0, N0, {{dst}});
84 {
85 TILE({{DATA_TYPE}}, M0, N0, lhs_tile);
86 TILE({{DATA_TYPE}}, M0, N0, rhs_tile);
87
88 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{lhs}}, cout, mout, 1, {{lhs}}_stride_y, lhs_tile);
89 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{rhs}}, cout, mout, 1, {{rhs}}_stride_y, rhs_tile);
90
Michalis Spyrou06adbc52022-05-06 17:06:21 +010091#if defined(IS_BROADCAST)
SiCong Lib63b1192022-01-28 18:24:39 +000092 T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
Michalis Spyrou06adbc52022-05-06 17:06:21 +010093#else // !defined(IS_BROADCAST)
94 T_ADD({{DATA_TYPE}}, M0, N0, lhs_tile, rhs_tile, {{dst}});
95#endif // defined(IS_BROADCAST)
96
Giorgio Arena232c4522022-03-03 10:09:01 +000097 }
98 //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
Giorgio Arena232c4522022-03-03 10:09:01 +000099)_";
SiCong Lib63b1192022-01-28 18:24:39 +0000100 }
101 else
102 {
103 return R"_(
104 //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
105 // IN_0/Out(Accumulator) {{acc}}
106 // IN_1(Addend) {{addend}}
107
108 // acc = addend + acc (mix-precision, broadcast, boundary aware)
109 {
110 TILE({{DATA_TYPE}}, M0, N0, addend_tile);
111
112 T_LOAD({{DATA_TYPE}}, M0, N0, BUFFER, {{addend}}, cout, mout, 1, {{addend}}_stride_y, addend_tile);
113
Michalis Spyrou06adbc52022-05-06 17:06:21 +0100114#if defined(IS_BROADCAST)
SiCong Lib63b1192022-01-28 18:24:39 +0000115 T_ADD_BROADCAST_X({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
Michalis Spyrou06adbc52022-05-06 17:06:21 +0100116#else // !defined(IS_BROADCAST)
117 T_ADD({{DATA_TYPE}}, M0, N0, {{acc}}, addend_tile, {{acc}});
118#endif // defined(IS_BROADCAST)
SiCong Lib63b1192022-01-28 18:24:39 +0000119 }
120 //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_ADD ---------------------
121)_";
122 }
Giorgio Arena232c4522022-03-03 10:09:01 +0000123}
Giorgio Arenabd44caa2022-03-15 13:45:15 +0000124
125CLBuildOptions ClElementwiseAddKernelComponent::generate_build_options() const
126{
Michalis Spyrou06adbc52022-05-06 17:06:21 +0100127 const auto t_src_info = _blueprint->impl().get_kernel_argument_info(_rhs.arg_id);
SiCong Lib63b1192022-01-28 18:24:39 +0000128 const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
Giorgio Arenabd44caa2022-03-15 13:45:15 +0000129
130 CLBuildOptions build_opts{};
Michalis Spyrou06adbc52022-05-06 17:06:21 +0100131 const auto n0 = _blueprint->impl().get_execution_window().x().step();
132 const auto m0 = _blueprint->impl().get_execution_window().y().step();
133 const bool is_broadcast = t_src_info->tensor_shape() != t_dst_info->tensor_shape();
Giorgio Arenabd44caa2022-03-15 13:45:15 +0000134
SiCong Lib63b1192022-01-28 18:24:39 +0000135 build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
136 build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
Michalis Spyrou06adbc52022-05-06 17:06:21 +0100137 build_opts.add_option_if(is_broadcast, "-DIS_BROADCAST");
Giorgio Arenabd44caa2022-03-15 13:45:15 +0000138
139 return build_opts;
140}
141
SiCong Li0a486cf2022-04-07 17:41:51 +0100142std::string ClElementwiseAddKernelComponent::generate_config_id() const
143{
144 auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
145 std::string config_id{};
146 config_id += lower_string(string_from_data_type(t_dst_info->data_type()));
147 config_id += "_";
148 config_id += support::cpp11::to_string(t_dst_info->dimension(0));
149 config_id += "_";
150 config_id += support::cpp11::to_string(t_dst_info->dimension(1));
151 config_id += "_";
152 config_id += lower_string(string_from_data_layout(t_dst_info->data_layout()));
153 return config_id;
154}
155
SiCong Lib63b1192022-01-28 18:24:39 +0000156void ClElementwiseAddKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
Giorgio Arena232c4522022-03-03 10:09:01 +0000157{
SiCong Lib63b1192022-01-28 18:24:39 +0000158 const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
159 vtable.add(_lhs, _blueprint->impl().group(_lhs.arg_id), ClKernelArgDescriptor(_lhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "lhs");
160 vtable.add(_rhs, _blueprint->impl().group(_rhs.arg_id), ClKernelArgDescriptor(_rhs.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "rhs");
161 if(is_root)
Giorgio Arena232c4522022-03-03 10:09:01 +0000162 {
SiCong Lib63b1192022-01-28 18:24:39 +0000163 vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
Giorgio Arena232c4522022-03-03 10:09:01 +0000164 }
SiCong Lib63b1192022-01-28 18:24:39 +0000165}
166
167ClElementwiseAddKernelComponent::TagLUT ClElementwiseAddKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
168{
169 TagLUT lut{};
170 const auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
171 // Arguments and global shared variables
172 const bool is_root = _blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Argument && _blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Argument;
173 if(is_root)
Giorgio Arena232c4522022-03-03 10:09:01 +0000174 {
SiCong Lib63b1192022-01-28 18:24:39 +0000175 lut["lhs"] = vtable.get(_lhs);
176 lut["rhs"] = vtable.get(_rhs);
177 lut["dst"] = vtable.get(_dst);
Giorgio Arena232c4522022-03-03 10:09:01 +0000178 }
179 else
180 {
SiCong Lib63b1192022-01-28 18:24:39 +0000181 // Determine which link is the accumulator
182 Link accumulator;
183 Link addend;
184 if(_blueprint->impl().group(_lhs.arg_id) == SharedVarGroup::Automatic)
185 {
186 accumulator = _lhs;
187 addend = _rhs;
188 }
189 else if(_blueprint->impl().group(_rhs.arg_id) == SharedVarGroup::Automatic)
190 {
191 accumulator = _rhs;
192 addend = _lhs;
193 }
194 else
195 {
196 ARM_COMPUTE_ERROR("Invalid elementwise component linking");
197 }
198 lut["acc"] = vtable.get(accumulator);
199 lut["addend"] = vtable.get(addend);
Giorgio Arena232c4522022-03-03 10:09:01 +0000200 }
SiCong Lib63b1192022-01-28 18:24:39 +0000201 // Local build options
202 lut["meta_kernel_id"] = id();
203 lut["DATA_TYPE"] = get_cl_type_from_data_type(t_dst_info->data_type());
204 return lut;
Giorgio Arena232c4522022-03-03 10:09:01 +0000205}
206} // namespace dynamic_fusion
207} // namespace experimental
SiCong Lib63b1192022-01-28 18:24:39 +0000208} // namespace arm_compute