blob: 7c805d53683fda47b13c6e071650877a16f6e064 [file] [log] [blame]
Giorgio Arena232c4522022-03-03 10:09:01 +00001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
SiCong Li4e9f5682022-05-10 10:15:59 +010024#ifdef ENABLE_EXPERIMENTAL_DYNAMIC_FUSION
Giorgio Arena232c4522022-03-03 10:09:01 +000025
26#include "src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClStoreKernelComponents.h"
27
28namespace arm_compute
29{
30namespace experimental
31{
32namespace dynamic_fusion
33{
34ComponentType ClStoreBlockBoundaryAwareKernelComponent::get_component_type() const
35{
36 return ComponentType::Store;
37}
38
39std::string ClStoreBlockBoundaryAwareKernelComponent::get_component_code() const
40{
41 return R"_(
42 //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
43
44 __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + (g_x * (uint)N0 * sizeof(DATA_TYPE)) + (COMPUTE_M0_START_ROW(g_y, M0, PARTIAL_STORE_M0) * {{dst}}_stride_y);
45
46#if defined(REINTERPRET_OUTPUT_AS_3D)
47 // Add offset for batched GEMM. The batches will be in the fourth dimension and for this reason we
48 // multiply dst_stride_z by DEPTH_GEMM3D
49 dst_addr += g_z * {{dst}}_stride_z * DEPTH_GEMM3D;
50
51#else // defined(REINTERPRET_OUTPUT_AS_3D)
52
53 // Add offset for batched GEMM
54 dst_addr += g_z * {{dst}}_stride_z;
55
56#endif // defined(REINTERPRET_OUTPUT_AS_3D)
57
58 STORE_BLOCK_BOUNDARY_AWARE(M0, N0, DATA_TYPE, {{src}}, dst_addr, {{dst}}_stride_y, g_zout, PARTIAL_STORE_M0, PARTIAL_STORE_N0, g_cond_y, g_cond_x);
59
60 //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
61
62)_";
63}
Giorgio Arenabd44caa2022-03-15 13:45:15 +000064
65CLBuildOptions ClStoreBlockBoundaryAwareKernelComponent::generate_build_options() const
66{
67 auto t_dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
SiCong Lib63b1192022-01-28 18:24:39 +000068 // auto tile_info = _blueprint->impl().get_tile_info();
Giorgio Arenabd44caa2022-03-15 13:45:15 +000069
70 CLBuildOptions build_opts{};
71
SiCong Lib63b1192022-01-28 18:24:39 +000072 const auto n0 = _blueprint->impl().get_execution_window().x().step();
73 const auto m0 = _blueprint->impl().get_execution_window().y().step();
74 const auto partial_m0 = t_dst_info->dimension(0) % m0;
75 const auto partial_n0 = t_dst_info->dimension(1) % n0;
76
Giorgio Arenabd44caa2022-03-15 13:45:15 +000077 build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(t_dst_info->data_type()));
SiCong Lib63b1192022-01-28 18:24:39 +000078 build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
79 build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
80 build_opts.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_m0));
81 build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_n0));
Giorgio Arenabd44caa2022-03-15 13:45:15 +000082
83 return build_opts;
84}
85
SiCong Lib63b1192022-01-28 18:24:39 +000086void ClStoreBlockBoundaryAwareKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
87{
88 vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Image_3D), "src");
89 vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Image_3D), "dst");
90}
91
92ClStoreBlockBoundaryAwareKernelComponent::TagLUT ClStoreBlockBoundaryAwareKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
Giorgio Arena232c4522022-03-03 10:09:01 +000093{
94 return {
95 { "meta_kernel_id", id() },
SiCong Lib63b1192022-01-28 18:24:39 +000096 { "src", vtable.get(_src) },
97 { "dst", vtable.get(_dst) },
Giorgio Arena232c4522022-03-03 10:09:01 +000098 };
99}
Gunes Bayir16c56972022-03-28 21:32:33 +0100100
101ComponentType ClStoreIndirectWidthSelectKernelComponent::get_component_type() const
102{
103 return ComponentType::Store;
104}
105
106std::string ClStoreIndirectWidthSelectKernelComponent::get_component_code() const
107{
108 return R"_(
109 //------------------ START KERNEL {{meta_kernel_id}} STORE ---------------------
Gunes Bayir16c56972022-03-28 21:32:33 +0100110 {
Mohammed Suhail Munshif67903b2022-07-04 13:36:14 +0100111 // This also follows NHWC layout
112 // cout maps to global_id(0) maps to Channel
113 // mout maps to global_id(1) maps to Height and Weight (Collapsed Window)
114 // bout maps to global_id(3) maps to N / Batch
SiCong Lib63b1192022-01-28 18:24:39 +0000115 #define _IDST_WIDTH {{dst}}_w
116 #define _IDST_HEIGHT {{dst}}_h
117 TILE(uint, M0, 1, dst_indirect_y);
Gunes Bayir16c56972022-03-28 21:32:33 +0100118
SiCong Lib63b1192022-01-28 18:24:39 +0000119 // Calculate the destination indirect Y
120 LOOP_UNROLLING(int, i, 0, 1, M0,
121 {
122 dst_indirect_y[i].v = (uint)min(mout + i, (int)(_IDST_WIDTH * _IDST_HEIGHT) - 1);
123 dst_indirect_y[i].v += bout * (int)(_IDST_WIDTH * _IDST_HEIGHT);
124 })
Gunes Bayir16c56972022-03-28 21:32:33 +0100125
SiCong Lib63b1192022-01-28 18:24:39 +0000126 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
127
128 T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, cout, {{dst}}_stride_y, x_cond, {{src}}, dst_indirect_y);
129
130 #undef _IDST_WIDTH
131 #undef _IDST_HEIGHT
132 //------------------ END KERNEL {{meta_kernel_id}} STORE ---------------------
133 }
Gunes Bayir16c56972022-03-28 21:32:33 +0100134
135)_";
136}
137
138CLBuildOptions ClStoreIndirectWidthSelectKernelComponent::generate_build_options() const
139{
140 CLBuildOptions build_opts{};
141
142 return build_opts;
143}
144
SiCong Lib63b1192022-01-28 18:24:39 +0000145void ClStoreIndirectWidthSelectKernelComponent::allocate_shared_vars(SharedVarTable &vtable) const
146{
147 vtable.add(_src, _blueprint->impl().group(_src.arg_id), ClKernelArgDescriptor(_src.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "src");
148 vtable.add(_dst, _blueprint->impl().group(_dst.arg_id), ClKernelArgDescriptor(_dst.arg_id, ClKernelTensorArgType::Tensor_4D_t_Buffer), "dst");
149}
150
151ClStoreIndirectWidthSelectKernelComponent::TagLUT ClStoreIndirectWidthSelectKernelComponent::get_tag_lut(const SharedVarTable &vtable) const
Gunes Bayir16c56972022-03-28 21:32:33 +0100152{
153 TagLUT lut{};
154
SiCong Lib63b1192022-01-28 18:24:39 +0000155 // Arguments and global shared variables
156 lut["src"] = vtable.get(_src);
157 lut["dst"] = vtable.get(_dst);
Gunes Bayir16c56972022-03-28 21:32:33 +0100158
159 // Local build options
SiCong Lib63b1192022-01-28 18:24:39 +0000160 lut["meta_kernel_id"] = id();
Gunes Bayir16c56972022-03-28 21:32:33 +0100161 lut["DST_TENSOR_TYPE"] = "BUFFER";
SiCong Lib63b1192022-01-28 18:24:39 +0000162 const auto dst_info = _blueprint->impl().get_kernel_argument_info(_blueprint->impl().get_dst_id());
Gunes Bayir16c56972022-03-28 21:32:33 +0100163 lut["DST_DATA_TYPE"] = dst_info->data_type();
164
165 return lut;
166}
167
Giorgio Arena232c4522022-03-03 10:09:01 +0000168} // namespace dynamic_fusion
169} // namespace experimental
SiCong Li4e9f5682022-05-10 10:15:59 +0100170} // namespace arm_compute
171#endif /* ENABLE_EXPERIMENTAL_DYNAMIC_FUSION */