blob: 8c493d08c6442449fc0499c778a0953f5454d3b4 [file] [log] [blame]
/*
 * Copyright (c) 2022-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "arm_compute/core/CL/ICLTensor.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/KernelDescriptors.h"
30#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Matthew Bentham314d3e22023-06-23 10:53:52 +000031#include "arm_compute/core/utils/StringUtils.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032
Gian Marco Iodice5d016812022-11-17 11:03:39 +000033#include "src/core/CL/CLValidate.h"
34#include "src/core/helpers/AutoConfiguration.h"
35#include "src/core/helpers/WindowHelpers.h"
36#include "support/Cast.h"
37#include "support/StringSupport.h"
38
39namespace arm_compute
40{
41namespace opencl
42{
43namespace kernels
44{
45namespace
46{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010047Status validate_arguments(const ITensorInfo *src,
48 const ITensorInfo *weights,
49 const ITensorInfo *dst,
50 const PadStrideInfo &conv_info,
51 const DirectConvComputeKernelInfo &desc)
Gian Marco Iodice5d016812022-11-17 11:03:39 +000052{
53 ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
54 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
55 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, weights);
56 ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010057 ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(0) != src->dimension(0),
58 "Weights feature map dimension should match the respective src's one");
Gian Marco Iodice5d016812022-11-17 11:03:39 +000059 ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional");
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010060 ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.m0 <= 0 || desc.m0 > 8,
61 "M0 can only be greater than 0 and less than or equal to 8");
Gian Marco Iodice5d016812022-11-17 11:03:39 +000062
63 // Checks performed when dst is configured
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010064 if (dst->total_size() != 0)
Gian Marco Iodice5d016812022-11-17 11:03:39 +000065 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010066 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
67 dst->tensor_shape(),
68 misc::shape_calculator::compute_indirect_buffer_shape(src->tensor_shape(), src->data_layout(),
69 weights->tensor_shape(), conv_info, desc));
Gian Marco Iodice5d016812022-11-17 11:03:39 +000070 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::S32);
71 }
72
73 return Status{};
74}
75} // namespace
76
77ClIndirectConv2dAddressPrecalculationKernel::ClIndirectConv2dAddressPrecalculationKernel()
78{
79 _type = CLKernelType::ELEMENTWISE;
80}
81
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010082void ClIndirectConv2dAddressPrecalculationKernel::configure(const CLCompileContext &compile_context,
83 ITensorInfo *src,
84 ITensorInfo *weights,
85 ITensorInfo *dst,
86 const PadStrideInfo &conv_info,
87 const DirectConvComputeKernelInfo &desc)
Gian Marco Iodice5d016812022-11-17 11:03:39 +000088{
89 ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
90 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, dst, conv_info, desc));
91
Gian Marco Iodice76335eb2022-11-17 11:03:39 +000092 constexpr unsigned int width_idx = 1;
93 constexpr unsigned int height_idx = 2;
Gian Marco Iodice5d016812022-11-17 11:03:39 +000094
95 // Get dst shape
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010096 TensorShape output_shape = misc::shape_calculator::compute_indirect_buffer_shape(
97 src->tensor_shape(), src->data_layout(), weights->tensor_shape(), conv_info, desc);
Gian Marco Iodice5d016812022-11-17 11:03:39 +000098
99 TensorShape output_conv_shape = misc::shape_calculator::compute_deep_convolution_shape(*src, *weights, conv_info);
100
101 // Output auto inizialitation if not yet initialized
102 auto_init_if_empty(*dst, output_shape, 1, DataType::S32);
103
104 // Configure kernel window
105 Window win;
106
107 // Create window and update padding
108 win = calculate_max_window(output_shape, Steps(1));
109
110 ICLKernel::configure_internal(win);
111
112 std::stringstream kernel_name;
113 CLBuildOptions build_options;
114
115 kernel_name << "indirect_convolution_address_precalculation";
116
117 const unsigned int pad_left = conv_info.pad_left();
118 const unsigned int pad_top = conv_info.pad_top();
119 const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
120 const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
121 const auto dst_data_type = dst->data_type();
122
123 build_options.add_option("-DSRC_CONV_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
124 build_options.add_option("-DSRC_CONV_HEIGHT=" + support::cpp11::to_string(src->dimension(height_idx)));
125 build_options.add_option("-DDST_CONV_WIDTH=" + support::cpp11::to_string(output_conv_shape[width_idx]));
126 build_options.add_option("-DDST_CONV_HEIGHT=" + support::cpp11::to_string(output_conv_shape[height_idx]));
127 build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
128 build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type));
129 build_options.add_option("-DWEI_CONV_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
130 build_options.add_option("-DSTRIDE_X=" + support::cpp11::to_string(conv_stride_x));
131 build_options.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(conv_stride_y));
132 build_options.add_option("-DPAD_LEFT=" + support::cpp11::to_string(pad_left));
133 build_options.add_option("-DPAD_TOP=" + support::cpp11::to_string(pad_top));
134 build_options.add_option("-DM0=" + support::cpp11::to_string(desc.m0));
135
Gian Marco Iodice76335eb2022-11-17 11:03:39 +0000136 // A macro guard to compile ONLY the kernel of interest
137 build_options.add_option("-D" + upper_string(kernel_name.str()));
138
Gian Marco Iodice5d016812022-11-17 11:03:39 +0000139 _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
140
141 // Since this kernel should be called only once, we do not need to set the config_id for tuning
142}
143
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100144Status ClIndirectConv2dAddressPrecalculationKernel::validate(const ITensorInfo *src,
145 const ITensorInfo *weights,
146 const ITensorInfo *dst,
147 const PadStrideInfo &conv_info,
148 const DirectConvComputeKernelInfo &desc)
Gian Marco Iodice5d016812022-11-17 11:03:39 +0000149{
150 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, weights, dst, conv_info, desc));
151 return Status{};
152}
153
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100154void ClIndirectConv2dAddressPrecalculationKernel::run_op(ITensorPack &tensors,
155 const Window &window,
156 cl::CommandQueue &queue)
Gian Marco Iodice5d016812022-11-17 11:03:39 +0000157{
158 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
159 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
160
161 // Get initial windows
162 const Window slice = window.first_slice_window_3D();
163
164 auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
165
166 unsigned int idx = 0;
167 add_4d_tensor_nhwc_argument(idx, dst);
168 enqueue(queue, *this, slice);
169}
170} // namespace kernels
171} // namespace opencl
172} // namespace arm_compute