blob: 1334c54a6ce986a0231d701b8dbb9238af8ae82b [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_ICLKERNEL_H__
25#define __ARM_COMPUTE_ICLKERNEL_H__
26
steniu015f910722017-08-23 10:15:22 +010027#include "arm_compute/core/CL/CLKernelLibrary.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#include "arm_compute/core/CL/CLTypes.h"
29#include "arm_compute/core/CL/OpenCL.h"
30#include "arm_compute/core/IKernel.h"
31
32namespace arm_compute
33{
SiCong Li3e363692017-07-04 15:02:10 +010034template <typename T>
35class ICLArray;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010036class ICLTensor;
37class Window;
38
39/** Common interface for all the OpenCL kernels */
40class ICLKernel : public IKernel
41{
42public:
43 /** Constructor */
44 ICLKernel();
45 /** Returns a reference to the OpenCL kernel of this object.
46 *
47 * @return A reference to the OpenCL kernel of this object.
48 */
49 cl::Kernel &kernel();
SiCong Li3e363692017-07-04 15:02:10 +010050 /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
51 *
52 * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
53 * @param[in] array Array to set as an argument of the object's kernel.
54 * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
55 * @param[in] num_dimensions Number of dimensions of the @p array.
56 * @param[in] window Window the kernel will be executed on.
57 */
58 template <typename T>
59 void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010060 /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
61 *
62 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
63 * @param[in] tensor Tensor to set as an argument of the object's kernel.
64 * @param[in] window Window the kernel will be executed on.
65 */
66 void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
67 /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
68 *
69 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
70 * @param[in] tensor Tensor to set as an argument of the object's kernel.
71 * @param[in] window Window the kernel will be executed on.
72 */
73 void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
74 /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
75 *
76 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
77 * @param[in] tensor Tensor to set as an argument of the object's kernel.
78 * @param[in] window Window the kernel will be executed on.
79 */
80 void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
steniu01868e5412017-07-17 23:16:00 +010081 /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
82 *
83 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
84 * @param[in] tensor Tensor to set as an argument of the object's kernel.
85 * @param[in] window Window the kernel will be executed on.
86 */
87 void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
SiCong Li3e363692017-07-04 15:02:10 +010088 /** Returns the number of arguments enqueued per 1D array object.
89 *
90 * @return The number of arguments enqueues per 1D array object.
91 */
92 unsigned int num_arguments_per_1D_array() const;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010093 /** Returns the number of arguments enqueued per 1D tensor object.
94 *
95 * @return The number of arguments enqueues per 1D tensor object.
96 */
97 unsigned int num_arguments_per_1D_tensor() const;
98 /** Returns the number of arguments enqueued per 2D tensor object.
99 *
100 * @return The number of arguments enqueues per 2D tensor object.
101 */
102 unsigned int num_arguments_per_2D_tensor() const;
103 /** Returns the number of arguments enqueued per 3D tensor object.
104 *
105 * @return The number of arguments enqueues per 3D tensor object.
106 */
107 unsigned int num_arguments_per_3D_tensor() const;
steniu01868e5412017-07-17 23:16:00 +0100108 /** Returns the number of arguments enqueued per 4D tensor object.
109 *
110 * @return The number of arguments enqueues per 4D tensor object.
111 */
112 unsigned int num_arguments_per_4D_tensor() const;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113 /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
114 *
115 * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
116 *
117 * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
118 * @param[in,out] queue Command queue on which to enqueue the kernel.
119 */
120 virtual void run(const Window &window, cl::CommandQueue &queue) = 0;
121 /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
122 *
123 * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
124 * @param[in] value Value to set as an argument of the object's kernel.
125 */
126 template <typename T>
127 void add_argument(unsigned int &idx, T value)
128 {
129 _kernel.setArg(idx++, value);
130 }
131
Gian Marco Iodice9331aeb2017-08-10 17:11:08 +0100132 /** Set the Local-Workgroup-Size hint
133 *
134 * @note This method should be called after the configuration of the kernel
135 *
136 * @param[in] lws_hint Local-Workgroup-Size to use
137 */
138 void set_lws_hint(cl::NDRange &lws_hint)
139 {
140 _lws_hint = lws_hint;
141 }
142
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100143 /** Set the targeted GPU architecture
144 *
145 * @param[in] target The targeted GPU architecture
146 */
147 void set_target(GPUTarget target);
148
149 /** Set the targeted GPU architecture according to the CL device
150 *
151 * @param[in] device A CL device
152 */
153 void set_target(cl::Device &device);
154
155 /** Get the targeted GPU architecture
156 *
157 * @return The targeted GPU architecture.
158 */
159 GPUTarget get_target() const;
160
161private:
SiCong Li3e363692017-07-04 15:02:10 +0100162 /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
163 *
164 * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
165 * @param[in] array Array to set as an argument of the object's kernel.
166 * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
167 * @param[in] num_dimensions Number of dimensions of the @p array.
168 * @param[in] window Window the kernel will be executed on.
169 */
170 template <typename T, unsigned int dimension_size>
171 void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
173 *
174 * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
175 * @param[in] tensor Tensor to set as an argument of the object's kernel.
176 * @param[in] window Window the kernel will be executed on.
177 */
178 template <unsigned int dimension_size>
179 void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
SiCong Li3e363692017-07-04 15:02:10 +0100180 /** Returns the number of arguments enqueued per array object.
181 *
182 * @return The number of arguments enqueued per array object.
183 */
184 template <unsigned int dimension_size>
185 unsigned int num_arguments_per_array() const;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100186 /** Returns the number of arguments enqueued per tensor object.
187 *
188 * @return The number of arguments enqueued per tensor object.
189 */
190 template <unsigned int dimension_size>
191 unsigned int num_arguments_per_tensor() const;
192
193protected:
194 cl::Kernel _kernel; /**< OpenCL kernel to run */
195 cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
196 GPUTarget _target; /**< The targeted GPU */
197};
198
199/** Add the kernel to the command queue with the given window.
200 *
201 * @note Depending on the size of the window, this might translate into several jobs being enqueued.
202 *
203 * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
204 *
205 * @param[in,out] queue OpenCL command queue.
206 * @param[in] kernel Kernel to enqueue
207 * @param[in] window Window the kernel has to process.
208 * @param[in] lws_hint Local workgroup size requested, by default (128,1)
209 *
210 * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
211 */
steniu015f910722017-08-23 10:15:22 +0100212void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange());
SiCong Li3e363692017-07-04 15:02:10 +0100213
214template <typename T, unsigned int dimension_size>
215void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
216{
217 // Calculate offset to the start of the window
218 unsigned int offset_first_element = 0;
219
220 for(unsigned int n = 0; n < num_dimensions; ++n)
221 {
222 offset_first_element += window[n].start() * strides[n];
223 }
224
225 unsigned int idx_start = idx;
226 _kernel.setArg(idx++, array->cl_buffer());
227
228 for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
229 {
230 _kernel.setArg<cl_uint>(idx++, strides[dimension]);
231 _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
232 }
233
234 _kernel.setArg<cl_uint>(idx++, offset_first_element);
235
236 ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_array<dimension_size>() != idx,
237 "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
238 ARM_COMPUTE_UNUSED(idx_start);
239}
240
241template <typename T>
242void ICLKernel::add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
243{
244 add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
245}
246
247template <unsigned int dimension_size>
248unsigned int ICLKernel::num_arguments_per_array() const
249{
250 return num_arguments_per_tensor<dimension_size>();
251}
252
253template <unsigned int dimension_size>
254unsigned int ICLKernel::num_arguments_per_tensor() const
255{
256 return 2 + 2 * dimension_size;
257}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100258}
259#endif /*__ARM_COMPUTE_ICLKERNEL_H__ */