blob: fae5fe2c8e4d117b68b61ca6c16b3e752a79e518 [file] [log] [blame]
/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/CL/ICLArray.h"
29#include "arm_compute/core/CL/ICLTensor.h"
30#include "arm_compute/core/Coordinates.h"
31#include "arm_compute/core/Error.h"
32#include "arm_compute/core/Helpers.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <cmath>
38
39using namespace arm_compute;
40
41void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
42 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
43 bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
Manuel Bottini4c6bd512020-04-08 10:15:51 +010044{
45 configure(CLKernelLibrary::get().get_compile_context(), old_points, new_points_estimates, old_points_internal, new_points_internal, use_initial_estimate, level, num_levels, pyramid_scale);
46}
47
Manuel Bottini679fc962020-04-21 16:08:53 +010048void CLLKTrackerInitKernel::configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
Manuel Bottini4c6bd512020-04-08 10:15:51 +010049 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
50 bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010051
52{
53 ARM_COMPUTE_ERROR_ON(old_points == nullptr);
54 ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
55 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
56
57 const float scale = std::pow(pyramid_scale, level);
58
59 // Create kernel
60 std::string kernel_name = "init_level";
61 if(level == (num_levels - 1))
62 {
63 kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
64 }
Manuel Bottini4c6bd512020-04-08 10:15:51 +010065 _kernel = create_kernel(compile_context, kernel_name);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010066
67 // Set static kernel arguments
68 unsigned int idx = 0;
69 if(level == (num_levels - 1))
70 {
71 _kernel.setArg(idx++, old_points->cl_buffer());
72 if(use_initial_estimate)
73 {
74 _kernel.setArg(idx++, new_points_estimates->cl_buffer());
75 }
76 }
77 _kernel.setArg(idx++, old_points_internal->cl_buffer());
78 _kernel.setArg(idx++, new_points_internal->cl_buffer());
79 _kernel.setArg<cl_float>(idx++, scale);
80
81 // Configure kernel window
82 Window window;
83 window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
84 window.set(Window::DimY, Window::Dimension(0, 1, 1));
Anthony Barbierb6eb3532018-08-08 13:20:04 +010085 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010086}
87
88void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
89{
90 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
91 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
92
Georgios Pinitas275f99c2019-08-23 12:44:11 +010093 enqueue(queue, *this, window, lws_hint());
Anthony Barbier6ff3b192017-09-04 18:44:23 +010094}
95
96void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
Manuel Bottini4c6bd512020-04-08 10:15:51 +010097{
98 configure(CLKernelLibrary::get().get_compile_context(), new_points_internal, new_points);
99}
100
Manuel Bottini679fc962020-04-21 16:08:53 +0100101void CLLKTrackerFinalizeKernel::configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100102
103{
104 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
105 ARM_COMPUTE_ERROR_ON(new_points == nullptr);
106
107 // Create kernel
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100108 _kernel = create_kernel(compile_context, "finalize");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109
110 // Set static kernel arguments
111 unsigned int idx = 0;
112 _kernel.setArg(idx++, new_points_internal->cl_buffer());
113 _kernel.setArg(idx++, new_points->cl_buffer());
114
115 // Configure kernel window
116 Window window;
117 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
118 window.set(Window::DimY, Window::Dimension(0, 1, 1));
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100119 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100120}
121
122void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
123{
124 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
125 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
126
Georgios Pinitas275f99c2019-08-23 12:44:11 +0100127 enqueue(queue, *this, window, lws_hint());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128}
129
130CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel()
131 : _old_input(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr)
132{
133}
134
135void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
136 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
137 ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
138 size_t window_dimension, size_t level)
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100139{
140 configure(CLKernelLibrary::get().get_compile_context(), old_input, old_scharr_gx, old_scharr_gy, old_points_internal, new_points_internal, coeff_table, old_ival, window_dimension, level);
141}
142
Manuel Bottini679fc962020-04-21 16:08:53 +0100143void CLLKTrackerStage0Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100144 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
145 ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
146 size_t window_dimension, size_t level)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100147
148{
149 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8);
150 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gx, 1, DataType::S16);
151 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gy, 1, DataType::S16);
152 ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
153 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
154 ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
155 ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
156
157 _old_input = old_input;
158 _old_scharr_gx = old_scharr_gx;
159 _old_scharr_gy = old_scharr_gy;
160
161 // Configure kernel window
162 Window window;
163 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
164 window.set(Window::DimY, Window::Dimension(0, 1, 1));
165
166 const ValidRegion valid_region = intersect_valid_regions(
167 old_input->info()->valid_region(),
168 old_scharr_gx->info()->valid_region(),
169 old_scharr_gy->info()->valid_region());
170
171 update_window_and_padding(window,
172 AccessWindowStatic(old_input->info(), valid_region.start(0), valid_region.start(1),
173 valid_region.end(0), valid_region.end(1)),
174 AccessWindowStatic(old_scharr_gx->info(), valid_region.start(0), valid_region.start(1),
175 valid_region.end(0), valid_region.end(1)),
176 AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
177 valid_region.end(0), valid_region.end(1)));
178
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100179 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100180
181 // Initialize required variables
182 const int level0 = (level == 0) ? 1 : 0;
183 const int window_size = window_dimension;
184 const int window_size_squared = window_dimension * window_dimension;
185 const int window_size_half = window_dimension / 2;
186 const float eig_const = 1.0f / (2.0f * window_size_squared);
187 const cl_float3 border_limits =
188 {
189 {
190 // -1 because we load 2 values at once for bilinear interpolation
191 static_cast<cl_float>(valid_region.end(0) - window_size - 1),
192 static_cast<cl_float>(valid_region.end(1) - window_size - 1),
193 static_cast<cl_float>(valid_region.start(0))
194 }
195 };
196
197 // Create kernel
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100198 _kernel = create_kernel(compile_context, "lktracker_stage0");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100199
200 // Set arguments
201 unsigned int idx = 3 * num_arguments_per_2D_tensor();
202 _kernel.setArg(idx++, old_points_internal->cl_buffer());
203 _kernel.setArg(idx++, new_points_internal->cl_buffer());
204 _kernel.setArg(idx++, coeff_table->cl_buffer());
205 _kernel.setArg(idx++, old_ival->cl_buffer());
206 _kernel.setArg<cl_int>(idx++, window_size);
207 _kernel.setArg<cl_int>(idx++, window_size_squared);
208 _kernel.setArg<cl_int>(idx++, window_size_half);
209 _kernel.setArg<cl_float3>(idx++, border_limits);
210 _kernel.setArg<cl_float>(idx++, eig_const);
211 _kernel.setArg<cl_int>(idx++, level0);
212}
213
214void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue)
215{
216 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
217 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
218
219 // Set static tensor arguments. Setting here as allocation might be deferred.
220 unsigned int idx = 0;
221 add_2D_tensor_argument(idx, _old_input, window);
222 add_2D_tensor_argument(idx, _old_scharr_gx, window);
223 add_2D_tensor_argument(idx, _old_scharr_gy, window);
224
Georgios Pinitas275f99c2019-08-23 12:44:11 +0100225 enqueue(queue, *this, window, lws_hint());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100226}
227
228CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel()
229 : _new_input(nullptr)
230{
231}
232
233void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
234 Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100235{
236 configure(CLKernelLibrary::get().get_compile_context(), new_input, new_points_internal, coeff_table, old_ival, termination, epsilon, num_iterations, window_dimension, level);
237}
238
Manuel Bottini679fc962020-04-21 16:08:53 +0100239void CLLKTrackerStage1Kernel::configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table,
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100240 ICLOldValArray *old_ival,
241 Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100242
243{
244 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8);
245 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
246 ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
247 ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
248
249 _new_input = new_input;
250
251 // Configure kernel window
252 Window window;
253 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
254 window.set(Window::DimY, Window::Dimension(0, 1, 1));
255
256 const ValidRegion &valid_region = new_input->info()->valid_region();
257
258 update_window_and_padding(window,
259 AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1),
260 valid_region.end(0), valid_region.end(1)));
261
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100262 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100263
264 // Initialize required variables
265 const int level0 = (level == 0) ? 1 : 0;
266 const int window_size = window_dimension;
267 const int window_size_squared = window_dimension * window_dimension;
268 const int window_size_half = window_dimension / 2;
269 const float eig_const = 1.0f / (2.0f * window_size_squared);
270 const cl_float3 border_limits =
271 {
272 {
273 // -1 because we load 2 values at once for bilinear interpolation
274 static_cast<cl_float>(valid_region.end(0) - window_size - 1),
275 static_cast<cl_float>(valid_region.end(1) - window_size - 1),
276 static_cast<cl_float>(valid_region.start(0))
277 }
278 };
John Richardson8de92612018-02-22 14:09:31 +0000279
280 // Set maximum number of iterations used for convergence
281 const size_t max_iterations = 1000;
282 num_iterations = (termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : num_iterations;
283
284 const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100285
286 // Create kernel
Manuel Bottini4c6bd512020-04-08 10:15:51 +0100287 _kernel = create_kernel(compile_context, "lktracker_stage1");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100288
289 // Set static kernel arguments
290 unsigned int idx = num_arguments_per_2D_tensor();
291 _kernel.setArg(idx++, new_points_internal->cl_buffer());
292 _kernel.setArg(idx++, coeff_table->cl_buffer());
293 _kernel.setArg(idx++, old_ival->cl_buffer());
294 _kernel.setArg<cl_int>(idx++, window_size);
295 _kernel.setArg<cl_int>(idx++, window_size_squared);
296 _kernel.setArg<cl_int>(idx++, window_size_half);
297 _kernel.setArg<cl_int>(idx++, num_iterations);
298 _kernel.setArg<cl_float>(idx++, epsilon);
299 _kernel.setArg<cl_float3>(idx++, border_limits);
300 _kernel.setArg<cl_float>(idx++, eig_const);
301 _kernel.setArg<cl_int>(idx++, level0);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100302 _kernel.setArg<cl_int>(idx++, term_epsilon);
303}
304
305void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue)
306{
307 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
308 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
309
310 // Set static tensor arguments. Setting here as allocation might be deferred.
311 unsigned int idx = 0;
312 add_2D_tensor_argument(idx, _new_input, window);
313
Georgios Pinitas275f99c2019-08-23 12:44:11 +0100314 enqueue(queue, *this, window, lws_hint());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100315}