blob: 40ed630c89fc799651c0ac833f1eb36c3822f4b5 [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
25
26#include "arm_compute/core/AccessWindowStatic.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
28#include "arm_compute/core/CL/ICLArray.h"
29#include "arm_compute/core/CL/ICLTensor.h"
30#include "arm_compute/core/Coordinates.h"
31#include "arm_compute/core/Error.h"
32#include "arm_compute/core/Helpers.h"
33#include "arm_compute/core/TensorInfo.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <cmath>
38
39using namespace arm_compute;
40
41void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
42 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
43 bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
44
45{
46 ARM_COMPUTE_ERROR_ON(old_points == nullptr);
47 ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
48 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
49
50 const float scale = std::pow(pyramid_scale, level);
51
52 // Create kernel
53 std::string kernel_name = "init_level";
54 if(level == (num_levels - 1))
55 {
56 kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
57 }
58 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
59
60 // Set static kernel arguments
61 unsigned int idx = 0;
62 if(level == (num_levels - 1))
63 {
64 _kernel.setArg(idx++, old_points->cl_buffer());
65 if(use_initial_estimate)
66 {
67 _kernel.setArg(idx++, new_points_estimates->cl_buffer());
68 }
69 }
70 _kernel.setArg(idx++, old_points_internal->cl_buffer());
71 _kernel.setArg(idx++, new_points_internal->cl_buffer());
72 _kernel.setArg<cl_float>(idx++, scale);
73
74 // Configure kernel window
75 Window window;
76 window.set(Window::DimX, Window::Dimension(0, old_points->num_values(), 1));
77 window.set(Window::DimY, Window::Dimension(0, 1, 1));
Anthony Barbierb6eb3532018-08-08 13:20:04 +010078 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010079}
80
81void CLLKTrackerInitKernel::run(const Window &window, cl::CommandQueue &queue)
82{
83 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
84 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
85
86 enqueue(queue, *this, window);
87}
88
89void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
90
91{
92 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
93 ARM_COMPUTE_ERROR_ON(new_points == nullptr);
94
95 // Create kernel
96 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("finalize"));
97
98 // Set static kernel arguments
99 unsigned int idx = 0;
100 _kernel.setArg(idx++, new_points_internal->cl_buffer());
101 _kernel.setArg(idx++, new_points->cl_buffer());
102
103 // Configure kernel window
104 Window window;
105 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
106 window.set(Window::DimY, Window::Dimension(0, 1, 1));
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100107 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100108}
109
110void CLLKTrackerFinalizeKernel::run(const Window &window, cl::CommandQueue &queue)
111{
112 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
113 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
114
115 enqueue(queue, *this, window);
116}
117
118CLLKTrackerStage0Kernel::CLLKTrackerStage0Kernel()
119 : _old_input(nullptr), _old_scharr_gx(nullptr), _old_scharr_gy(nullptr)
120{
121}
122
123void CLLKTrackerStage0Kernel::configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
124 ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
125 ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
126 size_t window_dimension, size_t level)
127
128{
129 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8);
130 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gx, 1, DataType::S16);
131 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_scharr_gy, 1, DataType::S16);
132 ARM_COMPUTE_ERROR_ON(old_points_internal == nullptr);
133 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
134 ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
135 ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
136
137 _old_input = old_input;
138 _old_scharr_gx = old_scharr_gx;
139 _old_scharr_gy = old_scharr_gy;
140
141 // Configure kernel window
142 Window window;
143 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
144 window.set(Window::DimY, Window::Dimension(0, 1, 1));
145
146 const ValidRegion valid_region = intersect_valid_regions(
147 old_input->info()->valid_region(),
148 old_scharr_gx->info()->valid_region(),
149 old_scharr_gy->info()->valid_region());
150
151 update_window_and_padding(window,
152 AccessWindowStatic(old_input->info(), valid_region.start(0), valid_region.start(1),
153 valid_region.end(0), valid_region.end(1)),
154 AccessWindowStatic(old_scharr_gx->info(), valid_region.start(0), valid_region.start(1),
155 valid_region.end(0), valid_region.end(1)),
156 AccessWindowStatic(old_scharr_gy->info(), valid_region.start(0), valid_region.start(1),
157 valid_region.end(0), valid_region.end(1)));
158
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100159 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100160
161 // Initialize required variables
162 const int level0 = (level == 0) ? 1 : 0;
163 const int window_size = window_dimension;
164 const int window_size_squared = window_dimension * window_dimension;
165 const int window_size_half = window_dimension / 2;
166 const float eig_const = 1.0f / (2.0f * window_size_squared);
167 const cl_float3 border_limits =
168 {
169 {
170 // -1 because we load 2 values at once for bilinear interpolation
171 static_cast<cl_float>(valid_region.end(0) - window_size - 1),
172 static_cast<cl_float>(valid_region.end(1) - window_size - 1),
173 static_cast<cl_float>(valid_region.start(0))
174 }
175 };
176
177 // Create kernel
178 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage0"));
179
180 // Set arguments
181 unsigned int idx = 3 * num_arguments_per_2D_tensor();
182 _kernel.setArg(idx++, old_points_internal->cl_buffer());
183 _kernel.setArg(idx++, new_points_internal->cl_buffer());
184 _kernel.setArg(idx++, coeff_table->cl_buffer());
185 _kernel.setArg(idx++, old_ival->cl_buffer());
186 _kernel.setArg<cl_int>(idx++, window_size);
187 _kernel.setArg<cl_int>(idx++, window_size_squared);
188 _kernel.setArg<cl_int>(idx++, window_size_half);
189 _kernel.setArg<cl_float3>(idx++, border_limits);
190 _kernel.setArg<cl_float>(idx++, eig_const);
191 _kernel.setArg<cl_int>(idx++, level0);
192}
193
194void CLLKTrackerStage0Kernel::run(const Window &window, cl::CommandQueue &queue)
195{
196 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
197 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
198
199 // Set static tensor arguments. Setting here as allocation might be deferred.
200 unsigned int idx = 0;
201 add_2D_tensor_argument(idx, _old_input, window);
202 add_2D_tensor_argument(idx, _old_scharr_gx, window);
203 add_2D_tensor_argument(idx, _old_scharr_gy, window);
204
205 enqueue(queue, *this, window);
206}
207
208CLLKTrackerStage1Kernel::CLLKTrackerStage1Kernel()
209 : _new_input(nullptr)
210{
211}
212
213void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
214 Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
215
216{
217 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8);
218 ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
219 ARM_COMPUTE_ERROR_ON(coeff_table == nullptr);
220 ARM_COMPUTE_ERROR_ON(old_ival == nullptr);
221
222 _new_input = new_input;
223
224 // Configure kernel window
225 Window window;
226 window.set(Window::DimX, Window::Dimension(0, new_points_internal->num_values(), 1));
227 window.set(Window::DimY, Window::Dimension(0, 1, 1));
228
229 const ValidRegion &valid_region = new_input->info()->valid_region();
230
231 update_window_and_padding(window,
232 AccessWindowStatic(new_input->info(), valid_region.start(0), valid_region.start(1),
233 valid_region.end(0), valid_region.end(1)));
234
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100235 ICLKernel::configure_internal(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100236
237 // Initialize required variables
238 const int level0 = (level == 0) ? 1 : 0;
239 const int window_size = window_dimension;
240 const int window_size_squared = window_dimension * window_dimension;
241 const int window_size_half = window_dimension / 2;
242 const float eig_const = 1.0f / (2.0f * window_size_squared);
243 const cl_float3 border_limits =
244 {
245 {
246 // -1 because we load 2 values at once for bilinear interpolation
247 static_cast<cl_float>(valid_region.end(0) - window_size - 1),
248 static_cast<cl_float>(valid_region.end(1) - window_size - 1),
249 static_cast<cl_float>(valid_region.start(0))
250 }
251 };
John Richardson8de92612018-02-22 14:09:31 +0000252
253 // Set maximum number of iterations used for convergence
254 const size_t max_iterations = 1000;
255 num_iterations = (termination == Termination::TERM_CRITERIA_EPSILON) ? max_iterations : num_iterations;
256
257 const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100258
259 // Create kernel
260 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage1"));
261
262 // Set static kernel arguments
263 unsigned int idx = num_arguments_per_2D_tensor();
264 _kernel.setArg(idx++, new_points_internal->cl_buffer());
265 _kernel.setArg(idx++, coeff_table->cl_buffer());
266 _kernel.setArg(idx++, old_ival->cl_buffer());
267 _kernel.setArg<cl_int>(idx++, window_size);
268 _kernel.setArg<cl_int>(idx++, window_size_squared);
269 _kernel.setArg<cl_int>(idx++, window_size_half);
270 _kernel.setArg<cl_int>(idx++, num_iterations);
271 _kernel.setArg<cl_float>(idx++, epsilon);
272 _kernel.setArg<cl_float3>(idx++, border_limits);
273 _kernel.setArg<cl_float>(idx++, eig_const);
274 _kernel.setArg<cl_int>(idx++, level0);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100275 _kernel.setArg<cl_int>(idx++, term_epsilon);
276}
277
278void CLLKTrackerStage1Kernel::run(const Window &window, cl::CommandQueue &queue)
279{
280 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
281 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
282
283 // Set static tensor arguments. Setting here as allocation might be deferred.
284 unsigned int idx = 0;
285 add_2D_tensor_argument(idx, _new_input, window);
286
287 enqueue(queue, *this, window);
288}