/*
 * Copyright (c) 2017-2018 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "helpers.h"
#include "types.h"

/*
 * The criteria for lost tracking are that the spatial gradient matrix has:
 * - Determinant less than DETERMINANT_THR
 * - or minimum eigenvalue smaller than EIGENVALUE_THR
 *
 * The thresholds for the determinant and the minimum eigenvalue are
 * defined by the OpenVX spec
 *
 * Note: Tracking is also lost when the tracked point coordinates fall outside
 * the image boundaries
 *
 * https://www.khronos.org/registry/vx/specs/1.0/html/d0/d0c/group__group__vision__function__opticalflowpyrlk.html
 */
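
/* Illustrative sketch of how the criteria above are applied in lktracker_stage1 below
 * (A11, A12, A22 and min_eig are computed per keypoint in lktracker_stage0):
 *
 *     float det = A11 * A22 - A12 * A12;
 *     if(det < DETERMINANT_THR || min_eig < EIGENVALUE_THR)
 *     {
 *         tracking_status = 0.0f; // Point is lost
 *     }
 */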

/* Internal Lucas-Kanade Keypoint struct */
typedef struct InternalKeypoint
{
    float x;               /**< The x coordinate. */
    float y;               /**< The y coordinate. */
    float tracking_status; /**< A zero indicates a lost point. Initialized to 1 by corner detectors. */
    float dummy;           /**< Dummy member for alignment. */
} InternalKeypoint;

/** Threshold for the determinant. Used for lost tracking criteria */
#define DETERMINANT_THR 1.0e-07f

/** Threshold for the minimum eigenvalue. Used for lost tracking criteria */
#define EIGENVALUE_THR 1.0e-04f

/** Constants used for the Lucas-Kanade Algorithm */
#define W_BITS (14)
#define FLT_SCALE (1.0f / (float)(1 << 20))
#define D0 ((float)(1 << W_BITS))
#define D1 (1.0f / (float)(1 << (W_BITS - 5)))
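
/* Explanatory note on the fixed-point constants above (an added comment, not from the OpenVX spec):
 * the bilinear interpolation weights are scaled by D0 = 2^W_BITS so that pixel values can be
 * accumulated with integer arithmetic, e.g. a weight of 0.25 becomes round(0.25f * D0) = 4096.
 * D1 rescales the interpolated intensity values, and FLT_SCALE = 1 / 2^20 converts the integer
 * accumulations of the spatial gradient matrix and of the mismatch vector back to floating point.
 */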

/** Initializes the internal new points array when the level of pyramid is NOT equal to max.
 *
 * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid.
 * @param[in,out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
 * @param[in]     scale               Scale factor to apply for the new_point coordinates.
 */
__kernel void init_level(
    __global float4 *old_points_internal,
    __global float4 *new_points_internal,
    const float scale)
{
    int idx = get_global_id(0);

    // Get old and new keypoints
    float4 old_point = old_points_internal[idx];
    float4 new_point = new_points_internal[idx];

    // Scale accordingly with the pyramid_scale
    old_point.xy *= (float2)(2.0f);
    new_point.xy *= (float2)(2.0f);

    old_points_internal[idx] = old_point;
    new_points_internal[idx] = new_point;
}

/** Initializes the internal new points array when the level of pyramid is equal to max.
 *
 * @param[in]     old_points          An array of key points that are defined at the old_images high resolution pyramid.
 * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid.
 * @param[out]    new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
 * @param[in]     scale               Scale factor to apply for the new_point coordinates.
 */
__kernel void init_level_max(
    __global Keypoint         *old_points,
    __global InternalKeypoint *old_points_internal,
    __global InternalKeypoint *new_points_internal,
    const float scale)
{
    int idx = get_global_id(0);

    Keypoint old_point = old_points[idx];

    // Get old keypoint to track
    InternalKeypoint old_point_internal;
    old_point_internal.x               = old_point.x * scale;
    old_point_internal.y               = old_point.y * scale;
    old_point_internal.tracking_status = 1.f;

    // Store internal keypoints
    old_points_internal[idx] = old_point_internal;
    new_points_internal[idx] = old_point_internal;
}

/** Initializes the new_points array when the level of pyramid is equal to max and if use_initial_estimate = 1.
 *
 * @param[in]     old_points           An array of key points that are defined at the old_images high resolution pyramid.
 * @param[in]     new_points_estimates An array of estimate key points that are defined at the old_images high resolution pyramid.
 * @param[in,out] old_points_internal  An array of internal key points that are defined at the old_images high resolution pyramid.
 * @param[out]    new_points_internal  An array of internal key points that are defined at the new_images high resolution pyramid.
 * @param[in]     scale                Scale factor to apply for the new_point coordinates.
 */
__kernel void init_level_max_initial_estimate(
    __global Keypoint         *old_points,
    __global Keypoint         *new_points_estimates,
    __global InternalKeypoint *old_points_internal,
    __global InternalKeypoint *new_points_internal,
    const float scale)
{
    int idx = get_global_id(0);

    Keypoint old_point          = old_points[idx];
    Keypoint new_point_estimate = new_points_estimates[idx];
    InternalKeypoint old_point_internal;
    InternalKeypoint new_point_internal;

    // Get old keypoint to track
    old_point_internal.x               = old_point.x * scale;
    old_point_internal.y               = old_point.y * scale;
    old_point_internal.tracking_status = 1.f;

    // Get new keypoint to track
    new_point_internal.x               = new_point_estimate.x * scale;
    new_point_internal.y               = new_point_estimate.y * scale;
    new_point_internal.tracking_status = new_point_estimate.tracking_status;

    // Store internal keypoints
    old_points_internal[idx] = old_point_internal;
    new_points_internal[idx] = new_point_internal;
}

/** Rounds the coordinates stored in the internal new points array and stores them in the new_points array
 *
 * @param[in]  new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid.
 * @param[out] new_points          An array of output key points that are defined at the new_images high resolution pyramid.
 */
__kernel void finalize(
    __global InternalKeypoint *new_points_internal,
    __global Keypoint         *new_points)
{
    int idx = get_global_id(0);

    // Load internal keypoint
    InternalKeypoint new_point_internal = new_points_internal[idx];

    // Calculate output point
    Keypoint new_point;
    new_point.x               = round(new_point_internal.x);
    new_point.y               = round(new_point_internal.y);
    new_point.strength        = 0.f;
    new_point.scale           = 0.f;
    new_point.orientation     = 0.f;
    new_point.tracking_status = new_point_internal.tracking_status;
    new_point.error           = 0.f;

    // Store new point
    new_points[idx] = new_point;
}

/** Computes A11, A12, A22, min_eig, ival, ixval and iyval at the 0th level of the pyramid. These values will be used in step 1.
 *
 * @param[in] old_image_ptr Pointer to the input old image. Supported data types: U8
 * @param[in] old_image_stride_x Stride of the input old image in X dimension (in bytes)
 * @param[in] old_image_step_x old_image_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] old_image_stride_y Stride of the input old image in Y dimension (in bytes)
 * @param[in] old_image_step_y old_image_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] old_image_offset_first_element_in_bytes The offset of the first element in the input old image
 * @param[in] old_scharr_gx_ptr Pointer to the input scharr x image. Supported data types: S16
 * @param[in] old_scharr_gx_stride_x Stride of the input scharr x image in X dimension (in bytes)
 * @param[in] old_scharr_gx_step_x old_scharr_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] old_scharr_gx_stride_y Stride of the input scharr x image in Y dimension (in bytes)
 * @param[in] old_scharr_gx_step_y old_scharr_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] old_scharr_gx_offset_first_element_in_bytes The offset of the first element in the input scharr x image
 * @param[in] old_scharr_gy_ptr Pointer to the input scharr y image. Supported data types: S16
 * @param[in] old_scharr_gy_stride_x Stride of the input scharr y image in X dimension (in bytes)
 * @param[in] old_scharr_gy_step_x old_scharr_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] old_scharr_gy_stride_y Stride of the input scharr y image in Y dimension (in bytes)
 * @param[in] old_scharr_gy_step_y old_scharr_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] old_scharr_gy_offset_first_element_in_bytes The offset of the first element in the input scharr y image
 * @param[in] old_points An array of key points. Those key points are defined at the old_images high resolution pyramid
 * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid
 * @param[out] coeff It stores | A11 | A12 | A22 | min_eig | for each keypoint
 * @param[out] iold_val It stores | ival | ixval | iyval | dummy | for each point in the window centered on old_keypoint
 * @param[in] window_dimension The size of the window on which to perform the algorithm
 * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm
 * @param[in] half_window The half size of the window on which to perform the algorithm
 * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1)
 * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension)
 * @param[in] level0 It is set to 1 if the level of the pyramid is 0
 */
void __kernel lktracker_stage0(
    IMAGE_DECLARATION(old_image),
    IMAGE_DECLARATION(old_scharr_gx),
    IMAGE_DECLARATION(old_scharr_gy),
    __global float4 *old_points,
    __global float4 *new_points,
    __global float4 *coeff,
    __global short4 *iold_val,
    const int window_dimension,
    const int window_dimension_pow2,
    const int half_window,
    const float3 border_limits,
    const float eig_const,
    const int level0)
{
    int idx = get_global_id(0);

    Image old_image     = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_image);
    Image old_scharr_gx = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gx);
    Image old_scharr_gy = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gy);

    // Get old keypoint
    float2 old_keypoint = old_points[idx].xy - (float2)half_window;

    // Get the floor value
    float2 iold_keypoint = floor(old_keypoint);

    // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point
    if(any(iold_keypoint < border_limits.zz) || any(iold_keypoint >= border_limits.xy))
    {
        if(level0 == 1)
        {
            // Invalidate tracked point as we are at level 0
            new_points[idx].s2 = 0.0f;
        }

        // Not valid coordinate. It sets min_eig to 0.0f
        coeff[idx].s3 = 0.0f;

        return;
    }

    // Compute weight for the bilinear interpolation
    float2 ab = old_keypoint - iold_keypoint;

    // Weight used for Bilinear-Interpolation on Scharr images
    // w_scharr.s0 = (1.0f - ab.x) * (1.0f - ab.y)
    // w_scharr.s1 = ab.x * (1.0f - ab.y)
    // w_scharr.s2 = (1.0f - ab.x) * ab.y
    // w_scharr.s3 = ab.x * ab.y

    float4 w_scharr;
    w_scharr.s3  = ab.x * ab.y;
    w_scharr.s0  = w_scharr.s3 + 1.0f - ab.x - ab.y;
    w_scharr.s12 = ab - (float2)w_scharr.s3;
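
    // Note (added explanation): the three assignments above are an algebraically equivalent form of the
    // products listed in the comment, e.g. (1.0f - ab.x) * (1.0f - ab.y) = ab.x * ab.y + 1.0f - ab.x - ab.y,
    // so a single multiplication is enough to derive all four weights from ab.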

    // Weight used for Bilinear-Interpolation on Old and New images
    // w.s0 = round(w_scharr.s0 * D0)
    // w.s1 = round(w_scharr.s1 * D0)
    // w.s2 = round(w_scharr.s2 * D0)
    // w.s3 = D0 - w.s0 - w.s1 - w.s2

    float4 w;
    w    = round(w_scharr * (float4)D0);
    w.s3 = D0 - w.s0 - w.s1 - w.s2; // Added for matching VX implementation

    // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig
    int4 iG = (int4)0;

    // Window offset
    int window_offset = idx * window_dimension_pow2;

    // Compute Spatial Gradient Matrix G
    for(ushort ky = 0; ky < window_dimension; ++ky)
    {
        int offset_y = iold_keypoint.y + ky;
        for(ushort kx = 0; kx < window_dimension; ++kx)
        {
            int    offset_x = iold_keypoint.x + kx;
            float4 px;

            // Load values from old_image for computing the bilinear interpolation
            px = convert_float4((uchar4)(vload2(0, offset(&old_image, offset_x, offset_y)),
                                         vload2(0, offset(&old_image, offset_x, offset_y + 1))));

            // old_i.s0 = ival, old_i.s1 = ixval, old_i.s2 = iyval, old_i.s3 = dummy
            float4 old_i;

            // Compute bilinear interpolation (with D1 scale factor) for ival
            old_i.s0 = dot(px, w) * D1;

            // Load values from old_scharr_gx for computing the bilinear interpolation
            px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y)),
                                         vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y + 1))));

            // Compute bilinear interpolation for ixval
            old_i.s1 = dot(px, w_scharr);

            // Load values from old_scharr_gy for computing the bilinear interpolation
            px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y)),
                                         vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y + 1))));

            // Compute bilinear interpolation for iyval
            old_i.s2 = dot(px, w_scharr);

            // Rounding (it could be omitted. Used just for matching the VX implementation)
            int4 iold = convert_int4(round(old_i));

            // Accumulate values in the Spatial Gradient Matrix
            iG.s0 += (int)(iold.s1 * iold.s1);
            iG.s1 += (int)(iold.s1 * iold.s2);
            iG.s2 += (int)(iold.s2 * iold.s2);

            // Store ival, ixval and iyval
            iold_val[window_offset + kx] = convert_short4(iold);
        }
        window_offset += window_dimension;
    }

    // Scale iA11, iA12 and iA22
    float4 G = convert_float4(iG) * (float4)FLT_SCALE;

    // Compute minimum eigen value
    G.s3 = (float)(G.s2 + G.s0 - sqrt(pown(G.s0 - G.s2, 2) + 4.0f * G.s1 * G.s1)) * eig_const;
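    // (Added explanation) For the 2x2 symmetric matrix G = [ A11 A12 ; A12 A22 ], the smaller eigenvalue is
    // ((A11 + A22) - sqrt((A11 - A22)^2 + 4 * A12^2)) / 2; eig_const folds in both the division by 2 and the
    // normalization by the number of pixels in the window (1 / (window_dimension * window_dimension)).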

    // Store A11, A12, A22 and min_eig
    coeff[idx] = G;
}

/** Computes the motion vector for a given keypoint
 *
 * @param[in] new_image_ptr Pointer to the input new image. Supported data types: U8
 * @param[in] new_image_stride_x Stride of the input new image in X dimension (in bytes)
 * @param[in] new_image_step_x new_image_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in] new_image_stride_y Stride of the input new image in Y dimension (in bytes)
 * @param[in] new_image_step_y new_image_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in] new_image_offset_first_element_in_bytes The offset of the first element in the input new image
 * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid
 * @param[in] coeff The | A11 | A12 | A22 | min_eig | values for each keypoint
 * @param[in] iold_val The | ival | ixval | iyval | dummy | values for each point in the window centered on old_keypoint
 * @param[in] window_dimension The size of the window on which to perform the algorithm
 * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm
 * @param[in] half_window The half size of the window on which to perform the algorithm
 * @param[in] num_iterations The maximum number of iterations
 * @param[in] epsilon The value for terminating the algorithm.
 * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1)
 * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension)
 * @param[in] level0 It is set to 1 if the level of the pyramid is 0
 * @param[in] term_epsilon It is set to 1 if termination = TERM_CRITERIA_EPSILON
 */
void __kernel lktracker_stage1(
    IMAGE_DECLARATION(new_image),
    __global float4 *new_points,
    __global float4 *coeff,
    __global short4 *iold_val,
    const int window_dimension,
    const int window_dimension_pow2,
    const int half_window,
    const int num_iterations,
    const float epsilon,
    const float3 border_limits,
    const float eig_const,
    const int level0,
    const int term_epsilon)
{
    int idx = get_global_id(0);
    Image new_image = CONVERT_TO_IMAGE_STRUCT_NO_STEP(new_image);

    // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig
    float4 G = coeff[idx];

    // Determinant
    float D = G.s0 * G.s2 - G.s1 * G.s1;

    // Check if it is a good point to track
    if(G.s3 < EIGENVALUE_THR || D < DETERMINANT_THR)
    {
        if(level0 == 1)
        {
            // Invalidate tracked point as we are at level 0
            new_points[idx].s2 = 0;
        }

        return;
    }

    // Compute inverse
    //D = native_recip(D);
    D = 1.0 / D;

    // Get new keypoint
    float2 new_keypoint = new_points[idx].xy - (float)half_window;

    // Get new point
    float2 out_new_point = new_points[idx].xy;

    // Keep delta obtained in the previous iteration
    float2 prev_delta = (float2)0.0f;

    int j = 0;
    while(j < num_iterations)
    {
        // Get the floor value
        float2 inew_keypoint = floor(new_keypoint);

        // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point
        if(any(inew_keypoint < border_limits.zz) || any(inew_keypoint >= border_limits.xy))
        {
            if(level0 == 1)
            {
                // Invalidate tracked point as we are at level 0
                new_points[idx].s2 = 0.0f;
            }
            else
            {
                new_points[idx].xy = out_new_point;
            }

            return;
        }

        // Compute weight for the bilinear interpolation
        float2 ab = new_keypoint - inew_keypoint;

        // Weight used for Bilinear-Interpolation on Old and New images
        // w.s0 = round((1.0f - ab.x) * (1.0f - ab.y) * D0)
        // w.s1 = round(ab.x * (1.0f - ab.y) * D0)
        // w.s2 = round((1.0f - ab.x) * ab.y * D0)
        // w.s3 = D0 - w.s0 - w.s1 - w.s2

        float4 w;
        w.s3  = ab.x * ab.y;
        w.s0  = w.s3 + 1.0f - ab.x - ab.y;
        w.s12 = ab - (float2)w.s3;
        w     = round(w * (float4)D0);
        w.s3  = D0 - w.s0 - w.s1 - w.s2;

        // Mismatch vector
        int2 ib = 0;

        // Old val offset
        int old_val_offset = idx * window_dimension_pow2;

        for(int ky = 0; ky < window_dimension; ++ky)
        {
            for(int kx = 0; kx < window_dimension; ++kx)
            {
                // ival, ixval and iyval have been computed in the previous stage
                int4 old_ival = convert_int4(iold_val[old_val_offset]);

                // Load values from new_image for computing the bilinear interpolation
                float4 px = convert_float4((uchar4)(vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky)),
                                                    vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky + 1))));

                // Compute bilinear interpolation on new image
                int jval = (int)round(dot(px, w) * D1);

                // Compute luminance difference
                int diff = (int)(jval - old_ival.s0);

                // Accumulate values in mismatch vector
                ib += (diff * old_ival.s12);

                // Update old val offset
                old_val_offset++;
            }
        }

        float2 b = convert_float2(ib) * (float2)FLT_SCALE;

        // Optical Flow
        float2 delta;

        delta.x = (float)((G.s1 * b.y - G.s2 * b.x) * D);
        delta.y = (float)((G.s1 * b.x - G.s0 * b.y) * D);
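
        // (Added explanation) delta is the solution of G * delta = -b obtained with the explicit 2x2 inverse:
        // G^-1 = 1/det(G) * [ A22 -A12 ; -A12 A11 ], so delta = -G^-1 * b, with D holding 1/det(G) at this point.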

        // Update new point coordinate
        new_keypoint += delta;

        out_new_point = new_keypoint + (float2)half_window;

        if(term_epsilon == 1)
        {
            float mag2 = dot(delta, delta);

            if(mag2 <= epsilon)
            {
                new_points[idx].xy = out_new_point;

                return;
            }
        }

        // Check convergence analyzing the previous delta
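        // (Added explanation) If the current delta nearly cancels the previous one, the iteration is
        // oscillating around the solution rather than converging; in that case step back by half of the
        // last delta and stop, as is commonly done in pyramidal Lucas-Kanade implementations.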
        if(j > 0 && all(fabs(delta + prev_delta) < (float2)0.01f))
        {
            out_new_point -= delta * (float2)0.5f;

            new_points[idx].xy = out_new_point;

            return;
        }

        // Update previous delta
        prev_delta = delta;

        j++;
    }

    new_points[idx].xy = out_new_point;
}