blob: 3e3c9fd23cdee2c809a95fdad531831ba75014cf [file] [log] [blame]
/*
 * Copyright (c) 2016, 2017 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "helpers.h"
25
/** Computes the Harris corner score using a 3x3 neighbourhood.
 *
 * Every work-item reads three rows (relative rows -1, 0 and 1) from both gradient
 * images, accumulates Gx^2, Gy^2 and Gx*Gy over three horizontally overlapping
 * 4-wide windows per row, and stores four consecutive F32 scores to @p vc.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_3x3(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image vc_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Running sums of Gx^2, Gy^2 and Gx*Gy for the four output pixels */
    float4 sum_xx = (float4)0.0f;
    float4 sum_yy = (float4)0.0f;
    float4 sum_xy = (float4)0.0f;

    /* Visit the rows above, at and below the current position */
    for(int dy = -1; dy <= 1; ++dy)
    {
        /* Load eight elements starting one column to the left; lanes 0..5 are consumed below */
        const float8 fx = convert_float8(vload8(0, (__global DATA_TYPE *)offset(&gx_img, -1, dy)));
        const float8 fy = convert_float8(vload8(0, (__global DATA_TYPE *)offset(&gy_img, -1, dy)));

        /* Left, middle and right taps for each of the four outputs */
        const float4 x_left  = fx.s0123;
        const float4 x_mid   = fx.s1234;
        const float4 x_right = fx.s2345;

        const float4 y_left  = fy.s0123;
        const float4 y_mid   = fy.s1234;
        const float4 y_right = fy.s2345;

        sum_xx += (x_left * x_left) + (x_mid * x_mid) + (x_right * x_right);
        sum_yy += (y_left * y_left) + (y_mid * y_mid) + (y_right * y_right);
        sum_xy += (x_left * y_left) + (x_mid * y_mid) + (x_right * y_right);
    }

    /* Trace and determinant of the accumulated 2x2 matrix */
    const float4 trace = sum_xx + sum_yy;
    const float4 det   = sum_xx * sum_yy - (sum_xy * sum_xy);

    /* Harris response, scaled by the normalization factor */
    float4 score = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;

    /* Keep only responses strictly greater than the threshold; others become zero */
    const int4 is_strong = score > (float4)strength_thresh;
    score                = select((float4)0.0f, score, is_strong);

    vstore4(score, 0, (__global float *)vc_img.ptr);
}
131
/** Accumulates the horizontal 1x5 sums of Gx^2, Gy^2 and Gx*Gy for a single row.
 *
 * Eight elements are loaded from each gradient image, starting two columns to the
 * left of the current position on row @p row. Five overlapping 4-wide windows
 * (lanes 0-3 up to lanes 4-7) are then squared / multiplied and summed.
 *
 * @param[in] src_gx Pointer to gx gradient image.
 * @param[in] src_gy Pointer to gy gradient image.
 * @param[in] row    Relative row.
 *
 * @return A float16 whose lanes 0-3 hold the Gx^2 sums, lanes 4-7 the Gy^2 sums,
 *         lanes 8-11 the Gx*Gy sums and lanes 12-15 are zero.
 */
inline float16 harris_score_1x5(Image *src_gx, Image *src_gy, int row)
{
    /* Load and convert the whole row segment once; windows are taken from the float data */
    const float8 fx = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gx, -2, row)));
    const float8 fy = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gy, -2, row)));

    /* Gx^2 */
    float4 sum_xx = (float4)0.0f;
    sum_xx += (fx.s0123 * fx.s0123);
    sum_xx += (fx.s1234 * fx.s1234);
    sum_xx += (fx.s2345 * fx.s2345);
    sum_xx += (fx.s3456 * fx.s3456);
    sum_xx += (fx.s4567 * fx.s4567);

    /* Gy^2 */
    float4 sum_yy = (float4)0.0f;
    sum_yy += (fy.s0123 * fy.s0123);
    sum_yy += (fy.s1234 * fy.s1234);
    sum_yy += (fy.s2345 * fy.s2345);
    sum_yy += (fy.s3456 * fy.s3456);
    sum_yy += (fy.s4567 * fy.s4567);

    /* Gx*Gy */
    float4 sum_xy = (float4)0.0f;
    sum_xy += (fx.s0123 * fy.s0123);
    sum_xy += (fx.s1234 * fy.s1234);
    sum_xy += (fx.s2345 * fy.s2345);
    sum_xy += (fx.s3456 * fy.s3456);
    sum_xy += (fx.s4567 * fy.s4567);

    /* Pack the three partial sums; the last four lanes are padding */
    return (float16)(sum_xx, sum_yy, sum_xy, (float4)0);
}
182
/** Computes the Harris corner score using a 5x5 neighbourhood.
 *
 * Every work-item sums the per-row results of harris_score_1x5 for relative rows
 * -2 to 2 and stores four consecutive F32 scores to @p vc.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_5x5(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image vc_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Packed accumulator: Gx^2 in lanes 0-3, Gy^2 in lanes 4-7, Gx*Gy in lanes 8-11 */
    float16 acc = (float16)0.0f;

    /* Add the contribution of each of the five rows of the window */
    for(int dy = -2; dy <= 2; ++dy)
    {
        acc += harris_score_1x5(&gx_img, &gy_img, dy);
    }

    const float4 sum_xx = acc.s0123;
    const float4 sum_yy = acc.s4567;
    const float4 sum_xy = acc.s89AB;

    /* Trace and determinant of the accumulated 2x2 matrix */
    const float4 trace = sum_xx + sum_yy;
    const float4 det   = sum_xx * sum_yy - (sum_xy * sum_xy);

    /* Harris response, scaled by the normalization factor */
    float4 score = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;

    /* Keep only responses strictly greater than the threshold; others become zero */
    const int4 is_strong = score > (float4)strength_thresh;
    score                = select((float4)0.0f, score, is_strong);

    vstore4(score, 0, (__global float *)vc_img.ptr);
}
246
/** Accumulates the horizontal 1x7 sums of Gx^2, Gy^2 and Gx*Gy for a single row.
 *
 * Ten elements per gradient image are needed: eight are loaded starting three
 * columns to the left of the current position on row @p row, and two more are
 * loaded starting at column offset 5. Seven overlapping 4-wide windows are then
 * squared / multiplied and summed.
 *
 * @param[in] src_gx Pointer to gx gradient image.
 * @param[in] src_gy Pointer to gy gradient image.
 * @param[in] row    Relative row.
 *
 * @return A float16 whose lanes 0-3 hold the Gx^2 sums, lanes 4-7 the Gy^2 sums,
 *         lanes 8-11 the Gx*Gy sums and lanes 12-15 are zero.
 */
inline float16 harris_score_1x7(Image *src_gx, Image *src_gy, int row)
{
    /* Main segment (column offsets -3..4) and the two trailing elements (column offsets 5..6) */
    const float8 fx_lo = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gx, -3, row)));
    const float8 fy_lo = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gy, -3, row)));
    const float2 fx_hi = convert_float2(vload2(0, (__global DATA_TYPE *)offset(src_gx, 5, row)));
    const float2 fy_hi = convert_float2(vload2(0, (__global DATA_TYPE *)offset(src_gy, 5, row)));

    /* The last two windows straddle both loads */
    const float4 fx_w5 = (float4)(fx_lo.s567, fx_hi.s0);
    const float4 fy_w5 = (float4)(fy_lo.s567, fy_hi.s0);
    const float4 fx_w6 = (float4)(fx_lo.s67, fx_hi.s01);
    const float4 fy_w6 = (float4)(fy_lo.s67, fy_hi.s01);

    /* Gx^2 */
    float4 sum_xx = (float4)0.0f;
    sum_xx += (fx_lo.s0123 * fx_lo.s0123);
    sum_xx += (fx_lo.s1234 * fx_lo.s1234);
    sum_xx += (fx_lo.s2345 * fx_lo.s2345);
    sum_xx += (fx_lo.s3456 * fx_lo.s3456);
    sum_xx += (fx_lo.s4567 * fx_lo.s4567);
    sum_xx += (fx_w5 * fx_w5);
    sum_xx += (fx_w6 * fx_w6);

    /* Gy^2 */
    float4 sum_yy = (float4)0.0f;
    sum_yy += (fy_lo.s0123 * fy_lo.s0123);
    sum_yy += (fy_lo.s1234 * fy_lo.s1234);
    sum_yy += (fy_lo.s2345 * fy_lo.s2345);
    sum_yy += (fy_lo.s3456 * fy_lo.s3456);
    sum_yy += (fy_lo.s4567 * fy_lo.s4567);
    sum_yy += (fy_w5 * fy_w5);
    sum_yy += (fy_w6 * fy_w6);

    /* Gx*Gy */
    float4 sum_xy = (float4)0.0f;
    sum_xy += (fx_lo.s0123 * fy_lo.s0123);
    sum_xy += (fx_lo.s1234 * fy_lo.s1234);
    sum_xy += (fx_lo.s2345 * fy_lo.s2345);
    sum_xy += (fx_lo.s3456 * fy_lo.s3456);
    sum_xy += (fx_lo.s4567 * fy_lo.s4567);
    sum_xy += (fx_w5 * fy_w5);
    sum_xy += (fx_w6 * fy_w6);

    /* Pack the three partial sums; the last four lanes are padding */
    return (float16)(sum_xx, sum_yy, sum_xy, (float4)0);
}
313
/** Computes the Harris corner score using a 7x7 neighbourhood.
 *
 * Every work-item sums the per-row results of harris_score_1x7 for relative rows
 * -3 to 3 and stores four consecutive F32 scores to @p vc.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image. Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image. Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_7x7(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image vc_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Packed accumulator: Gx^2 in lanes 0-3, Gy^2 in lanes 4-7, Gx*Gy in lanes 8-11 */
    float16 acc = (float16)0.0f;

    /* Add the contribution of each of the seven rows of the window */
    for(int dy = -3; dy <= 3; ++dy)
    {
        acc += harris_score_1x7(&gx_img, &gy_img, dy);
    }

    const float4 sum_xx = acc.s0123;
    const float4 sum_yy = acc.s4567;
    const float4 sum_xy = acc.s89AB;

    /* Trace and determinant of the accumulated 2x2 matrix */
    const float4 trace = sum_xx + sum_yy;
    const float4 det   = sum_xx * sum_yy - (sum_xy * sum_xy);

    /* Harris response, scaled by the normalization factor */
    float4 score = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;

    /* Keep only responses strictly greater than the threshold; others become zero */
    const int4 is_strong = score > (float4)strength_thresh;
    score                = select((float4)0.0f, score, is_strong);

    vstore4(score, 0, (__global float *)vc_img.ptr);
}