blob: 3e3c9fd23cdee2c809a95fdad531831ba75014cf [file] [log] [blame]
/*
* Copyright (c) 2016, 2017 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "helpers.h"
/** Computes the Harris corner score over a 3x3 window.
 *
 * Each work-item accumulates Gx^2, Gy^2 and Gx*Gy over three rows (relative rows -1, 0, 1) and
 * three columns for four horizontally adjacent output positions, then writes four scores:
 *   score = (det - sensitivity * trace^2) * pow4_normalization_factor
 * with trace = sum(Gx^2) + sum(Gy^2) and det = sum(Gx^2) * sum(Gy^2) - sum(Gx*Gy)^2.
 * A score that is not strictly greater than strength_thresh is stored as 0.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @note Every row is read with an 8-element vector load starting one element to the left of the
 *       current position; only the first six elements are used.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image (Gx gradients). Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image (Gy gradients). Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_3x3(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img    = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img    = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image score_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Running sums of Gx^2, Gy^2 and Gx*Gy for the four output positions */
    float4 sum_gx2  = (float4)0.0f;
    float4 sum_gy2  = (float4)0.0f;
    float4 sum_gxgy = (float4)0.0f;

    /* Walk the three rows of the window from top to bottom */
    for(int row = -1; row <= 1; ++row)
    {
        const VEC_DATA_TYPE(DATA_TYPE, 8) raw_gx = vload8(0, (__global DATA_TYPE *)offset(&gx_img, -1, row));
        const VEC_DATA_TYPE(DATA_TYPE, 8) raw_gy = vload8(0, (__global DATA_TYPE *)offset(&gy_img, -1, row));

        /* Left, middle and right window columns for each of the four outputs */
        const float4 gx_left  = convert_float4(raw_gx.s0123);
        const float4 gx_mid   = convert_float4(raw_gx.s1234);
        const float4 gx_right = convert_float4(raw_gx.s2345);
        const float4 gy_left  = convert_float4(raw_gy.s0123);
        const float4 gy_mid   = convert_float4(raw_gy.s1234);
        const float4 gy_right = convert_float4(raw_gy.s2345);

        sum_gx2 += (gx_left * gx_left) + (gx_mid * gx_mid) + (gx_right * gx_right);
        sum_gy2 += (gy_left * gy_left) + (gy_mid * gy_mid) + (gy_right * gy_right);
        sum_gxgy += (gx_left * gy_left) + (gx_mid * gy_mid) + (gx_right * gy_right);
    }

    /* Trace and determinant of the accumulated 2x2 structure matrix */
    const float4 trace = sum_gx2 + sum_gy2;
    const float4 det   = sum_gx2 * sum_gy2 - (sum_gxgy * sum_gxgy);

    /* Harris response; anything not above the strength threshold is zeroed */
    float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
    mc        = select((float4)0.0f, mc, mc > (float4)strength_thresh);

    vstore4(mc, 0, (__global float *)score_img.ptr);
}
/** Accumulates Gx^2, Gy^2 and Gx*Gy over one row of a 5-wide window for four adjacent outputs.
 *
 * Eight consecutive gradient values are loaded per image, starting two elements to the left of
 * the current position on the requested row; the five 4-wide sliding windows over those eight
 * values are squared/multiplied and summed.
 *
 * @param[in] src_gx Pointer to gx gradient image.
 * @param[in] src_gy Pointer to gy gradient image.
 * @param[in] row    Relative row.
 *
 * @return A float16 packed as (sum of Gx^2, sum of Gy^2, sum of Gx*Gy, zeros), four lanes each.
 */
inline float16 harris_score_1x5(Image *src_gx, Image *src_gy, int row)
{
    /* Load once and convert the whole 8-element span to float */
    const float8 span_gx = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gx, -2, row)));
    const float8 span_gy = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gy, -2, row)));

    /* Sliding 4-wide windows: window k holds column (k - 2) of the 5-wide window for each output */
    const float4 x0 = span_gx.s0123;
    const float4 x1 = span_gx.s1234;
    const float4 x2 = span_gx.s2345;
    const float4 x3 = span_gx.s3456;
    const float4 x4 = span_gx.s4567;

    const float4 y0 = span_gy.s0123;
    const float4 y1 = span_gy.s1234;
    const float4 y2 = span_gy.s2345;
    const float4 y3 = span_gy.s3456;
    const float4 y4 = span_gy.s4567;

    /* Sum of Gx^2 */
    float4 acc_gx2 = (float4)0.0f;
    acc_gx2 += (x0 * x0);
    acc_gx2 += (x1 * x1);
    acc_gx2 += (x2 * x2);
    acc_gx2 += (x3 * x3);
    acc_gx2 += (x4 * x4);

    /* Sum of Gy^2 */
    float4 acc_gy2 = (float4)0.0f;
    acc_gy2 += (y0 * y0);
    acc_gy2 += (y1 * y1);
    acc_gy2 += (y2 * y2);
    acc_gy2 += (y3 * y3);
    acc_gy2 += (y4 * y4);

    /* Sum of Gx*Gy */
    float4 acc_gxgy = (float4)0.0f;
    acc_gxgy += (x0 * y0);
    acc_gxgy += (x1 * y1);
    acc_gxgy += (x2 * y2);
    acc_gxgy += (x3 * y3);
    acc_gxgy += (x4 * y4);

    /* The last four lanes are padding */
    return (float16)(acc_gx2, acc_gy2, acc_gxgy, (float4)0);
}
/** Computes the Harris corner score over a 5x5 window.
 *
 * Each work-item sums the per-row results of harris_score_1x5 for relative rows -2..2 and writes
 * four horizontally adjacent scores:
 *   score = (det - sensitivity * trace^2) * pow4_normalization_factor
 * with trace = sum(Gx^2) + sum(Gy^2) and det = sum(Gx^2) * sum(Gy^2) - sum(Gx*Gy)^2.
 * A score that is not strictly greater than strength_thresh is stored as 0.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image (Gx gradients). Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image (Gy gradients). Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_5x5(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img    = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img    = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image score_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Running sums of Gx^2, Gy^2 and Gx*Gy for the four output positions */
    float4 sum_gx2  = (float4)0.0f;
    float4 sum_gy2  = (float4)0.0f;
    float4 sum_gxgy = (float4)0.0f;

    /* Add the contribution of each of the five window rows, top to bottom */
    for(int row = -2; row <= 2; ++row)
    {
        const float16 row_sums = harris_score_1x5(&gx_img, &gy_img, row);

        sum_gx2 += row_sums.s0123;
        sum_gy2 += row_sums.s4567;
        sum_gxgy += row_sums.s89AB;
    }

    /* Trace and determinant of the accumulated 2x2 structure matrix */
    const float4 trace = sum_gx2 + sum_gy2;
    const float4 det   = sum_gx2 * sum_gy2 - (sum_gxgy * sum_gxgy);

    /* Harris response; anything not above the strength threshold is zeroed */
    float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
    mc        = select((float4)0.0f, mc, mc > (float4)strength_thresh);

    vstore4(mc, 0, (__global float *)score_img.ptr);
}
/** Accumulates Gx^2, Gy^2 and Gx*Gy over one row of a 7-wide window for four adjacent outputs.
 *
 * Ten consecutive gradient values per image are needed: eight are loaded starting at relative
 * column -3 and two more starting at relative column 5. The seven 4-wide sliding windows over
 * those ten values are squared/multiplied and summed.
 *
 * @param[in] src_gx Pointer to gx gradient image.
 * @param[in] src_gy Pointer to gy gradient image.
 * @param[in] row    Relative row.
 *
 * @return A float16 packed as (sum of Gx^2, sum of Gy^2, sum of Gx*Gy, zeros), four lanes each.
 */
inline float16 harris_score_1x7(Image *src_gx, Image *src_gy, int row)
{
    /* First eight values of the span (relative columns -3..4) */
    const float8 head_gx = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gx, -3, row)));
    const float8 head_gy = convert_float8(vload8(0, (__global DATA_TYPE *)offset(src_gy, -3, row)));

    /* Remaining two values of the span (relative columns 5..6) */
    const float2 tail_gx = convert_float2(vload2(0, (__global DATA_TYPE *)offset(src_gx, 5, row)));
    const float2 tail_gy = convert_float2(vload2(0, (__global DATA_TYPE *)offset(src_gy, 5, row)));

    /* Sliding 4-wide windows; the last two straddle the head and tail loads */
    const float4 x0 = head_gx.s0123;
    const float4 x1 = head_gx.s1234;
    const float4 x2 = head_gx.s2345;
    const float4 x3 = head_gx.s3456;
    const float4 x4 = head_gx.s4567;
    const float4 x5 = (float4)(head_gx.s567, tail_gx.s0);
    const float4 x6 = (float4)(head_gx.s67, tail_gx.s01);

    const float4 y0 = head_gy.s0123;
    const float4 y1 = head_gy.s1234;
    const float4 y2 = head_gy.s2345;
    const float4 y3 = head_gy.s3456;
    const float4 y4 = head_gy.s4567;
    const float4 y5 = (float4)(head_gy.s567, tail_gy.s0);
    const float4 y6 = (float4)(head_gy.s67, tail_gy.s01);

    /* Sum of Gx^2 */
    float4 acc_gx2 = (float4)0.0f;
    acc_gx2 += (x0 * x0);
    acc_gx2 += (x1 * x1);
    acc_gx2 += (x2 * x2);
    acc_gx2 += (x3 * x3);
    acc_gx2 += (x4 * x4);
    acc_gx2 += (x5 * x5);
    acc_gx2 += (x6 * x6);

    /* Sum of Gy^2 */
    float4 acc_gy2 = (float4)0.0f;
    acc_gy2 += (y0 * y0);
    acc_gy2 += (y1 * y1);
    acc_gy2 += (y2 * y2);
    acc_gy2 += (y3 * y3);
    acc_gy2 += (y4 * y4);
    acc_gy2 += (y5 * y5);
    acc_gy2 += (y6 * y6);

    /* Sum of Gx*Gy */
    float4 acc_gxgy = (float4)0.0f;
    acc_gxgy += (x0 * y0);
    acc_gxgy += (x1 * y1);
    acc_gxgy += (x2 * y2);
    acc_gxgy += (x3 * y3);
    acc_gxgy += (x4 * y4);
    acc_gxgy += (x5 * y5);
    acc_gxgy += (x6 * y6);

    /* The last four lanes are padding */
    return (float16)(acc_gx2, acc_gy2, acc_gxgy, (float4)0);
}
/** Computes the Harris corner score over a 7x7 window.
 *
 * Each work-item sums the per-row results of harris_score_1x7 for relative rows -3..3 and writes
 * four horizontally adjacent scores:
 *   score = (det - sensitivity * trace^2) * pow4_normalization_factor
 * with trace = sum(Gx^2) + sum(Gy^2) and det = sum(Gx^2) * sum(Gy^2) - sum(Gx*Gy)^2.
 * A score that is not strictly greater than strength_thresh is stored as 0.
 *
 * @attention: The input data type should be passed using a compile option -DDATA_TYPE. Supported types: short and int.
 * e.g. -DDATA_TYPE=short.
 *
 * @param[in]  src_gx_ptr                           Pointer to the first source image (Gx gradients). Supported data types: S16, S32
 * @param[in]  src_gx_stride_x                      Stride of the first source image in X dimension (in bytes)
 * @param[in]  src_gx_step_x                        src_gx_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gx_stride_y                      Stride of the first source image in Y dimension (in bytes)
 * @param[in]  src_gx_step_y                        src_gx_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gx_offset_first_element_in_bytes The offset of the first element in the first source image
 * @param[in]  src_gy_ptr                           Pointer to the second source image (Gy gradients). Supported data types: S16, S32
 * @param[in]  src_gy_stride_x                      Stride of the second source image in X dimension (in bytes)
 * @param[in]  src_gy_step_x                        src_gy_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_gy_stride_y                      Stride of the second source image in Y dimension (in bytes)
 * @param[in]  src_gy_step_y                        src_gy_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_gy_offset_first_element_in_bytes The offset of the first element in the second source image
 * @param[out] vc_ptr                               Pointer to the destination image. Supported data types: F32
 * @param[in]  vc_stride_x                          Stride of the destination image in X dimension (in bytes)
 * @param[in]  vc_step_x                            vc_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  vc_stride_y                          Stride of the destination image in Y dimension (in bytes)
 * @param[in]  vc_step_y                            vc_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  vc_offset_first_element_in_bytes     The offset of the first element in the destination image
 * @param[in]  sensitivity                          Sensitivity threshold k from the Harris-Stephens equation
 * @param[in]  strength_thresh                      Minimum threshold with which to eliminate Harris Corner scores
 * @param[in]  pow4_normalization_factor            Normalization factor to apply harris score
 */
__kernel void harris_score_7x7(
    IMAGE_DECLARATION(src_gx),
    IMAGE_DECLARATION(src_gy),
    IMAGE_DECLARATION(vc),
    float sensitivity,
    float strength_thresh,
    float pow4_normalization_factor)
{
    Image gx_img    = CONVERT_TO_IMAGE_STRUCT(src_gx);
    Image gy_img    = CONVERT_TO_IMAGE_STRUCT(src_gy);
    Image score_img = CONVERT_TO_IMAGE_STRUCT(vc);

    /* Running sums of Gx^2, Gy^2 and Gx*Gy for the four output positions */
    float4 sum_gx2  = (float4)0.0f;
    float4 sum_gy2  = (float4)0.0f;
    float4 sum_gxgy = (float4)0.0f;

    /* Add the contribution of each of the seven window rows, top to bottom */
    for(int row = -3; row <= 3; ++row)
    {
        const float16 row_sums = harris_score_1x7(&gx_img, &gy_img, row);

        sum_gx2 += row_sums.s0123;
        sum_gy2 += row_sums.s4567;
        sum_gxgy += row_sums.s89AB;
    }

    /* Trace and determinant of the accumulated 2x2 structure matrix */
    const float4 trace = sum_gx2 + sum_gy2;
    const float4 det   = sum_gx2 * sum_gy2 - (sum_gxgy * sum_gxgy);

    /* Harris response; anything not above the strength threshold is zeroed */
    float4 mc = (det - (sensitivity * (trace * trace))) * pow4_normalization_factor;
    mc        = select((float4)0.0f, mc, mc > (float4)strength_thresh);

    vstore4(mc, 0, (__global float *)score_img.ptr);
}