Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016, 2017 ARM Limited. |
| 3 | * |
| 4 | * SPDX-License-Identifier: MIT |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to |
| 8 | * deal in the Software without restriction, including without limitation the |
| 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 10 | * sell copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all |
| 14 | * copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | * SOFTWARE. |
| 23 | */ |
| 24 | #include "helpers.h" |
| 25 | |
| 26 | /***********************************************/ |
| 27 | /* Begin implementation of Sobel3x3 filter */ |
| 28 | /***********************************************/ |
| 29 | |
| 30 | /** This OpenCL kernel that computes a Sobel3x3 filter. |
| 31 | * |
| 32 | * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient |
| 33 | * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required. |
| 34 | * |
| 35 | * @param[in] src_ptr Pointer to the source image. Supported data types: U8 |
| 36 | * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) |
| 37 | * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) |
| 38 | * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) |
| 39 | * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) |
| 40 | * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image |
| 41 | * @param[out] dst_gx_ptr Pointer to the destination image. Supported data types: S16 |
| 42 | * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes) |
| 43 | * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) |
| 44 | * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes) |
| 45 | * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) |
| 46 | * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image |
| 47 | * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S16 |
| 48 | * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes) |
| 49 | * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes) |
| 50 | * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes) |
| 51 | * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes) |
| 52 | * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image |
| 53 | */ |
| 54 | __kernel void sobel3x3( |
| 55 | IMAGE_DECLARATION(src) |
| 56 | #ifdef GRAD_X |
| 57 | , |
| 58 | IMAGE_DECLARATION(dst_gx) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 59 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 60 | #ifdef GRAD_Y |
| 61 | , |
| 62 | IMAGE_DECLARATION(dst_gy) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 63 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 64 | ) |
| 65 | { |
| 66 | Image src = CONVERT_TO_IMAGE_STRUCT(src); |
| 67 | #ifdef GRAD_X |
| 68 | Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 69 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 70 | #ifdef GRAD_Y |
| 71 | Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 72 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 73 | |
| 74 | // Output pixels |
| 75 | #ifdef GRAD_X |
| 76 | short8 gx = (short8)0; |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 77 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 78 | #ifdef GRAD_Y |
| 79 | short8 gy = (short8)0; |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 80 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 81 | |
| 82 | // Row0 |
| 83 | uchar16 temp = vload16(0, offset(&src, -1, -1)); |
| 84 | short8 left = convert_short8(temp.s01234567); |
| 85 | short8 middle = convert_short8(temp.s12345678); |
| 86 | short8 right = convert_short8(temp.s23456789); |
| 87 | #ifdef GRAD_X |
| 88 | gx += left * (short8)(-1); |
| 89 | gx += right * (short8)(+1); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 90 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 91 | #ifdef GRAD_Y |
| 92 | gy += left * (short8)(-1); |
| 93 | gy += middle * (short8)(-2); |
| 94 | gy += right * (short8)(-1); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 95 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 96 | |
| 97 | // Row1 |
| 98 | temp = vload16(0, offset(&src, -1, 0)); |
| 99 | left = convert_short8(temp.s01234567); |
| 100 | right = convert_short8(temp.s23456789); |
| 101 | #ifdef GRAD_X |
| 102 | gx += left * (short8)(-2); |
| 103 | gx += right * (short8)(+2); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 104 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 105 | |
| 106 | // Row2 |
| 107 | temp = vload16(0, offset(&src, -1, 1)); |
| 108 | left = convert_short8(temp.s01234567); |
| 109 | middle = convert_short8(temp.s12345678); |
| 110 | right = convert_short8(temp.s23456789); |
| 111 | #ifdef GRAD_X |
| 112 | gx += left * (short8)(-1); |
| 113 | gx += right * (short8)(+1); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 114 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 115 | #ifdef GRAD_Y |
| 116 | gy += left * (short8)(+1); |
| 117 | gy += middle * (short8)(+2); |
| 118 | gy += right * (short8)(+1); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 119 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 120 | |
| 121 | // Store results |
| 122 | #ifdef GRAD_X |
| 123 | vstore8(gx, 0, ((__global short *)dst_gx.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 124 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 125 | #ifdef GRAD_Y |
| 126 | vstore8(gy, 0, ((__global short *)dst_gy.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 127 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | /**********************************************/ |
| 131 | /* End implementation of Sobel3x3 filter */ |
| 132 | /**********************************************/ |
| 133 | |
| 134 | /***********************************************/ |
| 135 | /* Begin implementation of Sobel5x5 filter */ |
| 136 | /***********************************************/ |
| 137 | |
| 138 | /** Compute a 1D horizontal sobel filter 1x5 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). |
| 139 | * |
| 140 | * @param[in] src Pointer to source image. |
| 141 | * @param[in] left1_coeff_gx Weight of the most left pixel for gx |
| 142 | * @param[in] left2_coeff_gx Weight of the left pixel for gx |
| 143 | * @param[in] middle_coeff_gx Weight of the middle pixel for gx |
| 144 | * @param[in] right1_coeff_gx Weight of the right pixel for gx |
| 145 | * @param[in] right2_coeff_gx Weight of the most right pixel for gx |
| 146 | * @param[in] left1_coeff_gy Weight of the most left pixel for gy |
| 147 | * @param[in] left2_coeff_gy Weight of the left pixel for gy |
| 148 | * @param[in] middle_coeff_gy Weight of the middle pixel for gy |
| 149 | * @param[in] right1_coeff_gy Weight of the right pixel for gy |
| 150 | * @param[in] right2_coeff_gy Weight of the most right pixel for gy |
| 151 | * |
| 152 | * @return a short16 containing short8 gx and short8 gy values. |
| 153 | */ |
| 154 | short16 sobel1x5( |
| 155 | Image *src, |
| 156 | const short left1_coeff_gx, |
| 157 | const short left2_coeff_gx, |
| 158 | const short middle_coeff_gx, |
| 159 | const short right1_coeff_gx, |
| 160 | const short right2_coeff_gx, |
| 161 | const short left1_coeff_gy, |
| 162 | const short left2_coeff_gy, |
| 163 | const short middle_coeff_gy, |
| 164 | const short right1_coeff_gy, |
| 165 | const short right2_coeff_gy) |
| 166 | { |
| 167 | uchar16 temp = vload16(0, offset(src, -2, 0)); |
| 168 | short8 gx = 0; |
| 169 | short8 gy = 0; |
| 170 | short8 val; |
| 171 | |
| 172 | val = convert_short8(temp.s01234567); |
| 173 | gx += val * (short8)left1_coeff_gx; |
| 174 | gy += val * (short8)left1_coeff_gy; |
| 175 | |
| 176 | val = convert_short8(temp.s12345678); |
| 177 | gx += val * (short8)left2_coeff_gx; |
| 178 | gy += val * (short8)left2_coeff_gy; |
| 179 | |
| 180 | val = convert_short8(temp.s23456789); |
| 181 | gx += val * (short8)middle_coeff_gx; |
| 182 | gy += val * (short8)middle_coeff_gy; |
| 183 | |
| 184 | val = convert_short8(temp.s3456789a); |
| 185 | gx += val * (short8)right1_coeff_gx; |
| 186 | gy += val * (short8)right1_coeff_gy; |
| 187 | |
| 188 | val = convert_short8(temp.s456789ab); |
| 189 | gx += val * (short8)right2_coeff_gx; |
| 190 | gy += val * (short8)right2_coeff_gy; |
| 191 | |
| 192 | return (short16)(gx, gy); |
| 193 | } |
| 194 | |
| 195 | /** Compute a 1D vertical sobel filter 5x1 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). |
| 196 | * |
| 197 | * @param[in] src Pointer to source image. |
| 198 | * @param[in] up1_coeff Weight of the most up pixel |
| 199 | * @param[in] up2_coeff Weight of the up pixel |
| 200 | * @param[in] middle_coeff Weight of the middle pixel |
| 201 | * @param[in] down1_coeff Weight of the down pixel |
| 202 | * @param[in] down2_coeff Weight of the most down pixel |
| 203 | * |
| 204 | * @return a short8 containing 8 convoluted values. |
| 205 | */ |
| 206 | short8 sobel5x1( |
| 207 | Image *src, |
| 208 | const short up1_coeff, |
| 209 | const short up2_coeff, |
| 210 | const short middle_coeff, |
| 211 | const short down1_coeff, |
| 212 | const short down2_coeff) |
| 213 | { |
| 214 | short8 val; |
| 215 | short8 out = (short8)0; |
| 216 | |
| 217 | val = vload8(0, (__global short *)offset(src, 0, -2)); |
| 218 | out += val * (short8)up1_coeff; |
| 219 | |
| 220 | val = vload8(0, (__global short *)offset(src, 0, -1)); |
| 221 | out += val * (short8)up2_coeff; |
| 222 | |
| 223 | val = vload8(0, (__global short *)offset(src, 0, 0)); |
| 224 | out += val * (short8)middle_coeff; |
| 225 | |
| 226 | val = vload8(0, (__global short *)offset(src, 0, 1)); |
| 227 | out += val * (short8)down1_coeff; |
| 228 | |
| 229 | val = vload8(0, (__global short *)offset(src, 0, 2)); |
| 230 | out += val * (short8)down2_coeff; |
| 231 | |
| 232 | return (short8)(out); |
| 233 | } |
| 234 | |
| 235 | /** Apply a 1x5 sobel matrix to a single channel U8 input image and output two temporary channel S16 images. |
| 236 | * |
| 237 | * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient |
| 238 | * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required. |
| 239 | * |
| 240 | * @param[in] src_ptr Pointer to the source image.. Supported data types: U8 |
| 241 | * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) |
| 242 | * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) |
| 243 | * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) |
| 244 | * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) |
| 245 | * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image |
| 246 | * @param[out] dst_gx_ptr Pointer to the destination image.. Supported data types: S16 |
| 247 | * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes) |
| 248 | * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) |
| 249 | * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes) |
| 250 | * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) |
| 251 | * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image |
| 252 | * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S16 |
| 253 | * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes) |
| 254 | * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes) |
| 255 | * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes) |
| 256 | * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes) |
| 257 | * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image |
| 258 | */ |
| 259 | __kernel void sobel_separable1x5( |
| 260 | IMAGE_DECLARATION(src) |
| 261 | #ifdef GRAD_X |
| 262 | , |
| 263 | IMAGE_DECLARATION(dst_gx) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 264 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 265 | #ifdef GRAD_Y |
| 266 | , |
| 267 | IMAGE_DECLARATION(dst_gy) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 268 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 269 | ) |
| 270 | { |
| 271 | Image src = CONVERT_TO_IMAGE_STRUCT(src); |
| 272 | #ifdef GRAD_X |
| 273 | Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 274 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 275 | #ifdef GRAD_Y |
| 276 | Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 277 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 278 | |
| 279 | // Output pixels |
| 280 | short16 gx_gy = sobel1x5(&src, |
| 281 | -1, -2, 0, 2, 1, |
| 282 | 1, 4, 6, 4, 1); |
| 283 | |
| 284 | // Store result in dst |
| 285 | #ifdef GRAD_X |
| 286 | vstore8(gx_gy.s01234567, 0, ((__global short *)dst_gx.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 287 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 288 | #ifdef GRAD_Y |
| 289 | vstore8(gx_gy.s89ABCDEF, 0, ((__global short *)dst_gy.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 290 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 291 | } |
| 292 | |
| 293 | /** Apply a 5x1 convolution matrix to two single channel S16 input temporary images |
| 294 | * and output two single channel S16 images. |
| 295 | * |
| 296 | * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient |
| 297 | * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required. |
| 298 | * |
| 299 | * @param[in] src_x_ptr Pointer to the source image.. Supported data types: S16 |
| 300 | * @param[in] src_x_stride_x Stride of the source image in X dimension (in bytes) |
| 301 | * @param[in] src_x_step_x src_x_stride_x * number of elements along X processed per workitem(in bytes) |
| 302 | * @param[in] src_x_stride_y Stride of the source image in Y dimension (in bytes) |
| 303 | * @param[in] src_x_step_y src_x_stride_y * number of elements along Y processed per workitem(in bytes) |
| 304 | * @param[in] src_x_offset_first_element_in_bytes The offset of the first element in the source image |
| 305 | * @param[out] dst_gx_ptr Pointer to the destination image. Supported data types: S16 |
| 306 | * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes) |
| 307 | * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) |
| 308 | * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes) |
| 309 | * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) |
| 310 | * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image |
| 311 | * @param[in] src_y_ptr Pointer to the source image. Supported data types: S16 |
| 312 | * @param[in] src_y_stride_x Stride of the source image in X dimension (in bytes) |
| 313 | * @param[in] src_y_step_x src_y_stride_x * number of elements along X processed per workitem(in bytes) |
| 314 | * @param[in] src_y_stride_y Stride of the source image in Y dimension (in bytes) |
| 315 | * @param[in] src_y_step_y src_y_stride_y * number of elements along Y processed per workitem(in bytes) |
| 316 | * @param[in] src_y_offset_first_element_in_bytes The offset of the first element in the source image |
| 317 | * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S16 |
| 318 | * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes) |
| 319 | * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes) |
| 320 | * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes) |
| 321 | * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes) |
| 322 | * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image |
| 323 | * @param[in] dummy Dummy parameter to easy conditional inclusion |
| 324 | */ |
| 325 | __kernel void sobel_separable5x1( |
| 326 | #ifdef GRAD_X |
| 327 | IMAGE_DECLARATION(src_x), |
| 328 | IMAGE_DECLARATION(dst_gx), |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 329 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 330 | #ifdef GRAD_Y |
| 331 | IMAGE_DECLARATION(src_y), |
| 332 | IMAGE_DECLARATION(dst_gy), |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 333 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 334 | int dummy) |
| 335 | { |
| 336 | #ifdef GRAD_X |
| 337 | Image src_x = CONVERT_TO_IMAGE_STRUCT(src_x); |
| 338 | Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 339 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 340 | #ifdef GRAD_Y |
| 341 | Image src_y = CONVERT_TO_IMAGE_STRUCT(src_y); |
| 342 | Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 343 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 344 | |
| 345 | #ifdef GRAD_X |
| 346 | short8 gx = sobel5x1(&src_x, |
| 347 | 1, 4, 6, 4, 1); |
| 348 | vstore8(gx, 0, ((__global short *)dst_gx.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 349 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 350 | #ifdef GRAD_Y |
| 351 | short8 gy = sobel5x1(&src_y, |
| 352 | -1, -2, 0, 2, 1); |
| 353 | vstore8(gy, 0, ((__global short *)dst_gy.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 354 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 355 | } |
| 356 | |
| 357 | /**********************************************/ |
| 358 | /* End implementation of Sobel5x5 filter */ |
| 359 | /**********************************************/ |
| 360 | |
| 361 | /***********************************************/ |
| 362 | /* Begin implementation of Sobel7x7 filter */ |
| 363 | /***********************************************/ |
| 364 | |
| 365 | /* Sobel 1x7 horizontal X / 7x1 vertical Y coefficients */ |
| 366 | #define X0 -1 |
| 367 | #define X1 -4 |
| 368 | #define X2 -5 |
| 369 | #define X3 0 |
| 370 | #define X4 5 |
| 371 | #define X5 4 |
| 372 | #define X6 1 |
| 373 | |
| 374 | /* Sobel 1x7 vertical X / 7x1 horizontal Y coefficients */ |
| 375 | #define Y0 1 |
| 376 | #define Y1 6 |
| 377 | #define Y2 15 |
| 378 | #define Y3 20 |
| 379 | #define Y4 15 |
| 380 | #define Y5 6 |
| 381 | #define Y6 1 |
| 382 | |
| 383 | /* Calculates single horizontal iteration. */ |
| 384 | #define SOBEL1x1_HOR(src, gx, gy, idx) \ |
| 385 | { \ |
| 386 | int8 val = convert_int8(vload8(0, offset(src, idx - 3, 0))); \ |
| 387 | gx += val * X##idx; \ |
| 388 | gy += val * Y##idx; \ |
| 389 | } |
| 390 | |
| 391 | /* Calculates single vertical iteration. */ |
| 392 | #define SOBEL1x1_VERT(src, g, direction, idx) \ |
| 393 | { \ |
| 394 | int8 val = vload8(0, (__global int *)offset(src, 0, idx - 3)); \ |
| 395 | g += val * (int8)direction##idx; \ |
| 396 | } |
| 397 | |
| 398 | /* Calculates a 1x7 horizontal iteration. */ |
| 399 | #define SOBEL1x7(ptr, gx, gy) \ |
| 400 | SOBEL1x1_HOR(ptr, gx, gy, 0) \ |
| 401 | SOBEL1x1_HOR(ptr, gx, gy, 1) \ |
| 402 | SOBEL1x1_HOR(ptr, gx, gy, 2) \ |
| 403 | SOBEL1x1_HOR(ptr, gx, gy, 3) \ |
| 404 | SOBEL1x1_HOR(ptr, gx, gy, 4) \ |
| 405 | SOBEL1x1_HOR(ptr, gx, gy, 5) \ |
| 406 | SOBEL1x1_HOR(ptr, gx, gy, 6) |
| 407 | |
| 408 | /* Calculates a 7x1 vertical iteration. */ |
| 409 | #define SOBEL7x1(ptr, g, direction) \ |
| 410 | SOBEL1x1_VERT(ptr, g, direction, 0) \ |
| 411 | SOBEL1x1_VERT(ptr, g, direction, 1) \ |
| 412 | SOBEL1x1_VERT(ptr, g, direction, 2) \ |
| 413 | SOBEL1x1_VERT(ptr, g, direction, 3) \ |
| 414 | SOBEL1x1_VERT(ptr, g, direction, 4) \ |
| 415 | SOBEL1x1_VERT(ptr, g, direction, 5) \ |
| 416 | SOBEL1x1_VERT(ptr, g, direction, 6) |
| 417 | |
| 418 | /** Apply a 1x7 sobel matrix to a single channel U8 input image and output two temporary channel S16 images and leave the borders undefined. |
| 419 | * |
| 420 | * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient |
| 421 | * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required. |
| 422 | * |
| 423 | * @param[in] src_ptr Pointer to the source image. Supported data types: U8 |
| 424 | * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) |
| 425 | * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) |
| 426 | * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) |
| 427 | * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) |
| 428 | * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image |
| 429 | * @param[out] dst_gx_ptr Pointer to the destination image. Supported data types: S32 |
| 430 | * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes) |
| 431 | * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) |
| 432 | * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes) |
| 433 | * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) |
| 434 | * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image |
| 435 | * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S32 |
| 436 | * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes) |
| 437 | * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes) |
| 438 | * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes) |
| 439 | * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes) |
| 440 | * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image |
| 441 | */ |
| 442 | __kernel void sobel_separable1x7( |
| 443 | IMAGE_DECLARATION(src) |
| 444 | #ifdef GRAD_X |
| 445 | , |
| 446 | IMAGE_DECLARATION(dst_gx) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 447 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 448 | #ifdef GRAD_Y |
| 449 | , |
| 450 | IMAGE_DECLARATION(dst_gy) |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 451 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 452 | ) |
| 453 | { |
| 454 | Image src = CONVERT_TO_IMAGE_STRUCT(src); |
| 455 | #ifdef GRAD_X |
| 456 | Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 457 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 458 | #ifdef GRAD_Y |
| 459 | Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 460 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 461 | int8 gx = (int8)0; |
| 462 | int8 gy = (int8)0; |
| 463 | |
| 464 | SOBEL1x7(&src, gx, gy); |
| 465 | |
| 466 | // Store result in dst |
| 467 | #ifdef GRAD_X |
| 468 | vstore8(gx, 0, ((__global int *)dst_gx.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 469 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 470 | #ifdef GRAD_Y |
| 471 | vstore8(gy, 0, ((__global int *)dst_gy.ptr)); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 472 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 473 | } |
| 474 | |
| 475 | /** Apply a 7x1 convolution matrix to two single channel S16 input temporary images and output two single channel S16 images and leave the borders undefined. |
| 476 | * |
| 477 | * @attention To enable computation of the X gradient -DGRAD_X must be passed at compile time, while computation of the Y gradient |
| 478 | * is performed when -DGRAD_Y is used. You can use both when computation of both gradients is required. |
| 479 | * |
| 480 | * @param[in] src_x_ptr Pointer to the source image. Supported data types: S32 |
| 481 | * @param[in] src_x_stride_x Stride of the source image in X dimension (in bytes) |
| 482 | * @param[in] src_x_step_x src_x_stride_x * number of elements along X processed per workitem(in bytes) |
| 483 | * @param[in] src_x_stride_y Stride of the source image in Y dimension (in bytes) |
| 484 | * @param[in] src_x_step_y src_x_stride_y * number of elements along Y processed per workitem(in bytes) |
| 485 | * @param[in] src_x_offset_first_element_in_bytes The offset of the first element in the source image |
| 486 | * @param[out] dst_gx_ptr Pointer to the destination image. Supported data types: S16 |
| 487 | * @param[in] dst_gx_stride_x Stride of the destination image in X dimension (in bytes) |
| 488 | * @param[in] dst_gx_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) |
| 489 | * @param[in] dst_gx_stride_y Stride of the destination image in Y dimension (in bytes) |
| 490 | * @param[in] dst_gx_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) |
| 491 | * @param[in] dst_gx_offset_first_element_in_bytes The offset of the first element in the destination image |
| 492 | * @param[in] src_y_ptr Pointer to the source image. Supported data types: S32 |
| 493 | * @param[in] src_y_stride_x Stride of the source image in X dimension (in bytes) |
| 494 | * @param[in] src_y_step_x src_y_stride_x * number of elements along X processed per workitem(in bytes) |
| 495 | * @param[in] src_y_stride_y Stride of the source image in Y dimension (in bytes) |
| 496 | * @param[in] src_y_step_y src_y_stride_y * number of elements along Y processed per workitem(in bytes) |
| 497 | * @param[in] src_y_offset_first_element_in_bytes The offset of the first element in the source image |
| 498 | * @param[out] dst_gy_ptr Pointer to the destination image. Supported data types: S16 |
| 499 | * @param[in] dst_gy_stride_x Stride of the destination image in X dimension (in bytes) |
| 500 | * @param[in] dst_gy_step_x dst_gy_stride_x * number of elements along X processed per workitem(in bytes) |
| 501 | * @param[in] dst_gy_stride_y Stride of the destination image in Y dimension (in bytes) |
| 502 | * @param[in] dst_gy_step_y dst_gy_stride_y * number of elements along Y processed per workitem(in bytes) |
| 503 | * @param[in] dst_gy_offset_first_element_in_bytes The offset of the first element in the destination image |
| 504 | * @param[in] dummy Dummy parameter to easy conditional inclusion |
| 505 | */ |
| 506 | __kernel void sobel_separable7x1( |
| 507 | #ifdef GRAD_X |
| 508 | IMAGE_DECLARATION(src_x), |
| 509 | IMAGE_DECLARATION(dst_gx), |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 510 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 511 | #ifdef GRAD_Y |
| 512 | IMAGE_DECLARATION(src_y), |
| 513 | IMAGE_DECLARATION(dst_gy), |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 514 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 515 | int dummy) |
| 516 | { |
| 517 | #ifdef GRAD_X |
| 518 | Image src_x = CONVERT_TO_IMAGE_STRUCT(src_x); |
| 519 | Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 520 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 521 | #ifdef GRAD_Y |
| 522 | Image src_y = CONVERT_TO_IMAGE_STRUCT(src_y); |
| 523 | Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 524 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 525 | |
| 526 | // Output pixels |
| 527 | #ifdef GRAD_X |
| 528 | int8 gx = 0; |
| 529 | SOBEL7x1(&src_x, gx, Y); |
| 530 | vstore8(gx, 0, (__global int *)dst_gx.ptr); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 531 | #endif /* GRAD_X */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 532 | #ifdef GRAD_Y |
| 533 | int8 gy = 0; |
| 534 | SOBEL7x1(&src_y, gy, X); |
| 535 | vstore8(gy, 0, (__global int *)dst_gy.ptr); |
Anthony Barbier | ac69aa1 | 2017-07-03 17:39:37 +0100 | [diff] [blame] | 536 | #endif /* GRAD_Y */ |
Anthony Barbier | 6ff3b19 | 2017-09-04 18:44:23 +0100 | [diff] [blame] | 537 | } |
| 538 | |
| 539 | /**********************************************/ |
| 540 | /* End implementation of Sobel7x7 filter */ |
| 541 | /**********************************************/ |