blob: 7c87284a7226945ee306ced6969f4d2e679acd80 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2016, 2017 Arm Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25#include "non_linear_filter_helpers.h"
26
27// Sorting networks below were generated using http://pages.ripco.net/~jgamble/nw.html
28
29/** Sorting network to sort 8 disks of diameter 5 and return their median.
30 *
31 * @param[in] top2 Values of elements two rows above.
32 * @param[in] top Values of elements one row above.
33 * @param[in] middle Values of middle elements.
34 * @param[in] bottom Values of elements one row below.
35 * @param[in] bottom2 Values of elements two rows below.
36 *
37 * @return Median values for 8 elements.
38 */
39inline uchar8 median_disk5x5(uchar16 top2, uchar16 top, uchar16 middle, uchar16 bottom, uchar16 bottom2)
40{
41 uchar8 p0 = top2.s01234567;
42 uchar8 p1 = top2.s12345678;
43 uchar8 p2 = top2.s23456789;
44 uchar8 p3 = top.s01234567;
45 uchar8 p4 = top.s12345678;
46 uchar8 p5 = top.s23456789;
47 uchar8 p6 = top.s3456789A;
48 uchar8 p7 = top.s456789AB;
49 uchar8 p8 = middle.s01234567;
50 uchar8 p9 = middle.s12345678;
51 uchar8 p10 = middle.s23456789;
52 uchar8 p11 = middle.s3456789A;
53 uchar8 p12 = middle.s456789AB;
54 uchar8 p13 = bottom.s01234567;
55 uchar8 p14 = bottom.s12345678;
56 uchar8 p15 = bottom.s23456789;
57 uchar8 p16 = bottom.s3456789A;
58 uchar8 p17 = bottom.s456789AB;
59 uchar8 p18 = bottom2.s01234567;
60 uchar8 p19 = bottom2.s12345678;
61 uchar8 p20 = bottom2.s23456789;
62
63 SORT(p0, p1);
64 SORT(p2, p3);
65 SORT(p4, p5);
66 SORT(p6, p7);
67 SORT(p8, p9);
68 SORT(p10, p11);
69 SORT(p12, p13);
70 SORT(p14, p15);
71 SORT(p16, p17);
72 SORT(p18, p19);
73 SORT(p0, p2);
74 SORT(p1, p3);
75 SORT(p4, p6);
76 SORT(p5, p7);
77 SORT(p8, p10);
78 SORT(p9, p11);
79 SORT(p12, p14);
80 SORT(p13, p15);
81 SORT(p16, p18);
82 SORT(p17, p19);
83 SORT(p1, p2);
84 SORT(p5, p6);
85 SORT(p0, p4);
86 SORT(p3, p7);
87 SORT(p9, p10);
88 SORT(p13, p14);
89 SORT(p8, p12);
90 SORT(p11, p15);
91 SORT(p17, p18);
92 SORT(p16, p20);
93 SORT(p1, p5);
94 SORT(p2, p6);
95 SORT(p9, p13);
96 SORT(p10, p14);
97 SORT(p0, p8);
98 SORT(p7, p15);
99 SORT(p17, p20);
100 SORT(p1, p4);
101 SORT(p3, p6);
102 SORT(p9, p12);
103 SORT(p11, p14);
104 SORT(p18, p20);
105 SORT(p0, p16);
106 SORT(p2, p4);
107 SORT(p3, p5);
108 SORT(p10, p12);
109 SORT(p11, p13);
110 SORT(p1, p9);
111 SORT(p6, p14);
112 SORT(p19, p20);
113 SORT(p3, p4);
114 SORT(p11, p12);
115 SORT(p1, p8);
116 SORT(p2, p10);
117 SORT(p5, p13);
118 SORT(p7, p14);
119 SORT(p3, p11);
120 SORT(p2, p8);
121 SORT(p4, p12);
122 SORT(p7, p13);
123 SORT(p1, p17);
124 SORT(p3, p10);
125 SORT(p5, p12);
126 SORT(p1, p16);
127 SORT(p2, p18);
128 SORT(p3, p9);
129 SORT(p6, p12);
130 SORT(p2, p16);
131 SORT(p3, p8);
132 SORT(p7, p12);
133 SORT(p5, p9);
134 SORT(p6, p10);
135 SORT(p4, p8);
136 SORT(p7, p11);
137 SORT(p3, p19);
138 SORT(p5, p8);
139 SORT(p7, p10);
140 SORT(p3, p18);
141 SORT(p4, p20);
142 SORT(p6, p8);
143 SORT(p7, p9);
144 SORT(p3, p17);
145 SORT(p5, p20);
146 SORT(p7, p8);
147 SORT(p3, p16);
148 SORT(p6, p20);
149 SORT(p5, p17);
150 SORT(p7, p20);
151 SORT(p4, p16);
152 SORT(p6, p18);
153 SORT(p5, p16);
154 SORT(p7, p19);
155 SORT(p7, p18);
156 SORT(p6, p16);
157 SORT(p7, p17);
158 SORT(p10, p18);
159 SORT(p7, p16);
160 SORT(p9, p17);
161 SORT(p8, p16);
162 SORT(p9, p16);
163 SORT(p10, p16);
164
165 return p10;
166}
167
168/** Sorting network to sort 8 boxes of size 5 and return their median.
169 *
170 * @param[in] top2 Values of elements two rows above.
171 * @param[in] top Values of elements one row above.
172 * @param[in] middle Values of middle elements.
173 * @param[in] bottom Values of elements one row below.
174 * @param[in] bottom2 Values of elements two rows below.
175 *
176 * @return Median values for 8 elements.
177 */
178inline uchar8 median_box5x5(uchar16 top2, uchar16 top, uchar16 middle, uchar16 bottom, uchar16 bottom2)
179{
180 uchar8 p0 = top2.s01234567;
181 uchar8 p1 = top2.s12345678;
182 uchar8 p2 = top2.s23456789;
183 uchar8 p3 = top2.s3456789A;
184 uchar8 p4 = top2.s456789AB;
185 uchar8 p5 = top.s01234567;
186 uchar8 p6 = top.s12345678;
187 uchar8 p7 = top.s23456789;
188 uchar8 p8 = top.s3456789A;
189 uchar8 p9 = top.s456789AB;
190 uchar8 p10 = middle.s01234567;
191 uchar8 p11 = middle.s12345678;
192 uchar8 p12 = middle.s23456789;
193 uchar8 p13 = middle.s3456789A;
194 uchar8 p14 = middle.s456789AB;
195 uchar8 p15 = bottom.s01234567;
196 uchar8 p16 = bottom.s12345678;
197 uchar8 p17 = bottom.s23456789;
198 uchar8 p18 = bottom.s3456789A;
199 uchar8 p19 = bottom.s456789AB;
200 uchar8 p20 = bottom2.s01234567;
201 uchar8 p21 = bottom2.s12345678;
202 uchar8 p22 = bottom2.s23456789;
203 uchar8 p23 = bottom2.s3456789A;
204 uchar8 p24 = bottom2.s456789AB;
205
206 SORT(p1, p2);
207 SORT(p0, p1);
208 SORT(p1, p2);
209 SORT(p4, p5);
210 SORT(p3, p4);
211 SORT(p4, p5);
212 SORT(p0, p3);
213 SORT(p2, p5);
214 SORT(p2, p3);
215 SORT(p1, p4);
216 SORT(p1, p2);
217 SORT(p3, p4);
218 SORT(p7, p8);
219 SORT(p6, p7);
220 SORT(p7, p8);
221 SORT(p10, p11);
222 SORT(p9, p10);
223 SORT(p10, p11);
224 SORT(p6, p9);
225 SORT(p8, p11);
226 SORT(p8, p9);
227 SORT(p7, p10);
228 SORT(p7, p8);
229 SORT(p9, p10);
230 SORT(p0, p6);
231 SORT(p4, p10);
232 SORT(p4, p6);
233 SORT(p2, p8);
234 SORT(p2, p4);
235 SORT(p6, p8);
236 SORT(p1, p7);
237 SORT(p5, p11);
238 SORT(p5, p7);
239 SORT(p3, p9);
240 SORT(p3, p5);
241 SORT(p7, p9);
242 SORT(p1, p2);
243 SORT(p3, p4);
244 SORT(p5, p6);
245 SORT(p7, p8);
246 SORT(p9, p10);
247 SORT(p13, p14);
248 SORT(p12, p13);
249 SORT(p13, p14);
250 SORT(p16, p17);
251 SORT(p15, p16);
252 SORT(p16, p17);
253 SORT(p12, p15);
254 SORT(p14, p17);
255 SORT(p14, p15);
256 SORT(p13, p16);
257 SORT(p13, p14);
258 SORT(p15, p16);
259 SORT(p19, p20);
260 SORT(p18, p19);
261 SORT(p19, p20);
262 SORT(p21, p22);
263 SORT(p23, p24);
264 SORT(p21, p23);
265 SORT(p22, p24);
266 SORT(p22, p23);
267 SORT(p18, p21);
268 SORT(p20, p23);
269 SORT(p20, p21);
270 SORT(p19, p22);
271 SORT(p22, p24);
272 SORT(p19, p20);
273 SORT(p21, p22);
274 SORT(p23, p24);
275 SORT(p12, p18);
276 SORT(p16, p22);
277 SORT(p16, p18);
278 SORT(p14, p20);
279 SORT(p20, p24);
280 SORT(p14, p16);
281 SORT(p18, p20);
282 SORT(p22, p24);
283 SORT(p13, p19);
284 SORT(p17, p23);
285 SORT(p17, p19);
286 SORT(p15, p21);
287 SORT(p15, p17);
288 SORT(p19, p21);
289 SORT(p13, p14);
290 SORT(p15, p16);
291 SORT(p17, p18);
292 SORT(p19, p20);
293 SORT(p21, p22);
294 SORT(p23, p24);
295 SORT(p0, p12);
296 SORT(p8, p20);
297 SORT(p8, p12);
298 SORT(p4, p16);
299 SORT(p16, p24);
300 SORT(p12, p16);
301 SORT(p2, p14);
302 SORT(p10, p22);
303 SORT(p10, p14);
304 SORT(p6, p18);
305 SORT(p6, p10);
306 SORT(p10, p12);
307 SORT(p1, p13);
308 SORT(p9, p21);
309 SORT(p9, p13);
310 SORT(p5, p17);
311 SORT(p13, p17);
312 SORT(p3, p15);
313 SORT(p11, p23);
314 SORT(p11, p15);
315 SORT(p7, p19);
316 SORT(p7, p11);
317 SORT(p11, p13);
318 SORT(p11, p12);
319 return p12;
320}
321
322/** This function applies a non linear filter on a 5x5 box basis on an input image.
323 *
324 * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
325 *
326 * @param[in] src_ptr Pointer to the source image. Supported data types: U8
327 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
328 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
329 * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
330 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
331 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
332 * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
333 * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
334 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
335 * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
336 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
337 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
338 */
339__kernel void non_linear_filter_box5x5(
340 IMAGE_DECLARATION(src),
341 IMAGE_DECLARATION(dst))
342{
343 Image src = CONVERT_TO_IMAGE_STRUCT(src);
344 Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
345
346 // Load values
347 uchar16 top2 = vload16(0, offset(&src, -2, -2));
348 uchar16 top = vload16(0, offset(&src, -2, -1));
349 uchar16 middle = vload16(0, offset(&src, -2, 0));
350 uchar16 bottom = vload16(0, offset(&src, -2, 1));
351 uchar16 bottom2 = vload16(0, offset(&src, -2, 2));
352
353 // Apply respective filter
Anthony Barbierac69aa12017-07-03 17:39:37 +0100354#ifdef MIN
355 uchar16 tmp = min(middle, min(min(top2, top), min(bottom, bottom2)));
356 uchar8 out = row_reduce_min_5(tmp);
357#elif defined(MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100358 uchar16 tmp = max(middle, max(max(top2, top), max(bottom, bottom2)));
359 uchar8 out = row_reduce_max_5(tmp);
Anthony Barbierac69aa12017-07-03 17:39:37 +0100360#elif defined(MEDIAN)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100361 uchar8 out = median_box5x5(top2, top, middle, bottom, bottom2);
Anthony Barbierac69aa12017-07-03 17:39:37 +0100362#else /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100363#error "Unsupported filter function"
Anthony Barbierac69aa12017-07-03 17:39:37 +0100364#endif /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100365
366 // Store result
367 vstore8(out, 0, dst.ptr);
368}
369
370/** This function applies a non linear filter on a 5x5 cross basis on an input image.
371 *
372 * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
373 *
374 * @param[in] src_ptr Pointer to the source image. Supported data types: U8
375 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
376 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
377 * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
378 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
379 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
380 * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
381 * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
382 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
383 * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
384 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
385 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
386 */
387__kernel void non_linear_filter_cross5x5(
388 IMAGE_DECLARATION(src),
389 IMAGE_DECLARATION(dst))
390{
391 Image src = CONVERT_TO_IMAGE_STRUCT(src);
392 Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
393
394 // Load values
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100395 uchar8 top2 = vload8(0, offset(&src, 0, -2));
396 uchar8 top = vload8(0, offset(&src, 0, -1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100397 uchar16 middle = vload16(0, offset(&src, -2, 0));
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100398 uchar8 bottom = vload8(0, offset(&src, 0, 1));
399 uchar8 bottom2 = vload8(0, offset(&src, 0, 2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100400
401 // Apply respective filter
Anthony Barbierac69aa12017-07-03 17:39:37 +0100402#ifdef MIN
403 uchar8 tmp_middle = row_reduce_min_5(middle);
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100404 uchar8 out = min(tmp_middle, min(min(top2, top), min(bottom, bottom2)));
Anthony Barbierac69aa12017-07-03 17:39:37 +0100405#elif defined(MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100406 uchar8 tmp_middle = row_reduce_max_5(middle);
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100407 uchar8 out = max(tmp_middle, max(max(top2, top.s01234567), max(bottom, bottom2)));
Anthony Barbierac69aa12017-07-03 17:39:37 +0100408#elif defined(MEDIAN)
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100409 uchar8 p0 = top2;
410 uchar8 p1 = top;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100411 uchar8 p2 = middle.s01234567;
412 uchar8 p3 = middle.s12345678;
413 uchar8 p4 = middle.s23456789;
414 uchar8 p5 = middle.s3456789A;
415 uchar8 p6 = middle.s456789AB;
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100416 uchar8 p7 = bottom;
417 uchar8 p8 = bottom2;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100418 uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8);
Anthony Barbierac69aa12017-07-03 17:39:37 +0100419#else /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100420#error "Unsupported filter function"
Anthony Barbierac69aa12017-07-03 17:39:37 +0100421#endif /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100422
423 // Store result
424 vstore8(out, 0, dst.ptr);
425}
426
427/** This function applies a non linear filter on a 5x5 disk basis on an input image.
428 *
429 * @note The needed filter operation is defined through the preprocessor by passing either -DMIN, -DMAX or -DMEDIAN.
430 *
431 * @param[in] src_ptr Pointer to the source image. Supported data types: U8
432 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
433 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
434 * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
435 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
436 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
437 * @param[out] dst_ptr Pointer to the destination image. Supported data types: U8
438 * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
439 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
440 * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
441 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
442 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
443 */
444__kernel void non_linear_filter_disk5x5(
445 IMAGE_DECLARATION(src),
446 IMAGE_DECLARATION(dst))
447{
448 Image src = CONVERT_TO_IMAGE_STRUCT(src);
449 Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
450
451 // Load values
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100452 uchar16 top2 = vload16(0, offset(&src, -2, -2));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100453 uchar16 top = vload16(0, offset(&src, -2, -1));
454 uchar16 middle = vload16(0, offset(&src, -2, 0));
455 uchar16 bottom = vload16(0, offset(&src, -2, 1));
Georgios Pinitas52f8b392017-09-18 12:52:28 +0100456 uchar16 bottom2 = vload16(0, offset(&src, -2, 2));
457
458 // Shift top2 and bottom2 values
459 top2 = top2.s123456789ABCDEFF;
460 bottom2 = bottom2.s123456789ABCDEFF;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100461
462 // Apply respective filter
Anthony Barbierac69aa12017-07-03 17:39:37 +0100463#ifdef MIN
464 uchar16 tmp_3 = min(top2, bottom2);
465 uchar16 tmp_5 = min(middle, min(top, bottom));
466 uchar8 tmp_3_red = row_reduce_min_3(tmp_3);
467 uchar8 tmp_5_red = row_reduce_min_5(tmp_5);
468 uchar8 out = min(tmp_3_red, tmp_5_red);
469#elif defined(MAX)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100470 uchar16 tmp_3 = max(top2, bottom2);
471 uchar16 tmp_5 = max(middle, max(top, bottom));
472 uchar8 tmp_3_red = row_reduce_max_3(tmp_3);
473 uchar8 tmp_5_red = row_reduce_max_5(tmp_5);
474 uchar8 out = max(tmp_3_red, tmp_5_red);
Anthony Barbierac69aa12017-07-03 17:39:37 +0100475#elif defined(MEDIAN)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100476 uchar8 out = median_disk5x5(top2, top, middle, bottom, bottom2);
Anthony Barbierac69aa12017-07-03 17:39:37 +0100477#else /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100478#error "Unsupported filter function"
Anthony Barbierac69aa12017-07-03 17:39:37 +0100479#endif /* MIN or MAX or MEDIAN */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100480
481 // Store result
482 vstore8(out, 0, dst.ptr);
483}