blob: 08b27e319ebacc43c12f0538ce1317f77aa4e015 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Michalis Spyroua4f378d2019-04-26 14:54:54 +01002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h"
25
26#include "arm_compute/core/Coordinates.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/IAccessWindow.h"
30#include "arm_compute/core/ITensor.h"
31#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Types.h"
33#include "arm_compute/core/Validate.h"
steniu014c2938e2017-06-19 15:44:45 +010034#include "arm_compute/core/Window.h"
Georgios Pinitasd8734b52017-12-22 15:27:52 +000035#include "arm_compute/core/utils/misc/Utility.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010036
steniu014c2938e2017-06-19 15:44:45 +010037#include <algorithm>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038#include <arm_neon.h>
39#include <climits>
40#include <cstddef>
41
42namespace arm_compute
43{
44NEMinMaxKernel::NEMinMaxKernel()
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010045 : _func(), _input(nullptr), _min(), _max(), _mtx()
Anthony Barbier6ff3b192017-09-04 18:44:23 +010046{
47}
48
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010049void NEMinMaxKernel::configure(const IImage *input, void *min, void *max)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010050{
51 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010052 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010053 ARM_COMPUTE_ERROR_ON(nullptr == min);
54 ARM_COMPUTE_ERROR_ON(nullptr == max);
55
56 _input = input;
57 _min = min;
58 _max = max;
59
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010060 switch(_input->info()->data_type())
Anthony Barbier6ff3b192017-09-04 18:44:23 +010061 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010062 case DataType::U8:
63 _func = &NEMinMaxKernel::minmax_U8;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010064 break;
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010065 case DataType::S16:
66 _func = &NEMinMaxKernel::minmax_S16;
67 break;
68 case DataType::F32:
69 _func = &NEMinMaxKernel::minmax_F32;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010070 break;
71 default:
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010072 ARM_COMPUTE_ERROR("Unsupported data type");
Anthony Barbier6ff3b192017-09-04 18:44:23 +010073 break;
74 }
75
Anthony Barbier6ff3b192017-09-04 18:44:23 +010076 // Configure kernel window
steniu014c2938e2017-06-19 15:44:45 +010077 constexpr unsigned int num_elems_processed_per_iteration = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010078
steniu014c2938e2017-06-19 15:44:45 +010079 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010080
81 INEKernel::configure(win);
82}
83
Moritz Pflanzerc186b572017-09-07 09:48:04 +010084void NEMinMaxKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010085{
Moritz Pflanzerc186b572017-09-07 09:48:04 +010086 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010087 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
88 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
89 ARM_COMPUTE_ERROR_ON(_func == nullptr);
90
91 (this->*_func)(window);
92}
93
94void NEMinMaxKernel::reset()
95{
96 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +010097 switch(_input->info()->data_type())
98 {
99 case DataType::U8:
100 *static_cast<int32_t *>(_min) = UCHAR_MAX;
101 *static_cast<int32_t *>(_max) = 0;
102 break;
103 case DataType::S16:
104 *static_cast<int32_t *>(_min) = SHRT_MAX;
105 *static_cast<int32_t *>(_max) = SHRT_MIN;
106 break;
107 case DataType::F32:
108 *static_cast<float *>(_min) = std::numeric_limits<float>::max();
109 *static_cast<float *>(_max) = std::numeric_limits<float>::lowest();
110 break;
111 default:
112 ARM_COMPUTE_ERROR("Unsupported data type");
113 break;
114 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100115}
116
117template <typename T>
118void NEMinMaxKernel::update_min_max(const T min, const T max)
119{
Michalis Spyrou07781ac2017-08-31 15:11:41 +0100120 std::lock_guard<arm_compute::Mutex> lock(_mtx);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100121
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100122 using type = typename std::conditional<std::is_same<T, float>::value, float, int32_t>::type;
123
124 auto min_ptr = static_cast<type *>(_min);
125 auto max_ptr = static_cast<type *>(_max);
126
127 if(min < *min_ptr)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100129 *min_ptr = min;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100130 }
131
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100132 if(max > *max_ptr)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100134 *max_ptr = max;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100135 }
136}
137
steniu014c2938e2017-06-19 15:44:45 +0100138void NEMinMaxKernel::minmax_U8(Window win)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100139{
140 uint8x8_t carry_min = vdup_n_u8(UCHAR_MAX);
141 uint8x8_t carry_max = vdup_n_u8(0);
142
steniu014c2938e2017-06-19 15:44:45 +0100143 uint8_t carry_max_scalar = 0;
144 uint8_t carry_min_scalar = UCHAR_MAX;
145
146 const int x_start = win.x().start();
147 const int x_end = win.x().end();
148
149 // Handle X dimension manually to split into two loops
150 // First one will use vector operations, second one processes the left over pixels
151 win.set(Window::DimX, Window::Dimension(0, 1, 1));
152
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100153 Iterator input(_input, win);
154
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100155 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156 {
steniu014c2938e2017-06-19 15:44:45 +0100157 int x = x_start;
158
159 // Vector loop
160 for(; x <= x_end - 16; x += 16)
161 {
162 const uint8x16_t pixels = vld1q_u8(input.ptr() + x);
163 const uint8x8_t tmp_min = vmin_u8(vget_high_u8(pixels), vget_low_u8(pixels));
164 const uint8x8_t tmp_max = vmax_u8(vget_high_u8(pixels), vget_low_u8(pixels));
165 carry_min = vmin_u8(tmp_min, carry_min);
166 carry_max = vmax_u8(tmp_max, carry_max);
167 }
168
169 // Process leftover pixels
170 for(; x < x_end; ++x)
171 {
172 const uint8_t pixel = input.ptr()[x];
173 carry_min_scalar = std::min(pixel, carry_min_scalar);
174 carry_max_scalar = std::max(pixel, carry_max_scalar);
175 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176 },
177 input);
178
179 // Reduce result
180 carry_min = vpmin_u8(carry_min, carry_min);
181 carry_max = vpmax_u8(carry_max, carry_max);
182 carry_min = vpmin_u8(carry_min, carry_min);
183 carry_max = vpmax_u8(carry_max, carry_max);
184 carry_min = vpmin_u8(carry_min, carry_min);
185 carry_max = vpmax_u8(carry_max, carry_max);
186
187 // Extract max/min values
steniu014c2938e2017-06-19 15:44:45 +0100188 const uint8_t min_i = std::min(vget_lane_u8(carry_min, 0), carry_min_scalar);
189 const uint8_t max_i = std::max(vget_lane_u8(carry_max, 0), carry_max_scalar);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100190
191 // Perform reduction of local min/max values
192 update_min_max(min_i, max_i);
193}
194
steniu014c2938e2017-06-19 15:44:45 +0100195void NEMinMaxKernel::minmax_S16(Window win)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100196{
197 int16x4_t carry_min = vdup_n_s16(SHRT_MAX);
198 int16x4_t carry_max = vdup_n_s16(SHRT_MIN);
199
steniu014c2938e2017-06-19 15:44:45 +0100200 int16_t carry_max_scalar = SHRT_MIN;
201 int16_t carry_min_scalar = SHRT_MAX;
202
203 const int x_start = win.x().start();
204 const int x_end = win.x().end();
205
206 // Handle X dimension manually to split into two loops
207 // First one will use vector operations, second one processes the left over pixels
208 win.set(Window::DimX, Window::Dimension(0, 1, 1));
209
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100210 Iterator input(_input, win);
211
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100212 execute_window_loop(win, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100213 {
steniu014c2938e2017-06-19 15:44:45 +0100214 int x = x_start;
Kohei Takahashicedb78f2018-08-23 10:23:52 +0900215 const auto in_ptr = reinterpret_cast<const int16_t *>(input.ptr());
steniu014c2938e2017-06-19 15:44:45 +0100216
217 // Vector loop
218 for(; x <= x_end - 16; x += 16)
219 {
220 const int16x8x2_t pixels = vld2q_s16(in_ptr + x);
221 const int16x8_t tmp_min1 = vminq_s16(pixels.val[0], pixels.val[1]);
222 const int16x8_t tmp_max1 = vmaxq_s16(pixels.val[0], pixels.val[1]);
223 const int16x4_t tmp_min2 = vmin_s16(vget_high_s16(tmp_min1), vget_low_s16(tmp_min1));
224 const int16x4_t tmp_max2 = vmax_s16(vget_high_s16(tmp_max1), vget_low_s16(tmp_max1));
225 carry_min = vmin_s16(tmp_min2, carry_min);
226 carry_max = vmax_s16(tmp_max2, carry_max);
227 }
228
229 // Process leftover pixels
230 for(; x < x_end; ++x)
231 {
232 const int16_t pixel = in_ptr[x];
233 carry_min_scalar = std::min(pixel, carry_min_scalar);
234 carry_max_scalar = std::max(pixel, carry_max_scalar);
235 }
236
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100237 },
238 input);
239
240 // Reduce result
241 carry_min = vpmin_s16(carry_min, carry_min);
242 carry_max = vpmax_s16(carry_max, carry_max);
243 carry_min = vpmin_s16(carry_min, carry_min);
244 carry_max = vpmax_s16(carry_max, carry_max);
245
246 // Extract max/min values
steniu014c2938e2017-06-19 15:44:45 +0100247 const int16_t min_i = std::min(vget_lane_s16(carry_min, 0), carry_min_scalar);
248 const int16_t max_i = std::max(vget_lane_s16(carry_max, 0), carry_max_scalar);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100249
250 // Perform reduction of local min/max values
251 update_min_max(min_i, max_i);
252}
253
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100254void NEMinMaxKernel::minmax_F32(Window win)
255{
256 float32x2_t carry_min = vdup_n_f32(std::numeric_limits<float>::max());
257 float32x2_t carry_max = vdup_n_f32(std::numeric_limits<float>::lowest());
258
259 float carry_min_scalar = std::numeric_limits<float>::max();
260 float carry_max_scalar = std::numeric_limits<float>::lowest();
261
262 const int x_start = win.x().start();
263 const int x_end = win.x().end();
264
265 // Handle X dimension manually to split into two loops
266 // First one will use vector operations, second one processes the left over pixels
267 win.set(Window::DimX, Window::Dimension(0, 1, 1));
268
269 Iterator input(_input, win);
270
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100271 execute_window_loop(win, [&](const Coordinates &)
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100272 {
273 int x = x_start;
Kohei Takahashicedb78f2018-08-23 10:23:52 +0900274 const auto in_ptr = reinterpret_cast<const float *>(input.ptr());
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100275
276 // Vector loop
277 for(; x <= x_end - 8; x += 8)
278 {
279 const float32x4x2_t pixels = vld2q_f32(in_ptr + x);
280 const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]);
281 const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]);
282 const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1));
283 const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1));
284 carry_min = vmin_f32(tmp_min2, carry_min);
285 carry_max = vmax_f32(tmp_max2, carry_max);
286 }
287
288 // Process leftover pixels
289 for(; x < x_end; ++x)
290 {
291 const float pixel = in_ptr[x];
292 carry_min_scalar = std::min(pixel, carry_min_scalar);
293 carry_max_scalar = std::max(pixel, carry_max_scalar);
294 }
295
296 },
297 input);
298
299 // Reduce result
300 carry_min = vpmin_f32(carry_min, carry_min);
301 carry_max = vpmax_f32(carry_max, carry_max);
302 carry_min = vpmin_f32(carry_min, carry_min);
303 carry_max = vpmax_f32(carry_max, carry_max);
304
305 // Extract max/min values
306 const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar);
307 const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar);
308
309 // Perform reduction of local min/max values
310 update_min_max(min_i, max_i);
311}
312
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100313NEMinMaxLocationKernel::NEMinMaxLocationKernel()
steniu014c2938e2017-06-19 15:44:45 +0100314 : _func(nullptr), _input(nullptr), _min(nullptr), _max(nullptr), _min_count(nullptr), _max_count(nullptr), _min_loc(nullptr), _max_loc(nullptr)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100315{
316}
317
318bool NEMinMaxLocationKernel::is_parallelisable() const
319{
320 return false;
321}
322
Georgios Pinitas8795ffb2017-12-01 16:13:40 +0000323template <class T, std::size_t... N>
324struct NEMinMaxLocationKernel::create_func_table<T, utility::index_sequence<N...>>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100325{
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100326 static const std::array<NEMinMaxLocationKernel::MinMaxLocFunction, sizeof...(N)> func_table;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100327};
328
Georgios Pinitas8795ffb2017-12-01 16:13:40 +0000329template <class T, std::size_t... N>
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100330const std::array<NEMinMaxLocationKernel::MinMaxLocFunction, sizeof...(N)> NEMinMaxLocationKernel::create_func_table<T, utility::index_sequence<N...>>::func_table
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100331{
332 &NEMinMaxLocationKernel::minmax_loc<T, bool(N & 8), bool(N & 4), bool(N & 2), bool(N & 1)>...
333};
334
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100335void NEMinMaxLocationKernel::configure(const IImage *input, void *min, void *max,
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100336 ICoordinates2DArray *min_loc, ICoordinates2DArray *max_loc,
337 uint32_t *min_count, uint32_t *max_count)
338{
339 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100340 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100341 ARM_COMPUTE_ERROR_ON(nullptr == min);
342 ARM_COMPUTE_ERROR_ON(nullptr == max);
343
344 _input = input;
345 _min = min;
346 _max = max;
347 _min_count = min_count;
348 _max_count = max_count;
349 _min_loc = min_loc;
350 _max_loc = max_loc;
351
352 unsigned int count_min = (nullptr != min_count ? 1 : 0);
353 unsigned int count_max = (nullptr != max_count ? 1 : 0);
354 unsigned int loc_min = (nullptr != min_loc ? 1 : 0);
355 unsigned int loc_max = (nullptr != max_loc ? 1 : 0);
356
357 unsigned int table_idx = (count_min << 3) | (count_max << 2) | (loc_min << 1) | loc_max;
358
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100359 switch(input->info()->data_type())
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100360 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100361 case DataType::U8:
Georgios Pinitas8795ffb2017-12-01 16:13:40 +0000362 _func = create_func_table<uint8_t, utility::index_sequence_t<16>>::func_table[table_idx];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100363 break;
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100364 case DataType::S16:
Georgios Pinitas8795ffb2017-12-01 16:13:40 +0000365 _func = create_func_table<int16_t, utility::index_sequence_t<16>>::func_table[table_idx];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100366 break;
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100367 case DataType::F32:
Georgios Pinitas8795ffb2017-12-01 16:13:40 +0000368 _func = create_func_table<float, utility::index_sequence_t<16>>::func_table[table_idx];
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100369 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100370 default:
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100371 ARM_COMPUTE_ERROR("Unsupported data type");
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100372 break;
373 }
374
steniu014c2938e2017-06-19 15:44:45 +0100375 constexpr unsigned int num_elems_processed_per_iteration = 1;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100376
377 // Configure kernel window
steniu014c2938e2017-06-19 15:44:45 +0100378 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100379
steniu014c2938e2017-06-19 15:44:45 +0100380 update_window_and_padding(win, AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100381
382 INEKernel::configure(win);
383}
384
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100385void NEMinMaxLocationKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100386{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100387 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100388 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
389 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
390 ARM_COMPUTE_ERROR_ON(_func == nullptr);
391
392 (this->*_func)(window);
393}
394
395template <class T, bool count_min, bool count_max, bool loc_min, bool loc_max>
396void NEMinMaxLocationKernel::minmax_loc(const Window &win)
397{
398 if(count_min || count_max || loc_min || loc_max)
399 {
400 Iterator input(_input, win);
401
steniu014c2938e2017-06-19 15:44:45 +0100402 size_t min_count = 0;
403 size_t max_count = 0;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100404
405 // Clear min location array
406 if(loc_min)
407 {
408 _min_loc->clear();
409 }
410
411 // Clear max location array
412 if(loc_max)
413 {
414 _max_loc->clear();
415 }
416
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100417 using type = typename std::conditional<std::is_same<T, float>::value, float, int32_t>::type;
418
419 auto min_ptr = static_cast<type *>(_min);
420 auto max_ptr = static_cast<type *>(_max);
421
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100422 execute_window_loop(win, [&](const Coordinates & id)
423 {
424 auto in_ptr = reinterpret_cast<const T *>(input.ptr());
425 int32_t idx = id.x();
426 int32_t idy = id.y();
427
steniu014c2938e2017-06-19 15:44:45 +0100428 const T pixel = *in_ptr;
429 Coordinates2D p{ idx, idy };
430
431 if(count_min || loc_min)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100432 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100433 if(*min_ptr == pixel)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100434 {
steniu014c2938e2017-06-19 15:44:45 +0100435 if(count_min)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100436 {
steniu014c2938e2017-06-19 15:44:45 +0100437 ++min_count;
438 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100439
steniu014c2938e2017-06-19 15:44:45 +0100440 if(loc_min)
441 {
442 _min_loc->push_back(p);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100443 }
444 }
steniu014c2938e2017-06-19 15:44:45 +0100445 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100446
steniu014c2938e2017-06-19 15:44:45 +0100447 if(count_max || loc_max)
448 {
Michele Di Giorgioef4b4ae2017-07-04 17:19:43 +0100449 if(*max_ptr == pixel)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100450 {
steniu014c2938e2017-06-19 15:44:45 +0100451 if(count_max)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100452 {
steniu014c2938e2017-06-19 15:44:45 +0100453 ++max_count;
454 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100455
steniu014c2938e2017-06-19 15:44:45 +0100456 if(loc_max)
457 {
458 _max_loc->push_back(p);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100459 }
460 }
461 }
462 },
463 input);
464
465 if(count_min)
466 {
467 *_min_count = min_count;
468 }
469
470 if(count_max)
471 {
472 *_max_count = max_count;
473 }
474 }
475}
476} // namespace arm_compute