blob: eddc3b29ab66b434bc9575c635f54794d98ecbb0 [file] [log] [blame]
/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEHistogramKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IDistribution1D.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Types.h"
32#include "arm_compute/core/Window.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010033#include "src/core/helpers/AutoConfiguration.h"
34#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010035
36#include <algorithm>
37#include <arm_neon.h>
38#include <array>
39
Anthony Barbier6ff3b192017-09-04 18:44:23 +010040namespace arm_compute
41{
42class Coordinates;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043
44inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
45{
Georgios Pinitase874ef92019-09-09 17:40:33 +010046 arm_compute::lock_guard<arm_compute::Mutex> lock(_hist_mtx);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047
48 const unsigned int v_end = (bins / 4) * 4;
49
50 for(unsigned int b = 0; b < v_end; b += 4)
51 {
52 const uint32x4_t tmp_global = vld1q_u32(global_hist + b);
53 const uint32x4_t tmp_local = vld1q_u32(local_hist + b);
54 vst1q_u32(global_hist + b, vaddq_u32(tmp_global, tmp_local));
55 }
56
57 for(unsigned int b = v_end; b < bins; ++b)
58 {
59 global_hist[b] += local_hist[b];
60 }
61}
62
63NEHistogramKernel::NEHistogramKernel()
64 : _func(nullptr), _input(nullptr), _output(nullptr), _local_hist(nullptr), _window_lut(nullptr), _hist_mtx()
65{
66}
67
Moritz Pflanzerc186b572017-09-07 09:48:04 +010068void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010069{
70 ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
71
72 const size_t bins = _output->num_bins();
73 const int32_t offset = _output->offset();
74 const uint32_t offrange = offset + _output->range();
75 const uint32_t *const w_lut = _window_lut;
Moritz Pflanzerc186b572017-09-07 09:48:04 +010076 uint32_t *const local_hist = _local_hist + info.thread_id * bins;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010077
78 // Clear local_histogram
79 std::fill_n(local_hist, bins, 0);
80
81 auto update_local_hist = [&](uint8_t p)
82 {
83 if(offset <= p && p < offrange)
84 {
85 ++local_hist[w_lut[p]];
86 }
87 };
88
Isabella Gottardi02aabcc2017-10-12 17:28:51 +010089 const int x_start = win.x().start();
90 const int x_end = win.x().end();
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091
92 // Handle X dimension manually to split into two loops
93 // First one will use vector operations, second one processes the left over
94 // pixels
95 win.set(Window::DimX, Window::Dimension(0, 1, 1));
96
97 Iterator input(_input, win);
98
99 // Calculate local histogram
100 execute_window_loop(win, [&](const Coordinates &)
101 {
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100102 int x = x_start;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100103
104 // Vector loop
105 for(; x <= x_end - 8; x += 8)
106 {
107 const uint8x8_t pixels = vld1_u8(input.ptr() + x);
108
109 update_local_hist(vget_lane_u8(pixels, 0));
110 update_local_hist(vget_lane_u8(pixels, 1));
111 update_local_hist(vget_lane_u8(pixels, 2));
112 update_local_hist(vget_lane_u8(pixels, 3));
113 update_local_hist(vget_lane_u8(pixels, 4));
114 update_local_hist(vget_lane_u8(pixels, 5));
115 update_local_hist(vget_lane_u8(pixels, 6));
116 update_local_hist(vget_lane_u8(pixels, 7));
117 }
118
119 // Process leftover pixels
120 for(; x < x_end; ++x)
121 {
122 update_local_hist(input.ptr()[x]);
123 }
124 },
125 input);
126
127 // Merge histograms
128 merge_histogram(_output->buffer(), local_hist, bins);
129}
130
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100131void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100133 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100134 ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
135
136 std::array<uint32_t, _max_range_size> local_hist{ { 0 } };
137
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100138 const int x_start = win.x().start();
139 const int x_end = win.x().end();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100140
141 // Handle X dimension manually to split into two loops
142 // First one will use vector operations, second one processes the left over
143 // pixels
144 win.set(Window::DimX, Window::Dimension(0, 1, 1));
145
146 Iterator input(_input, win);
147
148 // Calculate local histogram
149 execute_window_loop(win, [&](const Coordinates &)
150 {
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100151 int x = x_start;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152
153 // Vector loop
154 for(; x <= x_end - 8; x += 8)
155 {
156 const uint8x8_t pixels = vld1_u8(input.ptr() + x);
157
158 ++local_hist[vget_lane_u8(pixels, 0)];
159 ++local_hist[vget_lane_u8(pixels, 1)];
160 ++local_hist[vget_lane_u8(pixels, 2)];
161 ++local_hist[vget_lane_u8(pixels, 3)];
162 ++local_hist[vget_lane_u8(pixels, 4)];
163 ++local_hist[vget_lane_u8(pixels, 5)];
164 ++local_hist[vget_lane_u8(pixels, 6)];
165 ++local_hist[vget_lane_u8(pixels, 7)];
166 }
167
168 // Process leftover pixels
169 for(; x < x_end; ++x)
170 {
171 ++local_hist[input.ptr()[x]];
172 }
173 },
174 input);
175
176 // Merge histograms
177 merge_histogram(_output->buffer(), local_hist.data(), _max_range_size);
178}
179
180void NEHistogramKernel::calculate_window_lut() const
181{
182 const int32_t offset = _output->offset();
183 const size_t bins = _output->num_bins();
184 const uint32_t range = _output->range();
185
186 std::fill_n(_window_lut, offset, 0);
187
188 for(unsigned int p = offset; p < _max_range_size; ++p)
189 {
190 _window_lut[p] = ((p - offset) * bins) / range;
191 }
192}
193
194void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut)
195{
196 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
197 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
198 ARM_COMPUTE_ERROR_ON(nullptr == output);
199 ARM_COMPUTE_ERROR_ON(nullptr == local_hist);
200 ARM_COMPUTE_ERROR_ON(nullptr == window_lut);
201
202 _input = input;
203 _output = output;
204 _local_hist = local_hist;
205 _window_lut = window_lut;
206
207 //Check offset
208 ARM_COMPUTE_ERROR_ON_MSG(0 > _output->offset() || _output->offset() > static_cast<int32_t>(_max_range_size), "Offset is larger than the image value range.");
209
210 //Check range
211 ARM_COMPUTE_ERROR_ON_MSG(static_cast<int32_t>(_output->range()) > static_cast<int32_t>(_max_range_size) /* max range */, "Range larger than the image value range.");
212
213 // Calculate LUT
214 calculate_window_lut();
215
216 // Set appropriate function
217 _func = &NEHistogramKernel::histogram_U8;
218
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100219 Window win = calculate_max_window(*input->info(), Steps());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100220
221 INEKernel::configure(win);
222}
223
224void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output)
225{
226 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
227 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
228 ARM_COMPUTE_ERROR_ON(nullptr == output);
229
230 _input = input;
231 _output = output;
232
233 // Set appropriate function
234 _func = &NEHistogramKernel::histogram_fixed_U8;
235
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100236 Window win = calculate_max_window(*input->info(), Steps());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100237
238 INEKernel::configure(win);
239}
240
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100241void NEHistogramKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100242{
243 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
244 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
245 ARM_COMPUTE_ERROR_ON(_func == nullptr);
246
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100247 (this->*_func)(window, info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100248}
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100249} // namespace arm_compute