blob: 211ea1ff9cfc452b26dab3cf960cfcd1b3b5cab2 [file] [log] [blame]
/*
 * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IDistribution1D.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Types.h"
32#include "arm_compute/core/Window.h"
33
34#include <algorithm>
35#include <arm_neon.h>
36#include <array>
37
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038namespace arm_compute
39{
40class Coordinates;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010041
42inline void NEHistogramKernel::merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins)
43{
Georgios Pinitase874ef92019-09-09 17:40:33 +010044 arm_compute::lock_guard<arm_compute::Mutex> lock(_hist_mtx);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010045
46 const unsigned int v_end = (bins / 4) * 4;
47
48 for(unsigned int b = 0; b < v_end; b += 4)
49 {
50 const uint32x4_t tmp_global = vld1q_u32(global_hist + b);
51 const uint32x4_t tmp_local = vld1q_u32(local_hist + b);
52 vst1q_u32(global_hist + b, vaddq_u32(tmp_global, tmp_local));
53 }
54
55 for(unsigned int b = v_end; b < bins; ++b)
56 {
57 global_hist[b] += local_hist[b];
58 }
59}
60
61NEHistogramKernel::NEHistogramKernel()
62 : _func(nullptr), _input(nullptr), _output(nullptr), _local_hist(nullptr), _window_lut(nullptr), _hist_mtx()
63{
64}
65
Moritz Pflanzerc186b572017-09-07 09:48:04 +010066void NEHistogramKernel::histogram_U8(Window win, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010067{
68 ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
69
70 const size_t bins = _output->num_bins();
71 const int32_t offset = _output->offset();
72 const uint32_t offrange = offset + _output->range();
73 const uint32_t *const w_lut = _window_lut;
Moritz Pflanzerc186b572017-09-07 09:48:04 +010074 uint32_t *const local_hist = _local_hist + info.thread_id * bins;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010075
76 // Clear local_histogram
77 std::fill_n(local_hist, bins, 0);
78
79 auto update_local_hist = [&](uint8_t p)
80 {
81 if(offset <= p && p < offrange)
82 {
83 ++local_hist[w_lut[p]];
84 }
85 };
86
Isabella Gottardi02aabcc2017-10-12 17:28:51 +010087 const int x_start = win.x().start();
88 const int x_end = win.x().end();
Anthony Barbier6ff3b192017-09-04 18:44:23 +010089
90 // Handle X dimension manually to split into two loops
91 // First one will use vector operations, second one processes the left over
92 // pixels
93 win.set(Window::DimX, Window::Dimension(0, 1, 1));
94
95 Iterator input(_input, win);
96
97 // Calculate local histogram
98 execute_window_loop(win, [&](const Coordinates &)
99 {
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100100 int x = x_start;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100101
102 // Vector loop
103 for(; x <= x_end - 8; x += 8)
104 {
105 const uint8x8_t pixels = vld1_u8(input.ptr() + x);
106
107 update_local_hist(vget_lane_u8(pixels, 0));
108 update_local_hist(vget_lane_u8(pixels, 1));
109 update_local_hist(vget_lane_u8(pixels, 2));
110 update_local_hist(vget_lane_u8(pixels, 3));
111 update_local_hist(vget_lane_u8(pixels, 4));
112 update_local_hist(vget_lane_u8(pixels, 5));
113 update_local_hist(vget_lane_u8(pixels, 6));
114 update_local_hist(vget_lane_u8(pixels, 7));
115 }
116
117 // Process leftover pixels
118 for(; x < x_end; ++x)
119 {
120 update_local_hist(input.ptr()[x]);
121 }
122 },
123 input);
124
125 // Merge histograms
126 merge_histogram(_output->buffer(), local_hist, bins);
127}
128
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100129void NEHistogramKernel::histogram_fixed_U8(Window win, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100130{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100131 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132 ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
133
134 std::array<uint32_t, _max_range_size> local_hist{ { 0 } };
135
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100136 const int x_start = win.x().start();
137 const int x_end = win.x().end();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100138
139 // Handle X dimension manually to split into two loops
140 // First one will use vector operations, second one processes the left over
141 // pixels
142 win.set(Window::DimX, Window::Dimension(0, 1, 1));
143
144 Iterator input(_input, win);
145
146 // Calculate local histogram
147 execute_window_loop(win, [&](const Coordinates &)
148 {
Isabella Gottardi02aabcc2017-10-12 17:28:51 +0100149 int x = x_start;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100150
151 // Vector loop
152 for(; x <= x_end - 8; x += 8)
153 {
154 const uint8x8_t pixels = vld1_u8(input.ptr() + x);
155
156 ++local_hist[vget_lane_u8(pixels, 0)];
157 ++local_hist[vget_lane_u8(pixels, 1)];
158 ++local_hist[vget_lane_u8(pixels, 2)];
159 ++local_hist[vget_lane_u8(pixels, 3)];
160 ++local_hist[vget_lane_u8(pixels, 4)];
161 ++local_hist[vget_lane_u8(pixels, 5)];
162 ++local_hist[vget_lane_u8(pixels, 6)];
163 ++local_hist[vget_lane_u8(pixels, 7)];
164 }
165
166 // Process leftover pixels
167 for(; x < x_end; ++x)
168 {
169 ++local_hist[input.ptr()[x]];
170 }
171 },
172 input);
173
174 // Merge histograms
175 merge_histogram(_output->buffer(), local_hist.data(), _max_range_size);
176}
177
178void NEHistogramKernel::calculate_window_lut() const
179{
180 const int32_t offset = _output->offset();
181 const size_t bins = _output->num_bins();
182 const uint32_t range = _output->range();
183
184 std::fill_n(_window_lut, offset, 0);
185
186 for(unsigned int p = offset; p < _max_range_size; ++p)
187 {
188 _window_lut[p] = ((p - offset) * bins) / range;
189 }
190}
191
192void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut)
193{
194 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
195 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
196 ARM_COMPUTE_ERROR_ON(nullptr == output);
197 ARM_COMPUTE_ERROR_ON(nullptr == local_hist);
198 ARM_COMPUTE_ERROR_ON(nullptr == window_lut);
199
200 _input = input;
201 _output = output;
202 _local_hist = local_hist;
203 _window_lut = window_lut;
204
205 //Check offset
206 ARM_COMPUTE_ERROR_ON_MSG(0 > _output->offset() || _output->offset() > static_cast<int32_t>(_max_range_size), "Offset is larger than the image value range.");
207
208 //Check range
209 ARM_COMPUTE_ERROR_ON_MSG(static_cast<int32_t>(_output->range()) > static_cast<int32_t>(_max_range_size) /* max range */, "Range larger than the image value range.");
210
211 // Calculate LUT
212 calculate_window_lut();
213
214 // Set appropriate function
215 _func = &NEHistogramKernel::histogram_U8;
216
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100217 Window win = calculate_max_window(*input->info(), Steps());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100218
219 INEKernel::configure(win);
220}
221
222void NEHistogramKernel::configure(const IImage *input, IDistribution1D *output)
223{
224 ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
225 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
226 ARM_COMPUTE_ERROR_ON(nullptr == output);
227
228 _input = input;
229 _output = output;
230
231 // Set appropriate function
232 _func = &NEHistogramKernel::histogram_fixed_U8;
233
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100234 Window win = calculate_max_window(*input->info(), Steps());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100235
236 INEKernel::configure(win);
237}
238
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100239void NEHistogramKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100240{
241 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
242 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
243 ARM_COMPUTE_ERROR_ON(_func == nullptr);
244
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100245 (this->*_func)(window, info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100246}
Michele Di Giorgio4646d2e2019-06-19 12:28:47 +0100247} // namespace arm_compute