blob: e04982a3152913c3f93ef521159e0f9d7062398e [file] [log] [blame]
/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
25
26#include "arm_compute/core/CL/ICLTensor.h"
27#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
28#include "arm_compute/core/Error.h"
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010029#include "arm_compute/core/Helpers.h"
Michalis Spyrou04f089c2017-08-08 17:42:38 +010030#include "arm_compute/core/PixelValue.h"
31#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Validate.h"
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010033#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Michalis Spyrou04f089c2017-08-08 17:42:38 +010034#include "arm_compute/runtime/CL/CLScheduler.h"
35#include "arm_compute/runtime/Tensor.h"
Manuel Bottini7b9998d2019-10-21 17:59:07 +010036#include "arm_compute/runtime/Utils.h"
Michalis Spyrou04f089c2017-08-08 17:42:38 +010037#include "support/ToolchainSupport.h"
38
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010039namespace arm_compute
40{
Georgios Pinitas8a94e7c2017-09-15 19:06:47 +010041CLReductionOperation::CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager)
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010042 : _memory_group(std::move(memory_manager)), _results_vector(), _reduction_kernels_vector(), _border_handlers_vector(), _reshape_kernel(), _op(), _num_of_stages(), _reduction_axis(), _is_serial(),
43 _is_reshape_required(false)
Michalis Spyrou04f089c2017-08-08 17:42:38 +010044{
45}
46
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010047Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims)
John Richardson62385bc2018-04-20 13:11:36 +010048{
Manuel Bottini7b9998d2019-10-21 17:59:07 +010049 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010050 ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
51 ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 3, "Unsupported reduction axis");
52
Manuel Bottini7b9998d2019-10-21 17:59:07 +010053 const unsigned int num_of_stages = calculate_number_of_stages_only_x_axis(input->dimension(0), axis);
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010054 const bool is_serial = needs_serialized_reduction(op, input->data_type(), axis);
Manuel Bottini7b9998d2019-10-21 17:59:07 +010055 const bool is_reshape_required = !keep_dims;
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010056
Manuel Bottini7b9998d2019-10-21 17:59:07 +010057 if(is_reshape_required && output->total_size() != 0)
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010058 {
59 const TensorInfo expected_output_shape = output->clone()->set_tensor_shape(arm_compute::misc::shape_calculator::compute_reduced_shape(input->tensor_shape(), axis, keep_dims));
60 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output_shape, output);
61 }
62
63 auto *output_internal = output;
64
65 TensorInfo output_before_reshape;
66 const auto input_shape = input->tensor_shape();
67 const auto input_data_type = input->data_type();
68 const auto input_num_channles = input->num_channels();
69 const auto input_qinfo = input->quantization_info();
Manuel Bottini7b9998d2019-10-21 17:59:07 +010070 const auto output_data_type = output->data_type();
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010071
72 auto initialize_tensorinfo = [](TensorInfo & ti, TensorShape shape, DataType data_type, int num_channels, QuantizationInfo qinfo)
73 {
74 ti.set_data_type(data_type).set_tensor_shape(shape).set_num_channels(num_channels).set_quantization_info(qinfo);
75 };
76
77 if(is_reshape_required)
78 {
79 auto shape_before_reshape = input_shape;
80 shape_before_reshape.set(axis, 1);
81 initialize_tensorinfo(output_before_reshape, shape_before_reshape, output_data_type, input_num_channles, input_qinfo);
82 output_internal = &output_before_reshape;
83 }
84
Manuel Bottinib412fab2018-12-10 17:40:23 +000085 if(is_serial)
86 {
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010087 ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, output_internal, axis, op));
Manuel Bottinib412fab2018-12-10 17:40:23 +000088 }
89 else
John Richardson62385bc2018-04-20 13:11:36 +010090 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010091 // Create temporary tensor infos
Michalis Spyroubcfd09a2019-05-01 13:03:59 +010092 std::vector<TensorInfo> sums_vector(num_of_stages - 1);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010093
94 // Create intermediate tensor info
Sang-Hoon Park2697fd82019-10-15 16:49:24 +010095 TensorShape shape{ input_shape };
96
97 shape.set(0, ceil(shape.x() / 128.f));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010098
99 for(unsigned int i = 0; i < num_of_stages - 1; i++)
100 {
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100101 initialize_tensorinfo(sums_vector[i], shape, input_data_type, input_num_channles, input_qinfo);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100102 }
103
Michalis Spyroue55a0132018-10-26 10:48:56 +0100104 ReductionOperation first_kernel_op;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000105 ReductionOperation intermediate_kernel_op;
Michalis Spyroue55a0132018-10-26 10:48:56 +0100106 ReductionOperation last_kernel_op;
107 switch(op)
108 {
109 case ReductionOperation::SUM:
110 case ReductionOperation::MEAN_SUM:
Manuel Bottinib412fab2018-12-10 17:40:23 +0000111 first_kernel_op = ReductionOperation::SUM;
112 intermediate_kernel_op = ReductionOperation::SUM;
113 last_kernel_op = op;
Michalis Spyroue55a0132018-10-26 10:48:56 +0100114 break;
115 case ReductionOperation::SUM_SQUARE:
Manuel Bottinib412fab2018-12-10 17:40:23 +0000116 first_kernel_op = ReductionOperation::SUM_SQUARE;
117 intermediate_kernel_op = ReductionOperation::SUM;
118 last_kernel_op = ReductionOperation::SUM;
119 break;
120 case ReductionOperation::PROD:
121 first_kernel_op = ReductionOperation::PROD;
122 intermediate_kernel_op = ReductionOperation::PROD;
123 last_kernel_op = ReductionOperation::PROD;
Michalis Spyroue55a0132018-10-26 10:48:56 +0100124 break;
Usama Arifb2890502019-05-21 11:48:37 +0100125 case ReductionOperation::MIN:
126 first_kernel_op = ReductionOperation::MIN;
127 intermediate_kernel_op = ReductionOperation::MIN;
128 last_kernel_op = ReductionOperation::MIN;
129 break;
Usama Arif048b0f32019-05-22 16:32:27 +0100130 case ReductionOperation::MAX:
131 first_kernel_op = ReductionOperation::MAX;
132 intermediate_kernel_op = ReductionOperation::MAX;
133 last_kernel_op = ReductionOperation::MAX;
134 break;
Michalis Spyroue55a0132018-10-26 10:48:56 +0100135 default:
136 ARM_COMPUTE_ERROR("Not supported");
137 }
138
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100139 // Validate ReductionOperation only on first kernel
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100140 ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(input, &sums_vector[0], axis, first_kernel_op));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100141
142 // Validate ReductionOperation on intermediate stages
143 for(unsigned int i = 1; i < num_of_stages - 1; ++i)
144 {
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100145 ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[i - 1], &sums_vector[i], axis, intermediate_kernel_op));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100146 }
147
148 // Validate ReductionOperation on the last stage
149 const unsigned int last_stage = num_of_stages - 1;
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100150 ARM_COMPUTE_RETURN_ON_ERROR(CLReductionOperationKernel::validate(&sums_vector[last_stage - 1], output_internal, axis, last_kernel_op, input->dimension(0)));
151 }
152
153 if(is_reshape_required)
154 {
155 ARM_COMPUTE_RETURN_ON_ERROR(CLReshapeLayerKernel::validate(output_internal, output));
John Richardson62385bc2018-04-20 13:11:36 +0100156 }
John Richardson62385bc2018-04-20 13:11:36 +0100157
John Richardson62385bc2018-04-20 13:11:36 +0100158 return Status{};
159}
160
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100161ICLTensor *CLReductionOperation::configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100162{
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100163 if(!_is_reshape_required && _is_serial)
164 {
165 return output;
166 }
167
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100168 auto intermediate_result_vector_size = _is_serial ? 1 : _num_of_stages;
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100169
170 if(!_is_reshape_required)
171 {
172 --intermediate_result_vector_size;
173 }
174
175 _results_vector.resize(intermediate_result_vector_size);
176 auto shape = input->info()->tensor_shape();
177
178 shape.set(_reduction_axis, _is_serial ? 1 : ceil(shape.x() / 128.f));
179
180 for(auto &v : _results_vector)
181 {
182 if(&v == &_results_vector.back() && _is_reshape_required)
183 {
184 shape.set(_reduction_axis, 1);
185 }
186 v.allocator()->init(input->info()->clone()->set_tensor_shape(shape));
187 }
188
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100189 return _is_reshape_required ? &_results_vector.back() : output;
190}
191
192void CLReductionOperation::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, bool keep_dims)
193{
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100194 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
195 _op = op;
196 _num_of_stages = calculate_number_of_stages_only_x_axis(input->info()->dimension(0), axis);
197 _reduction_axis = axis;
198 _is_serial = needs_serialized_reduction(op, input->info()->data_type(), axis);
199 _is_reshape_required = !keep_dims;
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100200
201 auto *output_internal = configure_intermediate_result_vector(input, output);
202
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100203 if(_is_reshape_required)
204 {
205 const TensorShape output_shape = arm_compute::misc::shape_calculator::compute_reduced_shape(input->info()->tensor_shape(), axis, false);
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100206 const auto output_data_type = input->info()->data_type();
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100207 auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
208 }
Georgios Pinitasaec513c2017-09-15 19:36:30 +0100209
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100210 // Configure reduction operation kernels
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100211 _reduction_kernels_vector.resize(_num_of_stages);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100212
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100213 // Create temporary tensors
Manuel Bottinib412fab2018-12-10 17:40:23 +0000214 if(_is_serial)
215 {
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100216 if(_is_reshape_required)
217 {
218 _memory_group.manage(&_results_vector.back());
219 }
220
221 _reduction_kernels_vector[0].configure(input, output_internal, axis, op, 0);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000222 }
223 else
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100224 {
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100225 _border_handlers_vector.resize(_num_of_stages);
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100226 _memory_group.manage(&_results_vector[0]);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100227
228 ReductionOperation first_kernel_op;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000229 ReductionOperation intermediate_kernel_op;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100230 ReductionOperation last_kernel_op;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000231 PixelValue pixelValue;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100232 switch(op)
233 {
234 case ReductionOperation::SUM:
235 case ReductionOperation::MEAN_SUM:
Manuel Bottinib412fab2018-12-10 17:40:23 +0000236 first_kernel_op = ReductionOperation::SUM;
237 intermediate_kernel_op = ReductionOperation::SUM;
238 last_kernel_op = op;
Manuel Bottini55e16782019-01-15 13:21:57 +0000239 pixelValue = PixelValue();
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100240 break;
241 case ReductionOperation::SUM_SQUARE:
Manuel Bottinib412fab2018-12-10 17:40:23 +0000242 first_kernel_op = ReductionOperation::SUM_SQUARE;
243 intermediate_kernel_op = ReductionOperation::SUM;
244 last_kernel_op = ReductionOperation::SUM;
Manuel Bottini55e16782019-01-15 13:21:57 +0000245 pixelValue = PixelValue();
Manuel Bottinib412fab2018-12-10 17:40:23 +0000246 break;
247 case ReductionOperation::PROD:
248 first_kernel_op = ReductionOperation::PROD;
249 intermediate_kernel_op = ReductionOperation::PROD;
250 last_kernel_op = ReductionOperation::PROD;
251 pixelValue = PixelValue(1, input->info()->data_type());
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100252 break;
Usama Arifb2890502019-05-21 11:48:37 +0100253 case ReductionOperation::MIN:
254 first_kernel_op = ReductionOperation::MIN;
255 intermediate_kernel_op = ReductionOperation::MIN;
256 last_kernel_op = ReductionOperation::MIN;
257 switch(input->info()->data_type())
258 {
259 case DataType::F32:
260 {
261 pixelValue = PixelValue(std::numeric_limits<float>::max());
262 break;
263 }
264 case DataType::F16:
265 {
266 pixelValue = PixelValue(static_cast<half>(65504.0f));
267 break;
268 }
269 case DataType::QASYMM8:
270 {
271 pixelValue = PixelValue(255, input->info()->data_type(), input->info()->quantization_info());
272 break;
273 }
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000274 case DataType::QASYMM8_SIGNED:
275 {
276 pixelValue = PixelValue(127, input->info()->data_type(), input->info()->quantization_info());
277 break;
278 }
Usama Arifb2890502019-05-21 11:48:37 +0100279 default:
280 {
281 ARM_COMPUTE_ERROR("Unsupported DataType");
282 }
283 }
284 break;
Usama Arif048b0f32019-05-22 16:32:27 +0100285 case ReductionOperation::MAX:
286 first_kernel_op = ReductionOperation::MAX;
287 intermediate_kernel_op = ReductionOperation::MAX;
288 last_kernel_op = ReductionOperation::MAX;
289 switch(input->info()->data_type())
290 {
291 case DataType::F32:
292 {
293 pixelValue = PixelValue(-std::numeric_limits<float>::max());
294 break;
295 }
296 case DataType::F16:
297 {
298 pixelValue = PixelValue(static_cast<half>(-65504.0f));
299 break;
300 }
301 case DataType::QASYMM8:
302 {
303 pixelValue = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
304 break;
305 }
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000306 case DataType::QASYMM8_SIGNED:
307 {
308 pixelValue = PixelValue(-128, input->info()->data_type(), input->info()->quantization_info());
309 break;
310 }
Usama Arif048b0f32019-05-22 16:32:27 +0100311 default:
312 {
313 ARM_COMPUTE_ERROR("Unsupported DataType");
314 }
315 }
316 break;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100317 default:
318 ARM_COMPUTE_ERROR("Not supported");
319 }
320
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100321 _reduction_kernels_vector[0].configure(input, &_results_vector[0], axis, first_kernel_op);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000322 _border_handlers_vector[0].configure(input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100323
324 // Apply ReductionOperation on intermediate stages
325 for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
326 {
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100327 _memory_group.manage(&_results_vector[i]);
328 _reduction_kernels_vector[i].configure(&_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
329 _border_handlers_vector[i].configure(&_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000330 _results_vector[i - 1].allocator()->allocate();
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100331 }
332
333 // Apply ReductionOperation on the last stage
334 const unsigned int last_stage = _num_of_stages - 1;
335 const unsigned int input_width = input->info()->dimension(0);
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100336
337 if(_is_reshape_required)
338 {
339 _memory_group.manage(&_results_vector.back());
340 }
341
342 _reduction_kernels_vector[last_stage].configure(&_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100343 _border_handlers_vector[last_stage].configure(&_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000344 _results_vector[last_stage - 1].allocator()->allocate();
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100345 }
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100346
347 if(_is_reshape_required)
348 {
349 _reshape_kernel.configure(&_results_vector.back(), output);
350 _results_vector.back().allocator()->allocate();
351 }
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100352}
353
354void CLReductionOperation::run()
355{
Georgios Pinitasda953f22019-04-02 17:27:03 +0100356 MemoryGroupResourceScope scope_mg(_memory_group);
Georgios Pinitas8a94e7c2017-09-15 19:06:47 +0100357
Manuel Bottinib412fab2018-12-10 17:40:23 +0000358 if(_is_serial)
359 {
360 CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
361 }
362 else
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100363 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100364 for(unsigned int i = 0; i < _num_of_stages; ++i)
365 {
366 CLScheduler::get().enqueue(_border_handlers_vector[i], false);
367 CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
368 }
369 }
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100370
371 if(_is_reshape_required)
372 {
373 CLScheduler::get().enqueue(_reshape_kernel, false);
374 }
John Richardson62385bc2018-04-20 13:11:36 +0100375}
Sang-Hoon Park2697fd82019-10-15 16:49:24 +0100376} // namespace arm_compute