blob: 30d42fa25fd537a4a10997bd751c669169aec4af [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
25
26#include "arm_compute/core/Coordinates.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/NEON/INEKernel.h"
31#include "arm_compute/core/NEON/NEMath.h"
John Richardson73d4aef2018-05-08 14:34:33 +010032#include "arm_compute/core/TensorInfo.h"
Georgios Pinitasd9769582017-08-03 10:19:40 +010033#include "arm_compute/core/Validate.h"
34
35#include <arm_neon.h>
36
37using namespace arm_compute;
38
39namespace
40{
41template <class F>
42class Reducer
43{
44public:
45 static void reduceX(const Window &window, const ITensor *input, ITensor *output, F f)
46 {
47 // Set out window
48 Window out_window(window);
49 out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
50
51 // Get first input and output slices
52 Window in_slice = window.first_slice_window_1D();
53 Window out_slice = out_window.first_slice_window_1D();
54
55 do
56 {
57 Iterator in(input, in_slice);
58 Iterator out(output, out_slice);
59
60 f(in, out, in_slice, out_slice);
61 }
62 while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(out_slice));
63 }
64};
65
66struct SumsqOpX
67{
68 inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice)
69 {
70 ARM_COMPUTE_UNUSED(out_slice);
71 float32x4_t vec_sum_value = vdupq_n_f32(0.f);
72
73 execute_window_loop(in_slice, [&](const Coordinates & id)
74 {
75 const auto in_ptr = reinterpret_cast<const float *>(input.ptr());
76 const float32x4_t vec_elements = vld1q_f32(in_ptr);
77 vec_sum_value = vaddq_f32(vmulq_f32(vec_elements, vec_elements), vec_sum_value);
78 },
79 input);
80
81 float32x2_t carry_addition = vpadd_f32(vget_high_f32(vec_sum_value), vget_low_f32(vec_sum_value));
82 carry_addition = vpadd_f32(carry_addition, carry_addition);
83
84 *(reinterpret_cast<float *>(output.ptr())) = vget_lane_f32(carry_addition, 0);
85 }
86};
87
88void reduce_sumsq(const Window &window, const ITensor *input, ITensor *output, unsigned int axis)
89{
90 switch(axis)
91 {
92 case 0:
93 return Reducer<SumsqOpX>::reduceX(window, input, output, SumsqOpX());
94 default:
95 ARM_COMPUTE_ERROR("Unsupported reduction axis");
96 }
97}
John Richardson73d4aef2018-05-08 14:34:33 +010098
99TensorShape calculate_output_shape(const TensorShape &input_shape, unsigned int axis)
100{
101 TensorShape output_shape{ input_shape };
102 output_shape.set(axis, 1);
103
104 return output_shape;
105}
106
107Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
108{
109 ARM_COMPUTE_UNUSED(op);
110
111 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
112 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
113 ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
114
115 ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions");
116 ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0");
117
118 if(output->total_size() != 0)
119 {
120 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
121 ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW);
122
123 const TensorShape output_shape = calculate_output_shape(input->tensor_shape(), axis);
124 const TensorInfo tensor_info_reshaped = input->clone()->set_tensor_shape(output_shape);
125 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &tensor_info_reshaped);
126 }
127
128 return Status{};
129}
130
131std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, unsigned int axis)
132{
133 // Calculate output shape and set if empty
134 const TensorShape output_shape = calculate_output_shape(input->tensor_shape(), axis);
135
136 // Output auto initialization if not yet initialized
137 auto_init_if_empty(*output, output_shape, 1, input->data_type(), input->fixed_point_position());
138
139 unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type());
140
141 // Configure kernel window
142 Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
143 AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
144 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
145
146 bool window_changed = update_window_and_padding(win, input_access, output_access);
147 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
148
149 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
150
151 return std::make_tuple(err, win);
152}
Georgios Pinitasd9769582017-08-03 10:19:40 +0100153} // namespace
154
155NEReductionOperationKernel::NEReductionOperationKernel()
156 : _input(nullptr), _output(nullptr), _reduction_axis(0), _op(ReductionOperation::SUM_SQUARE), _border_size()
157{
158}
159
160BorderSize NEReductionOperationKernel::border_size() const
161{
162 return _border_size;
163}
164
165void NEReductionOperationKernel::configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op)
166{
167 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
Georgios Pinitasd9769582017-08-03 10:19:40 +0100168
John Richardson73d4aef2018-05-08 14:34:33 +0100169 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis, op));
Georgios Pinitasd9769582017-08-03 10:19:40 +0100170
171 unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->info()->data_type());
172
173 _input = input;
174 _output = output;
175 _border_size = (axis == 0) ? BorderSize(0, num_elems_processed_per_iteration - (input->info()->dimension(0) % num_elems_processed_per_iteration), 0, 0) : BorderSize();
176 _op = op;
177
178 // Configure kernel window
John Richardson73d4aef2018-05-08 14:34:33 +0100179 auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis);
Georgios Pinitasd9769582017-08-03 10:19:40 +0100180
John Richardson73d4aef2018-05-08 14:34:33 +0100181 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Georgios Pinitasd9769582017-08-03 10:19:40 +0100182
John Richardson73d4aef2018-05-08 14:34:33 +0100183 INEKernel::configure(std::get<1>(win_config));
184}
185
186Status NEReductionOperationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op)
187{
188 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis, op));
189 ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), axis)));
190
191 return Status{};
Georgios Pinitasd9769582017-08-03 10:19:40 +0100192}
193
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100194void NEReductionOperationKernel::run(const Window &window, const ThreadInfo &info)
Georgios Pinitasd9769582017-08-03 10:19:40 +0100195{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100196 ARM_COMPUTE_UNUSED(info);
Georgios Pinitasd9769582017-08-03 10:19:40 +0100197 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
198 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
199
200 switch(_op)
201 {
202 case ReductionOperation::SUM_SQUARE:
203 reduce_sumsq(window, _input, _output, _reduction_axis);
204 break;
205 default:
206 ARM_COMPUTE_ERROR("Unsupported reduction operation.");
207 }
208}