/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/FixedPoint.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/ITensor.h"
Gian Marco Iodice13edbff2017-06-26 17:20:16 +010030#include "arm_compute/core/Size2D.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/TensorInfo.h"
32#include "arm_compute/core/Types.h"
33#include "arm_compute/core/Validate.h"
34
Ioan-Cristian Szabob4e3e1c2017-11-30 17:17:17 +000035#include "arm_compute/core/utils/misc/ShapeCalculator.h"
36
Anthony Barbier6ff3b192017-09-04 18:44:23 +010037#include <arm_neon.h>
38#include <cstddef>
39#include <cstdint>
40#include <cstring>
41#include <tuple>
42
43using namespace arm_compute;
44
namespace
{
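// Validate the im2col input/output pair; the expected output shape depends on
// which layer is calling: FlattenLayer, ConvolutionLayer or FullyConnectedLayer.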
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                          bool has_bias, bool is_fully_connected, bool is_flatten)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);

    if(is_flatten) /* Called by FlattenLayer */
    {
        size_t flatten_shape = input->tensor_shape().x() * input->tensor_shape().y() * input->tensor_shape().z();
        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != flatten_shape);
    }
    else if(!is_fully_connected) /* Called by ConvolutionLayer */
    {
        std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info);
        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0)));
        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second));
        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1);
    }
    else /* Called by FullyConnectedLayer */
    {
        const int num_batch_dimensions = std::max(0, static_cast<int>(output->tensor_shape().num_dimensions()) - 1);
        const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions;

        TensorInfo expected_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_im2col_shape(input, num_input_dimensions));
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output);
    }

    return Status{};
}

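// Copy one kernel_width x kernel_height x kernel_depth input patch, whose top-left corner is
// (top_left_x, top_left_y), into one contiguous row of the im2col output. Out-of-bounds samples
// are written as pad_value, and a trailing 1 is appended when the convolution layer has a bias.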
template <typename T, bool has_pads>
inline void linearize_volume(const uint8_t *const in_ptr,
                             T                   *out_ptr,
                             bool                 has_bias,
                             int                  top_left_x,
                             int                  top_left_y,
                             int                  kernel_width,
                             int                  kernel_height,
                             int                  kernel_depth,
                             int                  input_w,
                             int                  input_h,
                             int                  input_stride_x,
                             int                  input_stride_y,
                             int                  input_stride_z,
                             int                  fixed_point_position,
                             int                  pad_value)
{
    const int kernel_size2 = kernel_width * kernel_height;
    const int x_e          = top_left_x + kernel_width;
    const int y_e          = top_left_y + kernel_height;

    // Linearize volume
    int d = 0;
    // This for loop linearizes a volume with 3 slices at a time. This allows:
    // 1) to reduce the iterations of the outer for loop "d"
    // 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs
    for(; d <= (kernel_depth - 3); d += 3)
    {
        for(int y = top_left_y; y < y_e; ++y)
        {
            if((y < 0 || y >= input_h) && has_pads)
            {
                // All the values will be the offset (will be zeros when not quantized)
                for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
                {
                    *(out_ptr + 0 * kernel_size2) = pad_value;
                    *(out_ptr + 1 * kernel_size2) = pad_value;
                    *(out_ptr + 2 * kernel_size2) = pad_value;
                }
            }
            else
            {
                for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
                {
                    if((x < 0 || x >= input_w) && has_pads)
                    {
                        *(out_ptr + 0 * kernel_size2) = pad_value;
                        *(out_ptr + 1 * kernel_size2) = pad_value;
                        *(out_ptr + 2 * kernel_size2) = pad_value;
                    }
                    else
                    {
                        *(out_ptr + 0 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 0) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                        *(out_ptr + 1 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 1) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                        *(out_ptr + 2 * kernel_size2) = *(reinterpret_cast<const T *>(in_ptr + ((d + 2) * input_stride_z + y * input_stride_y + x * input_stride_x)));
                    }
                }
            }
        }
        out_ptr += 2 * kernel_size2;
    }

    // Left over
    for(; d < kernel_depth; d++)
    {
        for(int y = top_left_y; y < y_e; ++y)
        {
            if((y < 0 || y >= input_h) && has_pads)
            {
                // All the values will be the offset (will be zeros when not quantized)
                memset(out_ptr, pad_value, kernel_width * sizeof(T));
                out_ptr += kernel_width;
            }
            else
            {
                for(int x = top_left_x; x < x_e; ++x, ++out_ptr)
                {
                    if((x < 0 || x >= input_w) && has_pads)
                    {
                        *out_ptr = pad_value;
                    }
                    else
                    {
                        *out_ptr = *(reinterpret_cast<const T *>(in_ptr + (d * input_stride_z + y * input_stride_y + x * input_stride_x)));
                    }
                }
            }
        }
    }

    // Append 1 if the convolution layer has biases
    if(has_bias)
    {
        if(std::is_same<T, qint8_t>::value)
        {
            *out_ptr = sqcvt_qs8_f32(1.0f, fixed_point_position);
        }
        else if(std::is_same<T, qint16_t>::value)
        {
            *out_ptr = sqcvt_qs16_f32(1.0f, fixed_point_position);
        }
        else
        {
            *out_ptr = static_cast<T>(1);
        }
    }
}
} // namespace

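// Generic im2col: every (x, y) point of the execution window corresponds to one convolution
// step; the matching input patch is linearized into one row of the output matrix.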
template <typename T, bool has_pads>
void NEIm2ColKernel::run_generic(const Window &window)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

    const int kernel_depth   = _input->info()->dimension(2);
    const int input_w        = _input->info()->dimension(0);
    const int input_h        = _input->info()->dimension(1);
    const int input_stride_x = _input->info()->strides_in_bytes().x();
    const int input_stride_y = _input->info()->strides_in_bytes().y();
    const int input_stride_z = _input->info()->strides_in_bytes().z();
    const int offset         = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0;

    int pad_left = 0;
    int pad_top  = 0;
    int stride_x = 0;
    int stride_y = 0;
    pad_left = _conv_info.pad_left();
    pad_top  = _conv_info.pad_top();
    std::tie(stride_x, stride_y) = _conv_info.stride();

    // Setup input window
    const int start_x = -pad_left;
    const int start_y = -pad_top;

    Window window_in(window);
    // The first three dimensions of the input are increased by the inner loops
    window_in.set(Window::DimX, Window::Dimension(0, 0, 0));
    window_in.set(Window::DimY, Window::Dimension(0, 0, 0));
    window_in.set(Window::DimZ, Window::Dimension(0, 0, 0));

    // Setup output window
    Window window_out(window);
    window_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _output->info()->strides_in_bytes().y() / _output->info()->element_size()));
    window_out.set(Window::DimY, Window::Dimension(window.y().start() * _convolved_dims.first, window.y().end() * _convolved_dims.first, _convolved_dims.first));
    window_out.set(Window::DimZ, Window::Dimension(0, 1, 1));

    // Create iterators
    Iterator in(_input, window_in);
    Iterator out(_output, window_out);

    execute_window_loop(window, [&](const Coordinates & id)
    {
        const int top_left_x = id.x() * stride_x + start_x;
        const int top_left_y = id.y() * stride_y + start_y;

        // Get pointers
        const uint8_t *const input_ptr  = in.ptr();
        auto                 output_ptr = reinterpret_cast<T *>(out.ptr());

        // Linearize volume
        linearize_volume<T, has_pads>(input_ptr,
                                      output_ptr,
                                      _has_bias,
                                      top_left_x,
                                      top_left_y,
                                      static_cast<int>(_kernel_width),
                                      static_cast<int>(_kernel_height),
                                      kernel_depth,
                                      input_w,
                                      input_h,
                                      input_stride_x,
                                      input_stride_y,
                                      input_stride_z,
                                      _input->info()->fixed_point_position(),
                                      offset);
    },
    in, out);
}

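// Reduced im2col: the data layout is already the one required, so each 3D input slice (one
// image of the batch) is copied row-by-row into a single flattened row of the output, with a
// trailing 1 appended when the layer has a bias.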
template <typename T>
void NEIm2ColKernel::run_reduced(const Window &window)
{
    const size_t in_width   = _input->info()->dimension(0);
    const size_t in_height  = _input->info()->dimension(1);
    const size_t out_step_x = in_width * _input->info()->element_size();
    const size_t out_step_y = out_step_x * in_height;
    const size_t out_width  = _output->info()->dimension(0);

    Window in_window(window);
    in_window.set(Window::DimX, Window::Dimension(0, 1, 1));

    Window out_window;
    out_window.use_tensor_dimensions(_output->info()->tensor_shape());
    out_window.set(Window::DimX, Window::Dimension(out_window.x().start(), out_window.x().end(), in_width));

    Window in_slice  = in_window.first_slice_window_3D();
    Window out_slice = out_window.first_slice_window_1D();

    do
    {
        Iterator in(_input, in_slice);
        Iterator out(_output, out_slice);

        uint8_t *out_ptr = out.ptr();

        execute_window_loop(in_slice, [&](const Coordinates & id)
        {
            memcpy(out_ptr + id.y() * out_step_x + id.z() * out_step_y, in.ptr(), out_step_x);
        },
        in);

        // Add bias
        if(_has_bias)
        {
            if(std::is_same<T, qint8_t>::value)
            {
                *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = sqcvt_qs8_f32(1.0f, _input->info()->fixed_point_position());
            }
            else if(std::is_same<T, qint16_t>::value)
            {
                *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = sqcvt_qs16_f32(1.0f, _input->info()->fixed_point_position());
            }
            else
            {
                *(reinterpret_cast<T *>(out_ptr) + out_width - 1) = static_cast<T>(1);
            }
        }
    }
    while(in_window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice));
}

NEIm2ColKernel::NEIm2ColKernel()
    : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false)
{
}

void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                               bool has_bias, bool is_fully_connected, bool is_flatten)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

    // Perform validation step
    ARM_COMPUTE_UNUSED(is_fully_connected, is_flatten);
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));

    _input          = input;
    _output         = output;
    _conv_info      = conv_info;
    _kernel_width   = kernel_dims.width;
    _kernel_height  = kernel_dims.height;
    _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1),
                                        _kernel_width, _kernel_height,
                                        _conv_info);
    _has_bias = has_bias;

    unsigned int stride_x = 0;
    unsigned int stride_y = 0;
    std::tie(stride_x, stride_y) = conv_info.stride();

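    // The reduced (plain copy) path is only valid when im2col degenerates into a flatten:
    // unit strides, no padding, and an output whose first dimension covers the whole input
    // volume (width * height * channels), with any batch dimensions carried over unchanged.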
    bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4)
                               && (std::equal(input->info()->tensor_shape().cbegin() + 3,
                                              input->info()->tensor_shape().cend(),
                                              output->info()->tensor_shape().cbegin() + 1))
                               && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding());

    Window window = calculate_max_window(*input->info(), Steps());

    if(run_img2col_reduced)
    {
        switch(_input->info()->data_type())
        {
            case DataType::F32:
                _func = &NEIm2ColKernel::run_reduced<float>;
                break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
                _func = &NEIm2ColKernel::run_reduced<float16_t>;
                break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::QS8:
                _func = &NEIm2ColKernel::run_reduced<qint8_t>;
                break;
            case DataType::QS16:
                _func = &NEIm2ColKernel::run_reduced<qint16_t>;
                break;
            case DataType::QASYMM8:
                _func = &NEIm2ColKernel::run_reduced<qasymm8_t>;
                break;
            default:
                ARM_COMPUTE_ERROR("Data type not supported");
                break;
        }
    }
    else
    {
        switch(_input->info()->data_type())
        {
            case DataType::F32:
                _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
                break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
            case DataType::F16:
                _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
                break;
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
            case DataType::QS8:
                _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
                break;
            case DataType::QS16:
                _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qint16_t, false> : &NEIm2ColKernel::run_generic<qint16_t, true>;
                break;
            case DataType::QASYMM8:
                _func = (!conv_info.has_padding()) ? &NEIm2ColKernel::run_generic<qasymm8_t, false> : &NEIm2ColKernel::run_generic<qasymm8_t, true>;
                break;
            default:
                ARM_COMPUTE_ERROR("Data type not supported");
                break;
        }
        window.set(Window::DimX, Window::Dimension(0, _convolved_dims.first, 1));
        window.set(Window::DimY, Window::Dimension(0, _convolved_dims.second, 1));
        window.set(Window::DimZ, Window::Dimension(0, 1, 1));
    }

    // The NEIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped
    output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));

    IKernel::configure(window);
}

Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
                                bool has_bias, bool is_fully_connected, bool is_flatten)
{
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
    return Status{};
}

void NEIm2ColKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

    (this->*_func)(window);
}