blob: 70d66897310cdec74b693563ac8f8941ff6d3e64 [file] [log] [blame]
Georgios Pinitas4074c992018-01-30 18:13:46 +00001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2018-2019 Arm Limited.
Georgios Pinitas4074c992018-01-30 18:13:46 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#pragma once
26
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +010027#include <arm_neon.h>
Georgios Pinitas30271c72019-06-24 14:56:34 +010028#include "activation.hpp"
29#include "padding.hpp"
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000030
Georgios Pinitas4074c992018-01-30 18:13:46 +000031namespace depthwise
32{
33
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000034namespace nck = neon_convolution_kernels;
35
Georgios Pinitas4074c992018-01-30 18:13:46 +000036class IDepthwiseConvolution
37{
Georgios Pinitasbe0ae932018-03-13 13:08:12 +000038 public:
Georgios Pinitas4074c992018-01-30 18:13:46 +000039 virtual ~IDepthwiseConvolution() = default;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000040
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000041 virtual int output_size(
42 int dim_size,
43 unsigned int padding_before,
44 unsigned int padding_after
45 ) const = 0;
46
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000047 /* Set input tensor and stride. */
48 virtual void set_input(const void *inptr) = 0;
49 virtual void set_input(const void *inptr, int column_stride) = 0;
50 virtual void set_input(const void *inptr, int row_stride, int column_stride) = 0;
51 virtual void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) = 0;
52
53 /* Set output tensor and stride. */
54 virtual void set_output(void *outptr) = 0;
55 virtual void set_output(void *outptr, int column_stride) = 0;
56 virtual void set_output(void *outptr, int row_stride, int column_stride) = 0;
57 virtual void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) = 0;
58
59 /* Weights and biases are re-ordered to improve memory access patterns. Use
60 * these methods to determine the size of the re-pack buffer and to set the
61 * address (and implicitly reorder the weights and biases into) the buffer.
62 */
63 virtual size_t get_packed_params_size(void) const = 0;
64 virtual void set_packed_params_buffer(void *) = 0;
65
66 virtual void pack_params(const void *weights, const void *biases=nullptr) const = 0;
67 virtual void pack_params(void *buffer, const void *weights, const void *biases=nullptr) const = 0;
68 virtual void pack_params(
69 void *buffer,
70 const void* weights,
71 unsigned int weight_row_stride,
72 unsigned int weight_col_stride,
73 const void *biases=nullptr
74 ) const = 0;
75
76 /* Working space is used to pad tensors on the fly. Before running any
77 * inference check the amount of space required, allocate and provide a
78 * pointer to the convolution engine.
79 */
80 virtual size_t get_working_space_size(unsigned int nthreads=1) const = 0;
81 virtual void set_working_space(void *) = 0;
82
Georgios Pinitas4074c992018-01-30 18:13:46 +000083 virtual unsigned int get_window(void) const = 0;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000084 virtual void run(
85 unsigned int start,
86 unsigned int stop,
87 unsigned int threadid=0
88 ) = 0;
Georgios Pinitas4074c992018-01-30 18:13:46 +000089};
90
91template <
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000092 unsigned int OutputTileRows, unsigned int OutputTileCols,
93 unsigned int KernelRows, unsigned int KernelCols,
94 unsigned int StrideRows, unsigned int StrideCols,
95 typename TIn, typename TBias, typename TOut,
96 typename Derived
Georgios Pinitas4074c992018-01-30 18:13:46 +000097>
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000098class DepthwiseConvolutionBase : public IDepthwiseConvolution
Georgios Pinitas4074c992018-01-30 18:13:46 +000099{
100 public:
Georgios Pinitas4074c992018-01-30 18:13:46 +0000101 // Information about the specific convolution instance
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000102 using InputType = TIn;
103 using BiasType = TBias;
104 using OutputType = TOut;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000105 static constexpr int output_tile_rows = OutputTileRows;
106 static constexpr int output_tile_cols = OutputTileCols;
107 static constexpr int kernel_rows = KernelRows;
108 static constexpr int kernel_cols = KernelCols;
109 static constexpr int stride_rows = StrideRows;
110 static constexpr int stride_cols = StrideCols;
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000111 static constexpr int inner_tile_rows = stride_rows * (output_tile_rows - 1) + kernel_rows;
112 static constexpr int inner_tile_cols = stride_cols * (output_tile_cols - 1) + kernel_cols;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000113
114 /** Create a new depthwise convolution engine.
115 *
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000116 * @param[in] n_batches Number of batches tensors.
117 * @param[in] n_input_rows Number of rows in input tensor.
118 * @param[in] n_input_cols Number of columns in input tensor.
119 * @param[in] n_channels Number of channels in input and output tensors.
Georgios Pinitas4074c992018-01-30 18:13:46 +0000120 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000121 DepthwiseConvolutionBase(
122 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
123 nck::ActivationFunction activation,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000124 unsigned int padding_top,
125 unsigned int padding_left,
126 unsigned int padding_bottom,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000127 unsigned int padding_right
Georgios Pinitas4074c992018-01-30 18:13:46 +0000128 );
129
Georgios Pinitas30271c72019-06-24 14:56:34 +0100130 /** Create a new depthwise convolution engine.
131 *
132 * @param[in] n_batches Number of batches tensors.
133 * @param[in] n_input_rows Number of rows in input tensor.
134 * @param[in] n_input_cols Number of columns in input tensor.
135 * @param[in] n_channels Number of channels in input and output tensors.
136 */
137 DepthwiseConvolutionBase(
138 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
139 int n_output_rows, int n_output_cols,
140 nck::ActivationFunction activation,
141 unsigned int padding_top,
142 unsigned int padding_left,
143 unsigned int padding_bottom,
144 unsigned int padding_right
145 );
146
Georgios Pinitas4074c992018-01-30 18:13:46 +0000147 // Cannot copy or move a DepthwiseConvolution.
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000148 DepthwiseConvolutionBase(DepthwiseConvolutionBase&) = delete;
149 DepthwiseConvolutionBase operator=(DepthwiseConvolutionBase&) = delete;
150
151 /* Set input tensor and stride. */
152 void set_input(const void *inptr) override;
153 void set_input(const void *inptr, int column_stride) override;
154 void set_input(const void *inptr, int row_stride, int column_stride) override;
155 void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
156
157 /* Set output tensor and stride. */
158 void set_output(void *outptr) override;
159 void set_output(void *outptr, int column_stride) override;
160 void set_output(void *outptr, int row_stride, int column_stride) override;
161 void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000162
163 /** Get the number of output rows/columns.
164 *
165 * @param[in] dim_size Number of elements in the dimension (rows/columns)
166 * @param[in] same_padding True if the padding is SAME, otherwise false.
167 */
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000168 static int get_output_size(
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000169 int dim_size, unsigned int padding_before, unsigned int padding_after
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000170 );
Georgios Pinitas4074c992018-01-30 18:13:46 +0000171
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000172 int output_size(
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000173 int dim_size, unsigned int padding_before, unsigned int padding_after
174 ) const override;
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000175
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000176 /* Determine how much memory is required to store the packed weights and
177 * biases.
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100178 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000179 size_t get_packed_params_size(void) const override;
180
181 /* Set the buffer for the packed weights and biases, and perform the
182 * packing.
183 */
184 void set_packed_params_buffer(void *buffer) override;
185
186 void pack_params(const void *weights, const void *biases=nullptr) const override;
187
188 void pack_params(
189 void *buffer,
190 const void *weights,
191 const void *biases=nullptr
192 ) const override;
193
194 void pack_params(
195 void *buffer,
196 const void *weights,
197 unsigned int weight_row_stride,
198 unsigned int weight_col_stride,
199 const void *biases=nullptr
200 ) const override;
201
202 /** Query the amount of working space required.
203 * @param[in] The largest number of threads which will be used to execute
204 * the kernel.
205 */
206 size_t get_working_space_size(unsigned int n_threads=1) const override;
207
208 /** Set the working space buffer.
209 */
210 void set_working_space(void *buffer) override;
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100211
Georgios Pinitas4074c992018-01-30 18:13:46 +0000212 /** Get the window of work to be performed by an instance of the operator.
213 */
214 unsigned int get_window(void) const override;
215
216 /** Perform a portion of the work associated with the operator.
217 *
218 * Will perform the window of work described by $[start, stop)$.
219 *
220 * @param[in] start Start of the window of work to perform.
221 * @param[in] stop End of the work to perform.
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000222 * @param[in] ID of the thread performing the work.
Georgios Pinitas4074c992018-01-30 18:13:46 +0000223 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000224 void run(
225 unsigned int start,
226 unsigned int stop,
227 unsigned int threadid=0
228 ) override;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000229
230 protected:
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000231 /** Get the value to use to pad the tensor.
232 */
233 TIn _input_padding_value(void) const;
234
235 /** Implementation of the parameter packing.
236 */
237 void _pack_params(
238 void *buffer,
239 const void *weights,
240 unsigned int weight_row_stride,
241 unsigned int weight_col_stride,
242 const void *biases=nullptr
243 ) const;
244
Georgios Pinitas4074c992018-01-30 18:13:46 +0000245 /** Process a tile-row of the tensors.
246 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000247 void process_tile_row(
248 unsigned int threadid,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000249 int n_channels,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000250 const void* packed_params,
251 const InputType* inptr,
252 OutputType* outptr,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000253 int row_pad_in_top,
254 int row_pad_in_left,
255 int row_pad_in_bottom,
256 int row_pad_out_bottom,
257 int n_tiles,
258 int n_input_cols,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000259 int n_output_cols
Georgios Pinitas4074c992018-01-30 18:13:46 +0000260 );
261
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000262 /** Process a single tile of the tensor.
Georgios Pinitas4074c992018-01-30 18:13:46 +0000263 *
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000264 * This method will apply input/output padding (if required) and call the
265 * depthwise tile implementation.
Georgios Pinitas4074c992018-01-30 18:13:46 +0000266 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000267 void process_tile(
268 unsigned int threadid,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000269 int n_channels,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000270 const void* packed_params,
271 const InputType* inptr,
272 OutputType* outptr,
273 int pad_in_top,
274 int pad_in_left,
275 int pad_in_bottom,
276 int pad_in_right,
277 int pad_out_bottom,
278 int pad_out_right
Georgios Pinitas4074c992018-01-30 18:13:46 +0000279 );
280
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000281 /** Perform depthwise convolution on a single tile.
Georgios Pinitas4074c992018-01-30 18:13:46 +0000282 */
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000283 template <nck::ActivationFunction Activation>
284 void execute_tile(
285 int n_channels,
286 const void* packed_params,
287 const InputType* inptr,
288 unsigned int in_row_stride,
289 unsigned int in_col_stride,
290 OutputType* outptr,
291 unsigned int out_row_stride,
292 unsigned int out_col_stride
293 );
294
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100295 template <nck::ActivationFunction Activation>
296 void execute_tile(
297 int n_channels,
298 const void* packed_params,
299 const InputType* inptrs[inner_tile_rows][inner_tile_cols],
300 OutputType* outptrs[output_tile_rows][output_tile_cols]
301 );
302
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000303 int n_channels(void) const;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000304
305 private:
306 // Member variables of instances of a convolution engine.
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000307 const InputType* _input;
308 OutputType* _output;
309 void* _packed_parameters;
310 void* _working_space; // Per-thread working space
Georgios Pinitas4074c992018-01-30 18:13:46 +0000311 const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
312 _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000313 const unsigned int _padding_top, _padding_left, _padding_bottom, _padding_right;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000314 const nck::ActivationFunction _activation;
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000315
316 // Stride information for a convolution instance
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000317 int _input_col_stride, _input_row_stride, _input_batch_stride;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000318 int _output_col_stride, _output_row_stride, _output_batch_stride;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000319
320 // Methods for getting access to working space
321 size_t _get_input_working_space_size(void) const;
322 size_t _get_output_working_space_size(void) const;
323
324 void *_get_input_working_space(unsigned int threadid) const;
325 void *_get_output_working_space(unsigned int threadid) const;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000326};
327
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000328
329template <
330 unsigned int OutputTileRows, unsigned int OutputTileCols,
331 unsigned int KernelRows, unsigned int KernelCols,
332 unsigned int StrideRows, unsigned int StrideCols,
333 typename TIn, typename TBias, typename TOut
334>
335class DepthwiseConvolution : public DepthwiseConvolutionBase<
336 OutputTileRows, OutputTileCols,
337 KernelRows, KernelCols,
338 StrideRows, StrideCols,
339 TIn, TBias, TOut,
340 DepthwiseConvolution<
341 OutputTileRows, OutputTileCols,
342 KernelRows, KernelCols,
343 StrideRows, StrideCols,
344 TIn, TBias, TOut
345 >
346>
347{
348 using Base = DepthwiseConvolutionBase<
349 OutputTileRows, OutputTileCols,
350 KernelRows, KernelCols,
351 StrideRows, StrideCols,
352 TIn, TBias, TOut,
353 DepthwiseConvolution<
354 OutputTileRows, OutputTileCols,
355 KernelRows, KernelCols,
356 StrideRows, StrideCols,
357 TIn, TBias, TOut
358 > >;
359 friend Base;
360 using InputType = typename Base::InputType;
361 using OutputType = typename Base::OutputType;
362
363 public:
364 using Base::DepthwiseConvolutionBase;
365
366 protected:
367 template <nck::ActivationFunction Activation>
368 void execute_tile(
369 int n_channels,
370 const void* packed_params,
371 const TIn* inptr,
372 unsigned int in_row_stride,
373 unsigned int in_col_stride,
374 TOut* outptr,
375 unsigned int out_row_stride,
376 unsigned int out_col_stride
377 );
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100378
379 template <nck::ActivationFunction Activation>
380 void execute_tile(
381 int n_channels,
382 const void* packed_params,
383 const InputType* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
384 OutputType* outptrs[Base::output_tile_rows][Base::output_tile_cols]
385 );
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000386};
387
388
389template <
390 unsigned int OutputTileRows, unsigned int OutputTileCols,
391 unsigned int KernelRows, unsigned int KernelCols,
392 unsigned int StrideRows, unsigned int StrideCols
393>
394class DepthwiseConvolution<
395 OutputTileRows, OutputTileCols,
396 KernelRows, KernelCols,
397 StrideRows, StrideCols,
398 float, float, float
399> : public DepthwiseConvolutionBase<
400 OutputTileRows, OutputTileCols,
401 KernelRows, KernelCols,
402 StrideRows, StrideCols,
403 float, float, float,
404 DepthwiseConvolution<
405 OutputTileRows, OutputTileCols,
406 KernelRows, KernelCols,
407 StrideRows, StrideCols,
408 float, float, float
409 >
410>
411{
412 using Base = DepthwiseConvolutionBase<
413 OutputTileRows, OutputTileCols,
414 KernelRows, KernelCols,
415 StrideRows, StrideCols,
416 float, float, float,
417 DepthwiseConvolution<
418 OutputTileRows, OutputTileCols,
419 KernelRows, KernelCols,
420 StrideRows, StrideCols,
421 float, float, float
422 > >;
423 friend Base;
424 using InputType = typename Base::InputType;
425 using OutputType = typename Base::OutputType;
426
427 public:
428 DepthwiseConvolution(
429 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
430 nck::ActivationFunction activation,
431 unsigned int padding_top,
432 unsigned int padding_left,
433 unsigned int padding_bottom,
434 unsigned int padding_right
435 );
436
Georgios Pinitas30271c72019-06-24 14:56:34 +0100437 DepthwiseConvolution(
438 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
439 int n_output_rows, int n_output_cols,
440 nck::ActivationFunction activation,
441 unsigned int padding_top,
442 unsigned int padding_left,
443 unsigned int padding_bottom,
444 unsigned int padding_right
445 );
446
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000447 protected:
448 template <nck::ActivationFunction Activation>
449 void execute_tile(
450 int n_channels,
451 const void* packed_params,
452 const float* inptr,
453 unsigned int in_row_stride,
454 unsigned int in_col_stride,
455 float* outptr,
456 unsigned int out_row_stride,
457 unsigned int out_col_stride
458 );
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100459
460 template <nck::ActivationFunction Activation>
461 void execute_tile(
462 int n_channels,
463 const void* packed_params,
464 const float* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
465 float* outptrs[Base::output_tile_rows][Base::output_tile_cols]
466 );
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000467};
468
469#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
470template <
471 unsigned int OutputTileRows, unsigned int OutputTileCols,
472 unsigned int KernelRows, unsigned int KernelCols,
473 unsigned int StrideRows, unsigned int StrideCols
474>
475class DepthwiseConvolution<
476 OutputTileRows, OutputTileCols,
477 KernelRows, KernelCols,
478 StrideRows, StrideCols,
479 float16_t, float16_t, float16_t
480> : public DepthwiseConvolutionBase<
481 OutputTileRows, OutputTileCols,
482 KernelRows, KernelCols,
483 StrideRows, StrideCols,
484 float16_t, float16_t, float16_t,
485 DepthwiseConvolution<
486 OutputTileRows, OutputTileCols,
487 KernelRows, KernelCols,
488 StrideRows, StrideCols,
489 float16_t, float16_t, float16_t
490 >
491>
492{
493 using Base = DepthwiseConvolutionBase<
494 OutputTileRows, OutputTileCols,
495 KernelRows, KernelCols,
496 StrideRows, StrideCols,
497 float16_t, float16_t, float16_t,
498 DepthwiseConvolution<
499 OutputTileRows, OutputTileCols,
500 KernelRows, KernelCols,
501 StrideRows, StrideCols,
502 float16_t, float16_t, float16_t
503 > >;
504 friend Base;
505 using InputType = typename Base::InputType;
506 using OutputType = typename Base::OutputType;
507
508 public:
509 DepthwiseConvolution(
510 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
511 nck::ActivationFunction activation,
512 unsigned int padding_top,
513 unsigned int padding_left,
514 unsigned int padding_bottom,
515 unsigned int padding_right
516 );
517
Georgios Pinitas30271c72019-06-24 14:56:34 +0100518 DepthwiseConvolution(
519 int n_batches, int n_input_rows, int n_input_cols, int n_channels,
520 int n_output_rows, int n_output_cols,
521 nck::ActivationFunction activation,
522 unsigned int padding_top,
523 unsigned int padding_left,
524 unsigned int padding_bottom,
525 unsigned int padding_right
526 );
527
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000528 protected:
529 template <nck::ActivationFunction Activation>
530 void execute_tile(
531 int n_channels,
532 const void* packed_params,
533 const float16_t* inptr,
534 unsigned int in_row_stride,
535 unsigned int in_col_stride,
536 float16_t* outptr,
537 unsigned int out_row_stride,
538 unsigned int out_col_stride
539 );
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100540
541 template <nck::ActivationFunction Activation>
542 void execute_tile(
543 int n_channels,
544 const void* packed_params,
545 const float16_t* inptrs[Base::inner_tile_rows][Base::inner_tile_cols],
546 float16_t* outptrs[Base::output_tile_rows][Base::output_tile_cols]
547 );
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000548};
549#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100550
Georgios Pinitas4074c992018-01-30 18:13:46 +0000551} // namespace depthwise