/*
 * Copyright (c) 2018-2019 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/*
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 *
 * NOTE: Header to be included by implementation files only.
 *
 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 */
32
33#include <algorithm>
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000034#include <cstdint>
Georgios Pinitas30271c72019-06-24 14:56:34 +010035#include "depthwise.hpp"
36#include "padding.hpp"
37#include "utils.hpp"
Georgios Pinitas4074c992018-01-30 18:13:46 +000038
39#pragma once
40
// Boilerplate for defining a member of DepthwiseConvolutionBase out of line.
// Expands to the class's template header, then `TOUT` (the member's return
// type; left empty for constructors), then the fully qualified class name, so
// that a definition reads e.g.
//   MEMBERFN(int)::n_channels(void) const { ... }
#define MEMBERFN(TOUT) \
  template < \
    unsigned int OutputTileRows, \
    unsigned int OutputTileColumns, \
    unsigned int KernelRows, \
    unsigned int KernelColumns, \
    unsigned int StrideRows, \
    unsigned int StrideColumns, \
    typename TIn, \
    typename TBias, \
    typename TOut, \
    typename Derived \
  > TOUT DepthwiseConvolutionBase< \
    OutputTileRows, OutputTileColumns, \
    KernelRows, KernelColumns, \
    StrideRows, StrideColumns, \
    TIn, TBias, TOut, Derived \
  >
53
54using namespace neon_convolution_kernels;
55
Georgios Pinitas4074c992018-01-30 18:13:46 +000056namespace depthwise
57{
58
// Hook used by the default DepthwiseConvolutionBase::_pack_params to write the
// weights and biases of `n_channels` channels into `buffer`.
//
// Only the declaration lives here; `execute` is defined elsewhere.
//
// Template parameters:
//   KernelRows, KernelColumns - kernel dimensions.
//   WeightSize, BiasSize      - sizes passed as `sizeof` of the weight and
//                               bias element types (presumably bytes per
//                               element; confirm against the definition).
// Arguments of execute:
//   n_channels        - number of channels to process.
//   buffer            - destination for the packed parameters.
//   weights           - source weights.
//   weight_row_stride - stride between kernel rows in `weights`.
//   weight_col_stride - stride between kernel columns in `weights`.
//   biases            - source biases.
template <
  unsigned int KernelRows,
  unsigned int KernelColumns,
  size_t WeightSize,
  size_t BiasSize
>
struct PackParameters
{
  static void execute(
    unsigned int n_channels,
    void *buffer,
    const void *weights,
    unsigned int weight_row_stride,
    unsigned int weight_col_stride,
    const void *biases
  );
};
71
// Number of channels in one unit of the work window: get_window() reports the
// channel count in blocks of this size and run() converts block indices back
// into channel indices with it.
constexpr unsigned int CHANNEL_BLOCK = 16;
73
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000074MEMBERFN(int)::get_output_size(
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000075 const int dim_size, const unsigned int padding_before, const unsigned int padding_after
76)
77{
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000078 return iceildiv(dim_size + padding_before + padding_after - KernelRows + 1, StrideRows);
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000079}
Georgios Pinitas4074c992018-01-30 18:13:46 +000080
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000081MEMBERFN(int)::output_size(
82 const int dim_size, const unsigned int padding_before, const unsigned int padding_after
83) const
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000084{
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000085 return get_output_size(dim_size, padding_before, padding_after);
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000086}
87
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000088MEMBERFN()::DepthwiseConvolutionBase(
89 const int n_batches,
90 const int n_input_rows,
91 const int n_input_cols,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000092 const int n_channels,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000093 ActivationFunction activation,
Georgios Pinitas1a57ad12019-01-09 16:11:51 +000094 const unsigned int padding_top,
95 const unsigned int padding_left,
96 const unsigned int padding_bottom,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +000097 const unsigned int padding_right
Georgios Pinitas30271c72019-06-24 14:56:34 +010098) : DepthwiseConvolutionBase(
99 n_batches, n_input_rows, n_input_cols, n_channels,
100 get_output_size(n_input_rows, padding_top, padding_bottom),
101 get_output_size(n_input_cols, padding_left, padding_right),
102 activation,
103 padding_top, padding_left, padding_bottom, padding_right
104 )
105{
106}
107
108MEMBERFN()::DepthwiseConvolutionBase(
109 const int n_batches,
110 const int n_input_rows,
111 const int n_input_cols,
112 const int n_channels,
113 const int n_output_rows,
114 const int n_output_cols,
115 ActivationFunction activation,
116 const unsigned int padding_top,
117 const unsigned int padding_left,
118 const unsigned int padding_bottom,
119 const unsigned int padding_right
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000120) : _input(nullptr), _output(nullptr),
121 _packed_parameters(nullptr),
122 _working_space(nullptr),
Georgios Pinitas4074c992018-01-30 18:13:46 +0000123 _n_batches(n_batches),
124 _n_input_rows(n_input_rows),
125 _n_input_cols(n_input_cols),
126 _n_channels(n_channels),
Georgios Pinitas30271c72019-06-24 14:56:34 +0100127 _n_output_rows(n_output_rows),
128 _n_output_cols(n_output_cols),
Georgios Pinitas4074c992018-01-30 18:13:46 +0000129 _n_tile_rows(iceildiv(_n_output_rows, output_tile_rows)),
130 _n_tile_cols(iceildiv(_n_output_cols, output_tile_cols)),
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000131 _padding_top(padding_top),
132 _padding_left(padding_left),
133 _padding_bottom(padding_bottom),
134 _padding_right(padding_right),
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000135 _activation(activation),
136 _input_col_stride(0), _input_row_stride(0), _input_batch_stride(0),
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100137 _output_col_stride(0), _output_row_stride(0), _output_batch_stride(0)
Georgios Pinitas4074c992018-01-30 18:13:46 +0000138{
139}
140
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000141MEMBERFN(void)::set_input(const void* const inptr)
142{
143 set_input(inptr, _n_channels);
144}
Georgios Pinitas4074c992018-01-30 18:13:46 +0000145
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000146MEMBERFN(void)::set_input(const void* const inptr, const int ld_col)
147{
148 set_input(inptr, _n_input_cols * ld_col, ld_col);
149}
150
151MEMBERFN(void)::set_input(const void* const inptr, const int ld_row, const int ld_col)
152{
153 set_input(inptr, _n_input_rows * ld_row, ld_row, ld_col);
154}
155
156MEMBERFN(void)::set_input(const void* const inptr, const int ld_batch, const int ld_row, const int ld_col)
157{
158 _input = static_cast<const TIn *>(inptr);
159 _input_batch_stride = ld_batch;
160 _input_row_stride = ld_row;
161 _input_col_stride = ld_col;
162}
163
164MEMBERFN(void)::set_output(void* const outptr)
165{
166 set_output(outptr, _n_channels);
167}
168
169MEMBERFN(void)::set_output(void* const outptr, const int ld_col)
170{
171 set_output(outptr, _n_output_cols * ld_col, ld_col);
172}
173
174MEMBERFN(void)::set_output(void* const outptr, const int ld_row, const int ld_col)
175{
176 set_output(outptr, _n_output_rows * ld_row, ld_row, ld_col);
177}
178
179MEMBERFN(void)::set_output(void* const outptr, const int ld_batch, const int ld_row, const int ld_col)
180{
181 _output = static_cast<TOut *>(outptr);
182 _output_batch_stride = ld_batch;
183 _output_row_stride = ld_row;
184 _output_col_stride = ld_col;
185}
186
187MEMBERFN(size_t)::get_packed_params_size(void) const
188{
189 return _n_channels * (sizeof(TIn)*KernelRows*KernelColumns + sizeof(TBias));
190}
191
192MEMBERFN(void)::set_packed_params_buffer(void *buffer)
193{
194 _packed_parameters = buffer;
195}
196
197MEMBERFN(void)::pack_params(const void *weights, const void *biases) const
198{
199 static_cast<const Derived *>(this)->pack_params(_packed_parameters, weights, biases);
200}
201
202MEMBERFN(void)::pack_params(void *buffer, const void *weights, const void *biases) const
203{
204 const unsigned int weight_col_stride = _n_channels;
205 const unsigned int weight_row_stride = KernelColumns * weight_col_stride;
206 static_cast<const Derived *>(this)->pack_params(
207 buffer, weights, weight_row_stride, weight_col_stride, biases
208 );
209}
210
211MEMBERFN(void)::pack_params(
212 void * const buffer,
213 const void * const weights,
214 const unsigned int weight_row_stride,
215 const unsigned int weight_col_stride,
216 const void * const biases
217) const
218{
219 static_cast<const Derived *>(this)->_pack_params(
220 buffer, weights, weight_row_stride, weight_col_stride, biases
221 );
222}
223
224MEMBERFN(void)::_pack_params(
225 void * const buffer,
226 const void * const weights,
227 const unsigned int weight_row_stride,
228 const unsigned int weight_col_stride,
229 const void * const biases
230) const
231{
232 // Default implementation
233 PackParameters<KernelRows, KernelColumns, sizeof(TIn), sizeof(TOut)>::execute(
234 _n_channels, buffer, weights, weight_row_stride, weight_col_stride, biases
235 );
236}
237
238MEMBERFN(size_t)::get_working_space_size(const unsigned int nthreads) const
239{
240 return nthreads * (
241 _get_input_working_space_size() + _get_output_working_space_size()
242 );
243}
244
245MEMBERFN(void)::set_working_space(void *buffer)
246{
247 _working_space = buffer;
248}
249
250MEMBERFN(size_t)::_get_input_working_space_size(void) const
251{
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100252 return sizeof(TIn) * _n_channels;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000253}
254
255MEMBERFN(size_t)::_get_output_working_space_size(void) const
256{
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100257 return sizeof(TOut) * _n_channels;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000258}
259
260MEMBERFN(void *)::_get_input_working_space(const unsigned int threadid) const
261{
262 return static_cast<uint8_t*>(_working_space) + threadid * (
263 _get_input_working_space_size() + _get_output_working_space_size()
264 );
265}
266
267MEMBERFN(void *)::_get_output_working_space(const unsigned int threadid) const
268{
269 return static_cast<uint8_t*>(_get_input_working_space(threadid)) + _get_input_working_space_size();
270}
271
272MEMBERFN(unsigned int)::get_window() const
Georgios Pinitas4074c992018-01-30 18:13:46 +0000273{
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000274 // Parallelise over blocks of channels.
275 return iceildiv(_n_channels, CHANNEL_BLOCK);
Georgios Pinitas4074c992018-01-30 18:13:46 +0000276}
277
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000278MEMBERFN(void)::run(
Georgios Pinitas4074c992018-01-30 18:13:46 +0000279 const unsigned int start,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000280 const unsigned int stop,
281 const unsigned int threadid
Georgios Pinitas4074c992018-01-30 18:13:46 +0000282)
283{
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100284 // Clear the input padding buffer
285 TIn *buf = static_cast<TIn *>(_get_input_working_space(threadid));
286 const TIn pad_value = static_cast<Derived *>(this)->_input_padding_value();
287 for (int n = 0; n < _n_channels; n++)
288 {
289 buf[n] = pad_value;
290 }
291
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000292 // Parallelise over blocks of channels
293 const auto start_channel = CHANNEL_BLOCK * start;
294 const auto stop_channel = std::min<unsigned int>(_n_channels, CHANNEL_BLOCK * stop);
Giuseppe Rossinif01201a2019-11-06 14:57:49 +0000295 const auto params_size_per_channel = this->get_packed_params_size()/_n_channels;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000296
297 // Compute top and bottom padding for input and output
Georgios Pinitas1a57ad12019-01-09 16:11:51 +0000298 const int input_pad_top = _padding_top;
299 const int input_pad_left = _padding_left;
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000300 constexpr int tile_overlap = kernel_rows - stride_rows;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000301
302 // Perform the convolution by calling `process_tile_row` for each tile row in
303 // each batch.
304 for (int batch = 0; batch < _n_batches; batch++)
305 {
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000306 const TIn* const inptr_batch = _input + batch*_input_batch_stride;
307 TOut* const outptr_batch = _output + batch*_output_batch_stride;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000308
309 // Loop over rows of tiles
310 for (int tile_i = 0; tile_i < _n_tile_rows; tile_i++)
311 {
312 // Pointer to the row
313 const int input_row_offset = (tile_i == 0) ? 0 : input_pad_top;
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000314 const TIn* const inptr_row = (inptr_batch + ((inner_tile_rows - tile_overlap)*tile_i - input_row_offset)*_input_row_stride);
315 TOut* const outptr_row = outptr_batch + output_tile_rows * tile_i * _output_row_stride;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000316
317 // Input padding (top + bottom) for the row
318 const int input_row_top = tile_i*(inner_tile_rows - tile_overlap) - input_pad_top;
319 const int input_row_bottom = input_row_top + inner_tile_rows;
320 const int input_row_pad_top = (tile_i == 0) ? input_pad_top : 0;
321 const int input_row_pad_bottom = std::max(0, input_row_bottom - _n_input_rows);
322
323 // Output padding (bottom) for the row
324 const int output_row_bottom = (tile_i + 1)*output_tile_rows;
325 const int output_row_pad_bottom = std::max(0, output_row_bottom - _n_output_rows);
326
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000327 // Get the offset into the packed parameters
328 const auto params_ptr = static_cast<const uint8_t*>(_packed_parameters) +
Giuseppe Rossinif01201a2019-11-06 14:57:49 +0000329 start_channel*params_size_per_channel;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000330
Georgios Pinitas4074c992018-01-30 18:13:46 +0000331 // Process the row
332 process_tile_row(
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000333 threadid,
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000334 stop_channel - start_channel,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000335 params_ptr,
336 inptr_row + start_channel,
337 outptr_row + start_channel,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000338 input_row_pad_top, input_pad_left, input_row_pad_bottom,
339 output_row_pad_bottom,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000340 _n_tile_cols, _n_input_cols, _n_output_cols
Georgios Pinitas4074c992018-01-30 18:13:46 +0000341 );
342 }
343 }
344}
345
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000346MEMBERFN(void)::process_tile_row(
347 const unsigned int threadid,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000348 const int n_channels,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000349 const void* const packed_params,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000350 const TIn* const inptr,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000351 TOut* const outptr,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000352 const int row_pad_in_top,
353 const int row_pad_in_left,
354 const int row_pad_in_bottom,
355 const int row_pad_out_bottom,
356 const int n_tiles,
357 const int n_input_cols,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000358 const int n_output_cols
Georgios Pinitas4074c992018-01-30 18:13:46 +0000359)
360{
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000361 constexpr int tile_overlap = kernel_cols - stride_cols;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000362
363 // Loop over columns of tiles
364 for (int tile_j = 0; tile_j < n_tiles; tile_j++)
365 {
366 // Input padding (left + right) for the tile
367 const int t_pad_in_left = (tile_j == 0) ? row_pad_in_left : 0;
368 const int t_in_start = tile_j*(inner_tile_cols - tile_overlap) - row_pad_in_left;
369 const int t_in_end = t_in_start + inner_tile_cols;
370 const int t_pad_in_right = std::max(0, t_in_end - n_input_cols);
371
372 // Output padding (right) for the tile
373 const int t_out_end = (tile_j + 1) * output_tile_cols;
374 const int t_pad_out_right = std::max(0, t_out_end - n_output_cols);
375
376 // Get pointers into the inputs and outputs
377 const int col_offset = (tile_j == 0) ? 0 : row_pad_in_left;
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000378 const TIn* const inptr_col = (inptr + ((inner_tile_cols - tile_overlap)*tile_j - col_offset)*_input_col_stride);
379 TOut* const outptr_col = outptr + tile_j * output_tile_cols * _output_col_stride;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000380
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000381 // Process just this tile
382 process_tile(
383 threadid, n_channels, packed_params, inptr_col, outptr_col,
384 row_pad_in_top, t_pad_in_left, row_pad_in_bottom, t_pad_in_right, // Input paddings
385 row_pad_out_bottom, t_pad_out_right // Output paddings
Georgios Pinitas4074c992018-01-30 18:13:46 +0000386 );
387 }
388}
389
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000390MEMBERFN(TIn)::_input_padding_value(void) const
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000391{
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000392 return static_cast<TIn>(0);
393}
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000394
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000395MEMBERFN(void)::process_tile(
396 const unsigned int threadid,
Georgios Pinitas4074c992018-01-30 18:13:46 +0000397 const int n_channels,
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000398 const void* const packed_params,
399 const TIn* const inptr,
400 TOut* const outptr,
401 const int pad_in_top,
402 const int pad_in_left,
403 const int pad_in_bottom,
404 const int pad_in_right,
405 const int pad_out_bottom,
406 const int pad_out_right
Georgios Pinitas4074c992018-01-30 18:13:46 +0000407)
408{
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100409 Derived * dthis = static_cast<Derived *>(this);
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000410 const bool pad_input = pad_in_top || pad_in_left || pad_in_bottom || pad_in_right;
411 const bool pad_output = pad_out_bottom || pad_out_right;
Georgios Pinitasbe0ae932018-03-13 13:08:12 +0000412
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100413 if (!pad_input && !pad_output)
Georgios Pinitas4074c992018-01-30 18:13:46 +0000414 {
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100415 switch(_activation)
416 {
417 case ActivationFunction::ReLU:
418 dthis->template execute_tile<ActivationFunction::ReLU>(
419 n_channels, packed_params,
420 inptr, _input_row_stride, _input_col_stride,
421 outptr, _output_row_stride, _output_col_stride
422 );
423 break;
424 case ActivationFunction::ReLU6:
425 dthis->template execute_tile<ActivationFunction::ReLU6>(
426 n_channels, packed_params,
427 inptr, _input_row_stride, _input_col_stride,
428 outptr, _output_row_stride, _output_col_stride
429 );
430 break;
431 default:
432 dthis->template execute_tile<ActivationFunction::None>(
433 n_channels, packed_params,
434 inptr, _input_row_stride, _input_col_stride,
435 outptr, _output_row_stride, _output_col_stride
436 );
437 break;
438 }
Georgios Pinitas4074c992018-01-30 18:13:46 +0000439 }
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100440 else
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000441 {
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100442 // Create arrays of input and output pointers, pointing padded elements to
443 // the working space padding buffers provided.
444 const TIn *inptrs[inner_tile_rows][inner_tile_cols];
445 for (int i = 0; i < inner_tile_rows; i++)
446 {
447 for (int j = 0; j < inner_tile_cols; j++)
448 {
449 if (i < pad_in_top || (inner_tile_rows - pad_in_bottom) <= i ||
450 j < pad_in_left || (inner_tile_cols - pad_in_right) <= j)
451 {
452 // Padded input
453 inptrs[i][j] = static_cast<const TIn *>(_get_input_working_space(threadid));
454 }
455 else
456 {
457 inptrs[i][j] = inptr + (i - pad_in_top)*_input_row_stride + (j - pad_in_left)*_input_col_stride;
458 }
459 }
460 }
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000461
Georgios Pinitasa4bba9c2019-04-02 15:27:52 +0100462 TOut *outptrs[output_tile_rows][output_tile_cols];
463 for (int i = 0; i < output_tile_rows; i++)
464 {
465 for (int j = 0; j < output_tile_cols; j++)
466 {
467 if (i < (output_tile_rows - pad_out_bottom) &&
468 j < (output_tile_cols - pad_out_right))
469 {
470 outptrs[i][j] = outptr + i*_output_row_stride + j*_output_col_stride;
471 }
472 else
473 {
474 outptrs[i][j] = static_cast<TOut *>(_get_output_working_space(threadid));
475 }
476 }
477 }
478
479 switch(_activation)
480 {
481 case ActivationFunction::ReLU:
482 dthis->template execute_tile<ActivationFunction::ReLU>(
483 n_channels, packed_params, inptrs, outptrs
484 );
485 break;
486 case ActivationFunction::ReLU6:
487 dthis->template execute_tile<ActivationFunction::ReLU6>(
488 n_channels, packed_params, inptrs, outptrs
489 );
490 break;
491 default:
492 dthis->template execute_tile<ActivationFunction::None>(
493 n_channels, packed_params, inptrs, outptrs
494 );
495 break;
496 }
Georgios Pinitas47d39dc2019-03-11 14:03:23 +0000497 }
498}
499
500MEMBERFN(int)::n_channels(void) const
501{
502 return _n_channels;
Georgios Pinitas4074c992018-01-30 18:13:46 +0000503}
504
Georgios Pinitas4074c992018-01-30 18:13:46 +0000505} // namespace depthwise