/*
 * Copyright (c) 2019-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
25#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
Giorgio Arena44f55722019-07-12 14:49:49 +010026
27#include "arm_compute/core/NEON/INEKernel.h"
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000028#include "arm_compute/core/utils/misc/Requires.h"
Giorgio Arena44f55722019-07-12 14:49:49 +010029
Giorgio Arenad93e2632019-10-15 11:09:33 +010030#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
31#include <arm_neon.h>
32#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
33
Giorgio Arena44f55722019-07-12 14:49:49 +010034namespace arm_compute
35{
36// Forward declarations
37class ITensor;
38
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010039/** Interface for the kernel to run a depthwise convolution native on a tensor. */
40class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel
Giorgio Arena44f55722019-07-12 14:49:49 +010041{
42public:
43 const char *name() const override
44 {
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010045 return "NEDepthwiseConvolutionLayerNativeKernel";
Giorgio Arena44f55722019-07-12 14:49:49 +010046 }
47 /** Default constructor */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010048 NEDepthwiseConvolutionLayerNativeKernel();
Giorgio Arena44f55722019-07-12 14:49:49 +010049 /** Prevent instances of this class from being copied (As this class contains pointers) */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010050 NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
Giorgio Arena44f55722019-07-12 14:49:49 +010051 /** Prevent instances of this class from being copied (As this class contains pointers) */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010052 NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete;
Giorgio Arena44f55722019-07-12 14:49:49 +010053 /** Default Move Constructor. */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010054 NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
Giorgio Arena44f55722019-07-12 14:49:49 +010055 /** Default move assignment operator */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010056 NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default;
Giorgio Arena44f55722019-07-12 14:49:49 +010057 /** Initialize the function's source, destination and parameters.
58 *
59 * @note Supported data layouts: NHWC
60 *
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000061 * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010062 * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H].
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000063 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010064 * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000065 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Giorgio Arena44f55722019-07-12 14:49:49 +010066 * @param[out] output Destination tensor. Data type supported: Same as @p input.
67 * @param[in] conv_info Padding and stride information to use for the convolution.
68 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
69 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
70 *
71 */
72 void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
73 const Size2D &dilation = Size2D(1U, 1U));
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +010074 /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel
Giorgio Arena44f55722019-07-12 14:49:49 +010075 *
76 * @note Supported data layouts: NHWC
77 *
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000078 * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010079 * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H].
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000080 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
Michele Di Giorgiodf4cf572019-10-09 15:32:39 +010081 * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +000082 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
Giorgio Arena44f55722019-07-12 14:49:49 +010083 * @param[in] output Destination tensor info. Data type supported: Same as @p input.
84 * @param[in] conv_info Padding and stride information to use for the convolution.
85 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
86 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
87 *
88 * @return a status
89 */
90 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
91 const Size2D &dilation = Size2D(1U, 1U));
92
93 // Inherited methods overridden:
94 void run(const Window &window, const ThreadInfo &info) override;
95 BorderSize border_size() const override;
96
97private:
Giorgio Arenad93e2632019-10-15 11:09:33 +010098 template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, typename std::enable_if < std::is_same<T, float>::value
99#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
100 || std::is_same<T, float16_t>::value
101#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
102 ,
103 int >::type = 0 >
104 void run_depthwise(const Window &window);
105
Michele Di Giorgio8c837ca2020-01-07 15:06:41 +0000106 template < typename T, typename TW, int S, bool has_biases, bool is_per_channel, REQUIRES_TA(std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) >
Giorgio Arena44f55722019-07-12 14:49:49 +0100107 void run_depthwise(const Window &window);
108
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +0100109 /** Common signature for all the specialised depthwise convolution native functions
Giorgio Arena44f55722019-07-12 14:49:49 +0100110 *
111 * @param[in] window Region on which to execute the kernel.
112 */
Gian Marco Iodicebd9097d2019-07-26 15:31:02 +0100113 using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window);
Giorgio Arena44f55722019-07-12 14:49:49 +0100114
115 DepthwiseFunctionPtr _func;
116 BorderSize _border_size;
117 const ITensor *_input;
118 const ITensor *_weights;
119 const ITensor *_biases;
120 ITensor *_output;
121 PadStrideInfo _conv_info;
122 unsigned int _depth_multiplier;
123 Size2D _dilation;
Giorgio Arenad93e2632019-10-15 11:09:33 +0100124 std::vector<int> _output_multiplier;
125 std::vector<int> _output_shift;
Giorgio Arena44f55722019-07-12 14:49:49 +0100126};
127} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000128#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */