blob: 01ddae12bbff5d9229b83ec681a63647ec8cf78a [file] [log] [blame]
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
25#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
Giorgio Arena93a690e2017-08-01 16:09:33 +010026
Giorgio Arena93a690e2017-08-01 16:09:33 +010027#include "arm_compute/core/Types.h"
28#include "arm_compute/runtime/CL/CLTensor.h"
Georgios Pinitas05045c12018-12-07 18:31:47 +000029#include "arm_compute/runtime/CL/functions/CLPermute.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010030#include "arm_compute/runtime/IFunction.h"
Georgios Pinitas26014cf2019-09-09 19:00:57 +010031#include "arm_compute/runtime/MemoryGroup.h"
Giorgio Arena93a690e2017-08-01 16:09:33 +010032
Giorgio Arena93a690e2017-08-01 16:09:33 +010033namespace arm_compute
34{
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010035class CLCompileContext;
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010036class CLDepthwiseConvolutionLayerNativeKernel;
Giorgio Arena93a690e2017-08-01 16:09:33 +010037class ICLTensor;
38
Manuel Bottini05069f02019-09-26 17:18:26 +010039/** Function to execute a depthwise convolution
Gian Marco Iodice8155c022021-04-16 15:08:59 +010040 *
41 * -# @ref CLDepthwiseConvolutionLayerNativeKernel
42 * -# @ref CLPermute (if the data layout is NCHW)
43 *
Manuel Bottini05069f02019-09-26 17:18:26 +010044 */
45class CLDepthwiseConvolutionLayer : public IFunction
46{
47public:
48 /** Default constructor */
49 CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
50 /** Prevent instances of this class from being copied (As this class contains pointers) */
51 CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
52 /** Default move constructor */
53 CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
54 /** Prevent instances of this class from being copied (As this class contains pointers) */
55 CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
56 /** Default move assignment operator */
57 CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010058 /** Default destructor */
59 ~CLDepthwiseConvolutionLayer();
Manuel Bottini05069f02019-09-26 17:18:26 +010060 /** Initialize the function's source, destination, weights and convolution information.
61 *
Teresa Charlin62687422021-04-28 10:58:49 +010062 * Valid data layouts:
63 * - NHWC
64 * - NCHW
65 *
66 * Valid data type configurations:
67 * |src0 |src1 |src2 |dst |
68 * |:--------------|:------------------|:------|:--------------|
69 * |F16 |F16 |F16 |F16 |
70 * |F32 |F32 |F32 |F32 |
71 * |QASYMM8 |QASYMM8 |S32 |QASYMM8 |
72 * |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |QASYMM8 |
73 * |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |QASYMM8_SIGNED |
74 * |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |QASYMM8_SIGNED |
75 *
Manuel Bottini2b84be52020-04-08 10:15:51 +010076 * @param[in] compile_context The compile context to be used.
morgolock3e77c272020-10-14 12:12:55 +010077 * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
Manuel Bottini2b84be52020-04-08 10:15:51 +010078 * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM].
morgolock3e77c272020-10-14 12:12:55 +010079 * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
Manuel Bottini2b84be52020-04-08 10:15:51 +010080 * @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
morgolock3e77c272020-10-14 12:12:55 +010081 * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
SiCongLibc4e3112021-06-29 13:18:30 +010082 * @param[out] output Destination tensor. Pass in nullptr or @p input for in-place operation. Data type supported: same as @p input.
Manuel Bottini2b84be52020-04-08 10:15:51 +010083 * @param[in] conv_info Padding and stride information to use for the convolution.
84 * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
85 * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
86 * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
SiCongLibc4e3112021-06-29 13:18:30 +010087 *
88 * @note: For in-place support, please check @ref CLDepthwiseConvolutionLayerNativeKernel
Manuel Bottini2b84be52020-04-08 10:15:51 +010089 */
90 void configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
91 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini05069f02019-09-26 17:18:26 +010092
SiCongLibc4e3112021-06-29 13:18:30 +010093 /** Initialize the function's source, destination, weights and convolution information.
94 *
95 * Similar to @ref CLDepthwiseConvolutionLayer::configure()
96 */
97 void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
98 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
99
Manuel Bottini05069f02019-09-26 17:18:26 +0100100 /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
101 *
SiCongLibc4e3112021-06-29 13:18:30 +0100102 * Similar to @ref CLDepthwiseConvolutionLayer::configure()
Manuel Bottini05069f02019-09-26 17:18:26 +0100103 *
104 * @return a status
105 */
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100106 static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
107 unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Manuel Bottini05069f02019-09-26 17:18:26 +0100108
109 // Inherited methods overriden:
110 void run() override;
111 void prepare() override;
112
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100113 void set_memory_group(std::shared_ptr<IMemoryManager> memory_manager)
114 {
115 _memory_group = MemoryGroup(std::move(memory_manager));
116 };
117
Gian Marco Iodicec63b7222021-06-30 08:39:44 +0000118private:
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100119 MemoryGroup _memory_group;
Gian Marco Iodicec63b7222021-06-30 08:39:44 +0000120
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100121 std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
122 CLPermute _permute_input_to_nhwc;
123 CLPermute _permute_weights_to_nhwc;
124 CLPermute _permute_output_to_nchw;
Gian Marco Iodicec63b7222021-06-30 08:39:44 +0000125
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100126 CLTensor _permuted_input;
127 CLTensor _permuted_weights;
128 CLTensor _permuted_output;
129 CLTensor _output_multipliers;
130 CLTensor _output_shifts;
131 const ITensor *_original_weights;
132 const ITensor *_input;
133 const ITensor *_output;
Gian Marco Iodicec63b7222021-06-30 08:39:44 +0000134
Gian Marco Iodice8155c022021-04-16 15:08:59 +0100135 bool _needs_permute;
136 bool _is_prepared;
137 bool _is_quantized;
Manuel Bottini05069f02019-09-26 17:18:26 +0100138};
Georgios Pinitas05045c12018-12-07 18:31:47 +0000139} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000140#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H */