blob: 9c0908405a1a71de46df5f1c7ded571b1b383cdd [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__
25#define __ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__
26
27#include "arm_compute/core/CL/ICLSimple2DKernel.h"
28
29#include <cstdint>
30
31namespace arm_compute
32{
33class ICLTensor; // Forward declaration: the declarations in this header only use pointers to ICLTensor.
34
35/****************************************************************************************\
36 * Square Convolution *
37\****************************************************************************************/
38
39/** Interface for the kernel to run a square convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
40 * The client can supply a convolution matrix \f$ C_{m,n} \f$.
41 * @f{eqnarray}{
42 * k_0 &=& \frac{m}{2} \\
43 * l_0 &=& \frac{n}{2} \\
44 * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
45 * @f}
46 *
47 * @note The above equation for this function is similar to the default OpenCV Filter2D function,
48 * which actually computes a correlation and not a convolution.
49 * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
 *
 * @tparam matrix_size Side length of the square convolution matrix. The aliases declared for this class instantiate it with 3, 5, 7 and 9.
50 */
51template <unsigned int matrix_size>
52class CLConvolutionKernel : public ICLSimple2DKernel
53{
54public:
55 /** Initialise the kernel's input, output and border mode.
56 *
57 * @param[in] input Source tensor. Data types supported: U8.
58 * @param[out] output Destination tensor. Data types supported: U8, S16.
59 * @param[in] conv Pointer to the coefficients of the convolution matrix to apply to the input tensor (presumably matrix_size x matrix_size values; confirm against the implementation).
60 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
61 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
62 */
63 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
64
65 // Inherited methods overridden (border size this kernel reports to the caller):
66 BorderSize border_size() const override;
67};
68
69/** Interface for the kernel which applies a 3x3 convolution to a tensor (CLConvolutionKernel with matrix_size = 3). */
70using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
71/** Interface for the kernel which applies a 5x5 convolution to a tensor (CLConvolutionKernel with matrix_size = 5). */
72using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
73/** Interface for the kernel which applies a 7x7 convolution to a tensor (CLConvolutionKernel with matrix_size = 7). */
74using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
75/** Interface for the kernel which applies a 9x9 convolution to a tensor (CLConvolutionKernel with matrix_size = 9). */
76using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
77
78/****************************************************************************************\
79 * Separable Square Convolution *
80\****************************************************************************************/
81
82/** Kernel for the horizontal pass of a separable convolution. Currently supports 5x5, 7x7 and 9x9.
 *
 * @tparam matrix_size Size of the convolution matrix. The aliases declared for this class instantiate it with 5, 7 and 9.
 */
83template <unsigned int matrix_size>
84class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
85{
86public:
87 /** Default Constructor */
88 CLSeparableConvolutionHorKernel();
89 /** Initialise the kernel's input, output and border mode.
90 *
91 * @param[in] input Source tensor. Data types supported: U8.
92 * @param[out] output Destination tensor. Data types supported: S16.
93 * @param[in] conv Pointer to the coefficients of the convolution matrix to apply to the input tensor.
94 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
95 */
96 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
97
98 // Inherited methods overridden:
99 BorderSize border_size() const override;
100
101private:
102 BorderSize _border_size; /**< Border size */
103};
104
105/** Interface for the kernel which applies the horizontal pass of a 5x5 separable convolution to a tensor. */
106using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
107/** Interface for the kernel which applies the horizontal pass of a 7x7 separable convolution to a tensor. */
108using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
109/** Interface for the kernel which applies the horizontal pass of a 9x9 separable convolution to a tensor. */
110using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
111
112/** Kernel for the vertical pass of a separable convolution. Currently supports 5x5, 7x7 and 9x9.
 *
 * @tparam matrix_size Size of the convolution matrix. The aliases declared for this class instantiate it with 5, 7 and 9.
 */
113template <unsigned int matrix_size>
114class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
115{
116public:
117 /** Initialise the kernel's input, output and border mode.
118 *
119 * @param[in] input Source tensor. Data types supported: S16.
120 * @param[out] output Destination tensor. Data types supported: U8, S16.
121 * @param[in] conv Pointer to the coefficients of the convolution matrix to apply to the input tensor.
122 * @param[in] scale Scale of the convolution matrix.
123 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
124 * @param[in] data_type (Optional) Data type to use for the intermediate result. Defaults to DataType::S32. @sa data_type_for_convolution
125 */
126 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
127
128 // Inherited methods overridden:
129 BorderSize border_size() const override;
130};
131
132/** Interface for the kernel which applies the vertical pass of a 5x5 separable convolution to a tensor. */
133using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
134/** Interface for the kernel which applies the vertical pass of a 7x7 separable convolution to a tensor. */
135using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
136/** Interface for the kernel which applies the vertical pass of a 9x9 separable convolution to a tensor. */
137using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
138
139/****************************************************************************************\
140 * Rectangle Convolution *
141\****************************************************************************************/
142
143/** Kernel for running a convolution with a rectangular matrix.
144 *
145 * @note Supports matrix widths and heights of 3, 5, 7 and 9, in any combination.
146 */
147class CLConvolutionRectangleKernel : public ICLKernel
148{
149public:
150 /** Default constructor */
151 CLConvolutionRectangleKernel();
152 /** Prevent instances of this class from being copied (As this class contains pointers) */
153 CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
154 /** Prevent instances of this class from being copied (As this class contains pointers) */
155 CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
156 /** Allow instances of this class to be moved */
157 CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
158 /** Allow instances of this class to be moved */
159 CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
160 /** Initialise the kernel's input, output and border mode.
161 *
162 * @param[in] input Source tensor. Data types supported: U8.
163 * @param[out] output Destination tensor. Data types supported: U8, S16.
164 * @param[in] conv Pointer to the coefficients of the convolution matrix to apply to the input tensor (presumably width x height values; confirm against the implementation).
165 * @param[in] width Width of convolution matrix (Number of columns)
166 * @param[in] height Height of convolution matrix (Number of rows)
167 * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
168 * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
169 */
170 void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
171
172 // Inherited methods overridden:
173 void run(const Window &window, cl::CommandQueue &queue) override;
174 BorderSize border_size() const override;
175
176private:
177 BorderSize _border_size; /**< Border size */
178 const ICLTensor *_input; /**< Pointer to the source tensor (accessed read-only) */
179 ICLTensor *_output; /**< Pointer to the destination tensor */
180};
181}
182#endif /*__ARM_COMPUTE_CLCONVOLUTIONKERNEL_H__ */