blob: 19a509bd0ad67133ba2d0cbe96b04405aaab4a8b [file] [log] [blame]
Dmitry Savenkod7295b72017-11-20 22:00:08 +07001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "helpers_asymm.h"
26
27#if defined(CONV_STRIDE_X)
28
29#if CONV_STRIDE_X == 1
30#define convolution1x3 convolution1x3_stride_1
31#elif CONV_STRIDE_X == 2
32#define convolution1x3 convolution1x3_stride_2
33#elif CONV_STRIDE_X == 3
34#define convolution1x3 convolution1x3_stride_3
35#else /* CONV_STRIDE_X */
36#error "Stride not supported"
37#endif /* CONV_STRIDE_X */
38
39/** Compute a 1D horizontal convolution of size 3 and stride 1 for uchar type.
40 *
41 * @param[in] left_pixel Pointer to the left pixel.
42 * @param[in] left_coeff Weight of the left pixel
43 * @param[in] middle_coeff Weight of the middle pixel
44 * @param[in] right_coeff Weight of the right pixel
45 * @param[in] input_offset Quantized offset of zero point of the input tensor data range
46 * @param[in] weight_offset Quantized offset of zero point of the weights tensor data range
47 *
48 * @return a int2 containing 2 convoluted values.
49 */
50inline int2 convolution1x3_stride_1(__global const uchar *left_pixel,
51 const int left_coeff,
52 const int middle_coeff,
53 const int right_coeff,
54 const int input_offset,
55 const int weight_offset)
56{
57 int4 temp = CONVERT(vload4(0, left_pixel), int4);
58
59 int2 left = CONVERT(temp.s01, int2);
60 int2 middle = CONVERT(temp.s12, int2);
61 int2 right = CONVERT(temp.s23, int2);
62
63 return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);
64}
65
66/** Compute a 1D horizontal convolution of size 3 and stride 2 for uchar type.
67 *
68 * @param[in] left_pixel Pointer to the left pixel.
69 * @param[in] left_coeff Weight of the left pixel
70 * @param[in] middle_coeff Weight of the middle pixel
71 * @param[in] right_coeff Weight of the right pixel
72 * @param[in] input_offset Quantized offset of zero point of the input tensor data range
73 * @param[in] weight_offset Quantized offset of zero point of the weights tensor data range
74 *
75 * @return a int2 containing 2 convoluted values.
76 */
77inline int2 convolution1x3_stride_2(__global const uchar *left_pixel,
78 const int left_coeff,
79 const int middle_coeff,
80 const int right_coeff,
81 const int input_offset,
82 const int weight_offset)
83{
84 int4 temp0 = CONVERT(vload4(0, left_pixel), int4);
85 int temp1 = CONVERT(*(left_pixel + 4 * sizeof(uchar)), int);
86
87 int2 left = CONVERT(temp0.s02, int2);
88 int2 middle = CONVERT(temp0.s13, int2);
89 int2 right = CONVERT((int2)(temp0.s2, temp1), int2);
90
91 return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);
92}
93
94/** Compute a 1D horizontal convolution of size 3 and stride 3 for uchar type.
95 *
96 * @param[in] left_pixel Pointer to the left pixel.
97 * @param[in] left_coeff Weight of the left pixel
98 * @param[in] middle_coeff Weight of the middle pixel
99 * @param[in] right_coeff Weight of the right pixel
100 * @param[in] input_offset Quantized offset of zero point of the input tensor data range
101 * @param[in] weight_offset Quantized offset of zero point of the weights tensor data range
102 *
103 * @return a int2 containing 2 convoluted values.
104 */
105inline int2 convolution1x3_stride_3(__global const uchar *left_pixel,
106 const int left_coeff,
107 const int middle_coeff,
108 const int right_coeff,
109 const int input_offset,
110 const int weight_offset)
111{
112 int4 temp0 = CONVERT(vload4(0, left_pixel), int4);
113 int2 temp1 = CONVERT(vload2(0, (left_pixel + 4 * sizeof(uchar))), int2);
114
115 int2 left = CONVERT(temp0.s03, int2);
116 int2 middle = CONVERT((int2)(temp0.s1, temp1.s0), int2);
117 int2 right = CONVERT((int2)(temp0.s2, temp1.s1), int2);
118
119 return (left + input_offset) * (int2)(left_coeff + weight_offset) + (middle + input_offset) * (int2)(middle_coeff + weight_offset) + (right + input_offset) * (int2)(right_coeff + weight_offset);
120}
121
122/** Apply a 3x3 convolution matrix to a single channel QASYMM8 input image and return the result.
123 *
124 * Convolution matrix layout:
125 *
126 * [ mat0, mat1, mat2 ]\n
127 * [ mat3, mat4, mat5 ]\n
128 * [ mat6, mat7, mat8 ]\n
129 *
130 * @param[in] src A pointer to source Image structure
131 * @param[in] mat0 Coefficient from the convolution matrix
132 * @param[in] mat1 Coefficient from the convolution matrix
133 * @param[in] mat2 Coefficient from the convolution matrix
134 * @param[in] mat3 Coefficient from the convolution matrix
135 * @param[in] mat4 Coefficient from the convolution matrix
136 * @param[in] mat5 Coefficient from the convolution matrix
137 * @param[in] mat6 Coefficient from the convolution matrix
138 * @param[in] mat7 Coefficient from the convolution matrix
139 * @param[in] mat8 Coefficient from the convolution matrix
140 * @param[in] input_offset Quantized offset of zero point of the input tensor data range
141 * @param[in] weight_offset Quantized offset of zero point of the weights tensor data range
142 * @param[in] output_offset Quantized offset of zero point of the output tensor data range
143 * @param[in] output_multiplier Output scale multiplier
144 * @param[in] output_shift Output scale divisor exponent
145 * @param[in] bias (Optional) Bias value
146 *
147 * @return a uchar2 containing 2 convoluted values.
148 */
149inline uchar2 convolution3x3(
150 Image *src,
151 const uchar mat0, const uchar mat1, const uchar mat2,
152 const uchar mat3, const uchar mat4, const uchar mat5,
153 const uchar mat6, const uchar mat7, const uchar mat8,
154 const int input_offset, const int weight_offset, const int output_offset,
155 const int output_multiplier, const int output_shift
156#if defined(HAS_BIAS)
157 ,
158 const int bias
159#endif //defined(HAS_BIAS)
160)
161{
162 int2 pixels;
163
164 pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2, input_offset, weight_offset);
165 pixels += convolution1x3(offset(src, 0, 1), mat3, mat4, mat5, input_offset, weight_offset);
166 pixels += convolution1x3(offset(src, 0, 2), mat6, mat7, mat8, input_offset, weight_offset);
167#if defined(HAS_BIAS)
168 pixels += (int2)(bias);
169#endif //defined(HAS_BIAS)
170
171 pixels = ASYMM_MULT_BY_QUANT_MULTIPLIER_LESS_THAN_ONE(pixels, output_multiplier, output_shift, 2);
172 pixels = pixels + output_offset;
173 pixels = clamp(pixels, 0, 255);
174
175 return CONVERT(pixels, uchar2);
176}
177
178/** This function computes the horizontal integral of the image.
179 *
180 * @param[in] src_ptr Pointer to the source image. Supported data types: QASYMM8
181 * @param[in] src_stride_x Stride of the source image in X dimension (in bytes)
182 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
183 * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
184 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
185 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
186 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
187 * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes)
188 * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: QASYMM8
189 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
190 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
191 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
192 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
193 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
194 * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes)
195 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
196 * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8
197 * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
198 * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
199 * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
200 * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
201 * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)
202 * @param[in] weights_step_z weights_stride_z * number of elements along Y processed per workitem(in bytes)
203 * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
204 * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: QASYMM8
205 * @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes)
206 * @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes)
207 * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector
208 * @param[in] input_offset Quantized offset of zero point of the input tensor data range
209 * @param[in] weight_offset Quantized offset of zero point of the weights tensor data range
210 * @param[in] output_offset Quantized offset of zero point of the output tensor data range
211 * @param[in] output_multiplier Output scale multiplier
212 * @param[in] output_shift Output scale divisor exponent
213 */
214
215__kernel void depthwise_convolution_3x3_quantized(
216 TENSOR3D_DECLARATION(src),
217 TENSOR3D_DECLARATION(dst),
218 TENSOR3D_DECLARATION(weights),
219#if defined(HAS_BIAS)
220 VECTOR_DECLARATION(biases),
221#endif //defined(HAS_BIAS)
222 int input_offset,
223 int weight_offset,
224 int output_offset,
225 int output_multiplier,
226 int output_shift)
227{
228 Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
229 Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
230 Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights);
231#if defined(HAS_BIAS)
232 Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
233#endif //defined(HAS_BIAS)
234
235 uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y;
236 uchar3 weights_values0 = vload3(0, weights.ptr + offset.s0);
237 uchar3 weights_values1 = vload3(0, weights.ptr + offset.s1);
238 uchar3 weights_values2 = vload3(0, weights.ptr + offset.s2);
239
240#if defined(HAS_BIAS)
241 int bias_value = *((__global int *)(vector_offset(&biases, get_global_id(2))));
242#endif //defined(HAS_BIAS)
243
244 uchar2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2,
245 weights_values1.s0, weights_values1.s1, weights_values1.s2,
246 weights_values2.s0, weights_values2.s1, weights_values2.s2,
247 input_offset, weight_offset, output_offset,
248 output_multiplier, output_shift
249#if defined(HAS_BIAS)
250 ,
251 bias_value
252#endif //defined(HAS_BIAS)
253 );
254
255 vstore2(pixels, 0, dst.ptr);
256}
257
258#endif //defined(CONV_STRIDE_X)