blob: 071c1858bca983726dab86b540bf49d471ca7cf5 [file] [log] [blame]
Anthony Barbier7068f992017-10-26 15:23:08 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
26
Joel Liangf1f3ebd2017-11-10 09:59:19 +080027#include "helpers_cs.h"
Anthony Barbier7068f992017-10-26 15:23:08 +010028
Joel Liangf1f3ebd2017-11-10 09:59:19 +080029#if defined(DATA_TYPE_FP16)
30precision mediump float;
31#endif // DATA_TYPE_FP16
32
/** This kernel performs a direct convolution to convolve the low three dimensions.
 *
 * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
 * @note The convolution stride x must be passed at compile time using "#define STRIDE_X n" e.g. "#define STRIDE_X 1"
 * @note In case biases will be added to the convolution, "#define BIAS" has to be passed at compile time so that the bias is loaded and added to the accumulated result.
 *
 * @param[in]  src_ptr          Pointer to the source tensor. Supported data types: F16/F32
 * @param[in]  src_attrs        The attributes of the source tensor
 * @param[out] dst_ptr          Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in]  dst_attrs        The attributes of the destination tensor
 * @param[in]  weights_ptr      Pointer to the weights tensor. Supported data types: same as @p src_ptr
 * @param[in]  weights_attrs    The attributes of the weights tensor
 * @param[in]  biases_ptr       Pointer to the biases tensor. Supported data types: same as @p src_ptr
 * @param[in]  biases_attrs     The attributes of the biases tensor
 * @param[in]  weights_stride_w Stride of the weights tensor in the 4th dimension
 * @param[in]  weights_depth    The third dimension of the weights tensor
 */
/* Kernel parameters, declared through the SHADER_PARAMS_DECLARATION helper.
 * Member order is significant and must not be changed.
 */
SHADER_PARAMS_DECLARATION
{
    Tensor3DAttributes src_attrs;
    Tensor3DAttributes dst_attrs;
    Tensor3DAttributes weights_attrs;
#if defined(BIAS)
    VectorAttributes biases_attrs; // only declared when the kernel is built with BIAS
#endif // BIAS
    uint weights_stride_w; // multiplied by the invocation's z index to reach its kernel
    uint weights_depth;    // number of iterations of the accumulation loop
};
61
62#if defined(DATA_TYPE_FP32)
// FP32 buffers: every tensor is accessed as scalar floats.
// NOTE(review): the sixth argument equals log2 of the element size in bytes in
// every declaration of this file; confirm its exact meaning in helpers_cs.h.
TENSOR_DECLARATION(1, srcBuffer, float, src_ptr, src_shift, 2, readonly);
TENSOR_DECLARATION(2, dstBuffer, float, dst_ptr, dst_shift, 2, writeonly);
TENSOR_DECLARATION(3, weightsBuffer, float, weights_ptr, weights_shift, 2, readonly);
#if defined(BIAS)
TENSOR_DECLARATION(4, biasesBuffer, float, biases_ptr, biases_shift, 2, readonly);
#endif // BIAS
69
/* FP32 entry point: accumulates src * weight over weights_depth slices,
 * optionally adds the bias selected by the invocation's z index, and stores
 * the single resulting value at the current destination item.
 */
void main()
{
    // z coordinate of this invocation; selects both the kernel and the bias element.
    uint kernel_idx = gl_GlobalInvocationID.z;

    ImageIterator    src_iter     = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
    Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
    Tensor3DIterator dst_iter     = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);

#ifdef BIAS
    VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
#endif // BIAS

    // Move the weights iterator to the kernel that belongs to this z index.
    TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, kernel_idx * weights_stride_w);

    float acc   = 0.f;
    int   depth = int(weights_depth);
    for(int slice = 0; slice < depth; ++slice)
    {
        acc += LOAD_CURRENT_ITEM(src_ptr, src_iter) * LOAD_CURRENT_ITEM(weights_ptr, weights_iter);

        // Step both tensors to the next slice along z.
        TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
        TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
    }

#ifdef BIAS
    acc += LOAD(biases_ptr, VECTOR_OFFSET(biases_iter, kernel_idx));
#endif // BIAS

    STORE_CURRENT_ITEM(dst_ptr, dst_iter, acc);
}
102#elif defined(DATA_TYPE_FP16)
Anthony Barbier7068f992017-10-26 15:23:08 +0100103
// FP16 buffers: source and destination are accessed as uvec4 words, which the
// kernel unpacks to / packs from eight half values; weights and biases are
// accessed as uint words that are unpacked to two half values.
TENSOR_DECLARATION(1, srcBuffer, uvec4, src_ptr, src_shift, 4, readonly);
TENSOR_DECLARATION(2, dstBuffer, uvec4, dst_ptr, dst_shift, 4, writeonly);
TENSOR_DECLARATION(3, weightsBuffer, uint, weights_ptr, weights_shift, 2, readonly);
#if defined(BIAS)
TENSOR_DECLARATION(4, biasesBuffer, uint, biases_ptr, biases_shift, 2, readonly);
#endif // BIAS
110
// Bind CONVOLVE to the helper that matches the compile-time x stride.
// Any value other than 1 or 2 aborts compilation.
#if STRIDE_X == 1
#define CONVOLVE(s, w) convolve_stride1(s, w)
#elif STRIDE_X == 2
#define CONVOLVE(s, w) convolve_stride2(s, w)
#else // STRIDE_X is neither 1 nor 2
#error STRIDE_X larger than 2 is not supported
#endif // STRIDE_X == 1
118
/* Stride-1 helper: loads the eight values at the current source item and
 * returns them scaled by the weight.
 *
 * src_iter  Iterator positioned on the source item to read
 * w         Weight applied to every loaded value
 *
 * Returns two vec4 holding the eight weighted values.
 */
vec4[2] convolve_stride1(ImageIterator src_iter, float w)
{
    vec4 texels[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src_ptr, src_iter);

    return vec4[2](texels[0] * w, texels[1] * w);
}
129
/* Stride-2 helper: loads sixteen values (eight at the current source item and
 * eight more at x offset 8), keeps every second one and returns the eight
 * survivors scaled by the weight.
 *
 * src_iter  Iterator positioned on the first source item to read
 * w         Weight applied to every kept value
 *
 * Returns two vec4 holding the eight weighted values.
 */
vec4[2] convolve_stride2(ImageIterator src_iter, float w)
{
    vec4 near_px[2] = LOAD_UNPACK8_CURRENT_ITEM_HALF(src_ptr, src_iter);
    vec4 far_px[2]  = LOAD_UNPACK8_HALF(src_ptr, IMAGE_OFFSET(src_iter, 8, 0));

    // The .xz swizzles pick components 0 and 2 of each vec4, i.e. every other value.
    return vec4[2](vec4(near_px[0].xz, near_px[1].xz) * w,
                   vec4(far_px[0].xz, far_px[1].xz) * w);
}
145
/* FP16 entry point: accumulates eight weighted values per iteration over
 * weights_depth slices, optionally adds the bias selected by the invocation's
 * z index to all eight, and stores them packed at the current destination item.
 */
void main()
{
    // z coordinate of this invocation; selects both the kernel and the bias element.
    uint kernel_idx = gl_GlobalInvocationID.z;

    ImageIterator    src_iter     = CONVERT_TO_IMAGE_ITERATOR(src_attrs, src_shift);
    Tensor3DIterator weights_iter = CONVERT_TO_TENSOR3D_ITERATOR_NO_STEP(weights_attrs, weights_shift);
    Tensor3DIterator dst_iter     = CONVERT_TO_TENSOR3D_ITERATOR(dst_attrs, dst_shift);

#ifdef BIAS
    VectorIterator biases_iter = CONVERT_TO_VECTOR_ITERATOR_NO_STEP(biases_attrs, biases_shift);
#endif // BIAS

    vec4 acc[2] = vec4[2](vec4(0.f), vec4(0.f));

    // Move the weights iterator to the kernel that belongs to this z index.
    TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, kernel_idx * weights_stride_w);

    int depth = int(weights_depth);
    for(int slice = 0; slice < depth; ++slice)
    {
        // NOTE(review): only the first unpacked half is used; this presumably relies on
        // each weight starting on a 4-byte boundary - confirm against the host-side setup.
        float weight = LOAD_UNPACK2_CURRENT_ITEM_HALF(weights_ptr, weights_iter).x;

        vec4 partial[2] = CONVOLVE(src_iter, weight);
        acc[0] += partial[0];
        acc[1] += partial[1];

        // Step both tensors to the next slice along z.
        TENSOR_ITERATOR_ADVANCE_IN_BYTES(src_iter, src_attrs.stride_z);
        TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, weights_attrs.stride_z);
    }

#ifdef BIAS
    // The load yields two halves; the parity of the z index picks the one to add.
    vec2  bias_pair = LOAD_UNPACK2_HALF(biases_ptr, VECTOR_OFFSET(biases_iter, kernel_idx));
    float bias      = (kernel_idx % uint(2) == uint(0)) ? bias_pair.x : bias_pair.y;

    acc[0] += bias;
    acc[1] += bias;
#endif // BIAS

    STORE_PACK8_CURRENT_ITEM_HALF(dst_ptr, dst_iter, acc);
}
197#else /* DATA_TYPE_FP32 */
198#error Data type not supported
199#endif /* DATA_TYPE_FP32 */