blob: 65000f2de2e72c3d1b941802ab82a56bb534012c [file] [log] [blame]
Anthony Barbier7068f992017-10-26 15:23:08 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
/* Compute work-group size. LOCAL_SIZE_X/Y/Z are not defined in this file and
 * are referenced before helpers.h is included, so they have to be provided
 * from outside (presumably injected as defines when the shader is built -
 * TODO confirm against the host-side kernel setup). */
24layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in;
25#include "helpers.h"
26
27#ifdef DATA_TYPE_FP32
/* FP32 variant: use high precision as the default for float. */
28precision highp float;
29
/* Uniform block (std140 layout) holding the per-tensor parameters of src and
 * dst. The members are generated by TENSOR3D_PARAM_DECLARATION, which is not
 * defined in this file; the @param list on main() below names the strides,
 * steps and first-element offsets the kernel expects. */
30layout(std140) uniform shader_params
31{
32 TENSOR3D_PARAM_DECLARATION(src);
33 TENSOR3D_PARAM_DECLARATION(dst);
34};
35
/* Storage for the source (readonly) and destination (writeonly) tensors, with
 * float elements. main() accesses them as src_ptr / dst_ptr.
 * NOTE(review): the second argument (1 / 2) is presumably the buffer binding
 * point - confirm against the BUFFER_DECLARATION definition. */
36BUFFER_DECLARATION(src, 1, float, readonly);
37BUFFER_DECLARATION(dst, 2, float, writeonly);
38
39/** This kernel concatenates the input tensor into the output tensor along the third dimension
40 *
41 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32
42 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
43 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
44 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
45 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
46 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
47 * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
48 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
49 * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
50 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
51 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
52 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
53 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
54 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
55 * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
56 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
57 */
void main(void)
{
    /* The local names must stay `src` / `dst`: the helper macros receive the
     * name as an argument and may build other identifiers from it. */
    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);

    /* Destination element for this invocation, displaced by OFFSETS_Z.
     * NOTE(review): the >> 2 presumably converts a byte offset into a float
     * (4-byte) element index - confirm against the host code that defines
     * OFFSETS_Z. */
    uint dst_index = dst.current_offset + uint(OFFSETS_Z >> 2);

    /* Read the source element displaced by (-OFFSETS_X, -OFFSETS_Y, 0) from
     * the current position and copy it into the destination buffer. */
    dst_ptr[dst_index] = src_ptr[tensor3D_offset(src, -OFFSETS_X, -OFFSETS_Y, 0)];
}
65
66#elif defined(DATA_TYPE_FP16)
/* FP16 variant: use medium precision as the default for float. */
67precision mediump float;
68
/* Uniform block (std140 layout) holding the per-tensor parameters of src and
 * dst. The members are generated by TENSOR3D_PARAM_DECLARATION, which is not
 * defined in this file; the @param list on main() below names the strides,
 * steps and first-element offsets the kernel expects. */
69layout(std140) uniform shader_params
70{
71 TENSOR3D_PARAM_DECLARATION(src);
72 TENSOR3D_PARAM_DECLARATION(dst);
73};
74
/* Storage for the source (readonly) and destination (writeonly) tensors. In
 * this variant the buffers are declared with uvec2 elements and main() moves
 * one uvec2 per invocation through src_ptr / dst_ptr.
 * NOTE(review): each uvec2 presumably packs four half-precision values, and
 * the second argument (1 / 2) is presumably the buffer binding point -
 * confirm against the BUFFER_DECLARATION definition. */
75BUFFER_DECLARATION(src, 1, uvec2, readonly);
76BUFFER_DECLARATION(dst, 2, uvec2, writeonly);
77
78/** This kernel concatenates the input tensor into the output tensor along the third dimension
79 *
80 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16
81 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
82 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
83 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
84 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
85 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
86 * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
87 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
88 * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
89 * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
90 * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
91 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
92 * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
93 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
94 * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
95 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
96 */
void main(void)
{
    /* The local names must stay `src` / `dst`: the helper macros receive the
     * name as an argument and may build other identifiers from it. */
    Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src);
    Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst);

    /* Destination slot for this invocation: OFFSETS_Z is added to the current
     * offset before the shift.
     * NOTE(review): the >> 3 presumably converts a byte offset into a uvec2
     * (8-byte) element index - confirm against helpers.h. */
    uint dst_index = (dst.current_offset + uint(OFFSETS_Z)) >> 3;

    /* Load one uvec2 from the source, displaced by (-OFFSETS_X, -OFFSETS_Y, 0)
     * from the current position, and store it unchanged. */
    uvec2 packed_src;
    GC_LOAD1_3D_OFFSET(packed_src, src, -OFFSETS_X, -OFFSETS_Y, 0);
    dst_ptr[dst_index] = packed_src;
}
105}
106#endif /*DATA_TYPE_FP32*/