COMPMID-1137 OpenCL concatenate width Change-Id: I40faba421281b1cf080fa6a825d04a4366cdaeb0 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/130700 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>

commit: 55b3d1216b4011d86d5f06335e518dc924987ae5 [log] [tgz]
author: Michalis Spyrou <michalis.spyrou@arm.com> Wed May 09 09:59:23 2018 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:52:35 2018 +0000
tree: a94ca420f4385ff301a770fbeb5b4f930ac60105
parent: a8aef2916379402e241d9f2c5e0faf3f99c860f7 [diff] [blame]
diff --git a/src/core/CL/cl_kernels/concatenate.cl b/src/core/CL/cl_kernels/concatenate.cl
index a92ab5b..f97ae13 100644
--- a/src/core/CL/cl_kernels/concatenate.cl
+++ b/src/core/CL/cl_kernels/concatenate.cl

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,41 @@
  */
 #include "helpers.h"
 
+/** This kernel concatenates the input tensor into the output tensor along the first dimension
+ *
+ * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: QS8, QASYMM8, QS16, F16, F32
+ * @param[in]  src_stride_x                      Stride of the source tensor in X dimension (in bytes)
+ * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
+ * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_ptr                           Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in]  dst_stride_x                      Stride of the destination tensor in X dimension (in bytes)
+ * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  dst_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  dst_stride_z                      Stride of the source tensor in Z dimension (in bytes)
+ * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in]  offset                            The offset to the first valid element of the output tensor in bytes
+ */
+__kernel void concatenate_width(
+    TENSOR3D_DECLARATION(src),
+    TENSOR3D_DECLARATION(dst),
+    int offset)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);
+
+    VSTORE(VEC_SIZE)
+    (source_values, 0, (__global DATA_TYPE *)(dst.ptr + offset));
+}
+
 /** This kernel concatenates the input tensor into the output tensor along the third dimension
  *
  * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: QS8, QS16, F16, F32
commit	55b3d1216b4011d86d5f06335e518dc924987ae5	[log] [tgz]
author	Michalis Spyrou <michalis.spyrou@arm.com>	Wed May 09 09:59:23 2018 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:52:35 2018 +0000
tree	a94ca420f4385ff301a770fbeb5b4f930ac60105
parent	a8aef2916379402e241d9f2c5e0faf3f99c860f7 [diff] [blame]