Blame - src/core/CL/cl_kernels/concatenate.cl - ml/ComputeLibrary

2017-09-04 18:44:23 +0100

[diff] [blame]

1

/*

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

2

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

* SOFTWARE.

*/

#include "helpers.h"

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

26

#if defined(DATA_TYPE) && defined(VEC_SIZE)

27

Michele Di Giorgio

27400b9

2018-11-01 13:44:05 +0000

[diff] [blame]

28

#if defined(DEPTH) && defined(ELEMENT_SIZE)

29

30

#if defined(INPUT1_WIDTH)

31

32

// Signed integer type used for the per-lane mask passed to select() in the kernels below.
// NOTE(review): ELEMENT_SIZE is presumably sizeof(DATA_TYPE) in bytes, so the mask lanes
// match the data lanes in width -- confirm against the host-side build options.
#if ELEMENT_SIZE == 1
#define COND_DATA_TYPE char
#elif ELEMENT_SIZE == 2
#define COND_DATA_TYPE short
#elif ELEMENT_SIZE == 4
#define COND_DATA_TYPE int
#else // ELEMENT_SIZE
#error "Element size not supported"
#endif // ELEMENT_SIZE

// Lane index vector (0, 1, ..., VEC_SIZE - 1); added to a work-item's base x coordinate
// to obtain the x coordinate handled by each vector lane.
#if VEC_SIZE == 2
#define SEQ ((int2)(0, 1))
#elif VEC_SIZE == 4
#define SEQ ((int4)(0, 1, 2, 3))
#elif VEC_SIZE == 8
#define SEQ ((int8)(0, 1, 2, 3, 4, 5, 6, 7))
#elif VEC_SIZE == 16
#define SEQ ((int16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
#else // VEC_SIZE
#error "Vector size not supported"
#endif // VEC_SIZE

53

/** This kernel concatenates two input tensors into the output tensor along the first dimension

54

*

55

* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float

56

* @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16

57

* @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128

58

* @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16

59

* @note First input tensor width should be given as a preprocessor argument using -DINPUT1_WIDTH=width. e.g. -DINPUT1_WIDTH=8

60

*

61

* @param[in] src1_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32

62

* @param[in] src1_stride_x Stride of the source tensor in X dimension (in bytes)

63

* @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

64

* @param[in] src1_stride_y Stride of the source tensor in Y dimension (in bytes)

65

* @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

66

* @param[in] src1_stride_z Stride of the source tensor in Z dimension (in bytes)

67

* @param[in] src1_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

68

* @param[in] src1_stride_w Stride of the first source tensor in Z dimension (in bytes)

69

* @param[in] src1_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

70

* @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source tensor

71

* @param[in] src2_ptr Pointer to the source tensor. Supported data types: same as @p src1_ptr

72

* @param[in] src2_stride_x Stride of the source tensor in X dimension (in bytes)

73

* @param[in] src2_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

74

* @param[in] src2_stride_y Stride of the source tensor in Y dimension (in bytes)

75

* @param[in] src2_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

76

* @param[in] src2_stride_z Stride of the source tensor in Z dimension (in bytes)

77

* @param[in] src2_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

78

* @param[in] src2_stride_w Stride of the first source tensor in Z dimension (in bytes)

79

* @param[in] src2_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

80

* @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source tensor

81

* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src1_ptr

82

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

83

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

84

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

85

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

86

* @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)

87

* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

88

* @param[in] dst_stride_w Stride of the destination tensor in Z dimension (in bytes)

89

* @param[in] dst_step_w output_stride_z * number of elements along Z processed per workitem(in bytes)

90

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

91

*/

92

__kernel void concatenate_width_x2(

93

TENSOR4D_DECLARATION(src1),

94

TENSOR4D_DECLARATION(src2),

95

TENSOR4D_DECLARATION(dst))

96

{

97

Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

98

99

// Calculate input indices

100

const int x = get_global_id(0) * (int)VEC_SIZE;

101

const int y = get_global_id(1);

102

const int z = get_global_id(2) % (int)DEPTH;

103

const int w = get_global_id(2) / (int)DEPTH;

104

const int x1 = min(x, (int)INPUT1_WIDTH);

105

const int x2 = max(x - (int)INPUT1_WIDTH, -(int)VEC_SIZE);

106

107

// Calculate inputs and output addresses

108

const __global uchar *in1_ptr = src1_ptr + (int)src1_offset_first_element_in_bytes + x1 * (int)src1_stride_x + y * (int)src1_stride_y + z * (int)src1_stride_z + w * (int)src1_stride_w;

109

const __global uchar *in2_ptr = src2_ptr + (int)src2_offset_first_element_in_bytes + x2 * (int)src2_stride_x + y * (int)src2_stride_y + z * (int)src2_stride_z + w * (int)src2_stride_w;

110

111

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src1_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in1_ptr);

112

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src2_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in2_ptr);

113

114

const VEC_DATA_TYPE(int, VEC_SIZE) x_coords = SEQ + (VEC_DATA_TYPE(int, VEC_SIZE))(x);

115

const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) cond = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(INPUT1_WIDTH), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

116

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) values = select(src2_values, src1_values, cond);

117

118

VSTORE(VEC_SIZE)

119

(values, 0, (__global DATA_TYPE *)dst.ptr);

120

}

121

122

#if defined(INPUT2_WIDTH) && defined(INPUT3_WIDTH)

123

/** This kernel concatenates four input tensors into the output tensor along the first dimension

124

*

125

* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float

126

* @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16

127

* @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128

128

* @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16

129

* @note First input tensor width should be given as a preprocessor argument using -DINPUT1_WIDTH=width. e.g. -DINPUT1_WIDTH=8

130

* @note Second input tensor width should be given as a preprocessor argument using -DINPUT2_WIDTH=width. e.g. -DINPUT2_WIDTH=8

131

* @note Third input tensor width should be given as a preprocessor argument using -DINPUT3_WIDTH=width. e.g. -DINPUT3_WIDTH=8

132

*

133

* @param[in] src1_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32

134

* @param[in] src1_stride_x Stride of the source tensor in X dimension (in bytes)

135

* @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

136

* @param[in] src1_stride_y Stride of the source tensor in Y dimension (in bytes)

137

* @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

138

* @param[in] src1_stride_z Stride of the source tensor in Z dimension (in bytes)

139

* @param[in] src1_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

140

* @param[in] src1_stride_w Stride of the first source tensor in Z dimension (in bytes)

141

* @param[in] src1_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

142

* @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source tensor

143

* @param[in] src2_ptr Pointer to the source tensor. Supported data types: same as @p src1_ptr

144

* @param[in] src2_stride_x Stride of the source tensor in X dimension (in bytes)

145

* @param[in] src2_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

146

* @param[in] src2_stride_y Stride of the source tensor in Y dimension (in bytes)

147

* @param[in] src2_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

148

* @param[in] src2_stride_z Stride of the source tensor in Z dimension (in bytes)

149

* @param[in] src2_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

150

* @param[in] src2_stride_w Stride of the first source tensor in Z dimension (in bytes)

151

* @param[in] src2_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

152

* @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source tensor

153

* @param[in] src3_ptr Pointer to the source tensor. Supported data types: same as @p src1_ptr

154

* @param[in] src3_stride_x Stride of the source tensor in X dimension (in bytes)

155

* @param[in] src3_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

156

* @param[in] src3_stride_y Stride of the source tensor in Y dimension (in bytes)

157

* @param[in] src3_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

158

* @param[in] src3_stride_z Stride of the source tensor in Z dimension (in bytes)

159

* @param[in] src3_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

160

* @param[in] src3_stride_w Stride of the first source tensor in Z dimension (in bytes)

161

* @param[in] src3_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

162

* @param[in] src3_offset_first_element_in_bytes The offset of the first element in the source tensor

163

* @param[in] src4_ptr Pointer to the source tensor. Supported data types: same as @p src1_ptr

164

* @param[in] src4_stride_x Stride of the source tensor in X dimension (in bytes)

165

* @param[in] src4_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

166

* @param[in] src4_stride_y Stride of the source tensor in Y dimension (in bytes)

167

* @param[in] src4_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

168

* @param[in] src4_stride_z Stride of the source tensor in Z dimension (in bytes)

169

* @param[in] src4_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

170

* @param[in] src4_stride_w Stride of the first source tensor in Z dimension (in bytes)

171

* @param[in] src4_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

172

* @param[in] src4_offset_first_element_in_bytes The offset of the first element in the source tensor

173

* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src1_ptr

174

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

175

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

176

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

177

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

178

* @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)

179

* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

180

* @param[in] dst_stride_w Stride of the destination tensor in Z dimension (in bytes)

181

* @param[in] dst_step_w output_stride_z * number of elements along Z processed per workitem(in bytes)

182

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

183

*/

184

__kernel void concatenate_width_x4(

185

TENSOR4D_DECLARATION(src1),

186

TENSOR4D_DECLARATION(src2),

187

TENSOR4D_DECLARATION(src3),

188

TENSOR4D_DECLARATION(src4),

189

TENSOR4D_DECLARATION(dst))

190

{

191

Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

192

193

// Calculate input indices

194

const int x = get_global_id(0) * (int)VEC_SIZE;

195

const int y = get_global_id(1);

196

const int z = get_global_id(2) % (int)DEPTH;

197

const int w = get_global_id(2) / (int)DEPTH;

198

199

const int x1 = min(x, (int)INPUT1_WIDTH);

200

const int x2 = min(max(x - (int)INPUT1_WIDTH, -(int)VEC_SIZE), (int)INPUT2_WIDTH);

201

const int x3 = min(max(x - (int)INPUT1_WIDTH - (int)INPUT2_WIDTH, -(int)VEC_SIZE), (int)INPUT3_WIDTH);

202

const int x4 = max(x - (int)INPUT1_WIDTH - (int)INPUT2_WIDTH - (int)INPUT3_WIDTH, -(int)VEC_SIZE);

203

204

// Calculate inputs and output addresses

205

const __global uchar *in1_ptr = src1_ptr + (int)src1_offset_first_element_in_bytes + x1 * (int)src1_stride_x + y * (int)src1_stride_y + z * (int)src1_stride_z + w * (int)src1_stride_w;

206

const __global uchar *in2_ptr = src2_ptr + (int)src2_offset_first_element_in_bytes + x2 * (int)src2_stride_x + y * (int)src2_stride_y + z * (int)src2_stride_z + w * (int)src2_stride_w;

207

const __global uchar *in3_ptr = src3_ptr + (int)src3_offset_first_element_in_bytes + x3 * (int)src3_stride_x + y * (int)src3_stride_y + z * (int)src3_stride_z + w * (int)src3_stride_w;

208

const __global uchar *in4_ptr = src4_ptr + (int)src4_offset_first_element_in_bytes + x4 * (int)src4_stride_x + y * (int)src4_stride_y + z * (int)src4_stride_z + w * (int)src4_stride_w;

209

210

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src1_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in1_ptr);

211

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src2_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in2_ptr);

212

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src3_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in3_ptr);

213

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) src4_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in4_ptr);

214

215

const VEC_DATA_TYPE(int, VEC_SIZE) x_coords = SEQ + (VEC_DATA_TYPE(int, VEC_SIZE))(x);

216

217

const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) cond_in2 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(INPUT1_WIDTH), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

218

const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) cond_in3 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(INPUT1_WIDTH + INPUT2_WIDTH), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

219

const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) cond_in4 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(INPUT1_WIDTH + INPUT2_WIDTH + INPUT3_WIDTH), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

220

221

VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

222

values = select(src2_values, src1_values, cond_in2);

223

values = select(src3_values, values, cond_in3);

224

values = select(src4_values, values, cond_in4);

225

226

VSTORE(VEC_SIZE)

227

(values, 0, (__global DATA_TYPE *)dst.ptr);

228

}

229

#endif /* defined(INPUT2_WIDTH) && defined(INPUT3_WIDTH) */

230

#endif /* defined(INPUT1_WIDTH) */

231

#endif /* defined(DEPTH) && defined(ELEMENT_SIZE) */

232

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

233

#if defined(WIDTH_OFFSET) && defined(DEPTH)

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

234

/** This kernel concatenates the input tensor into the output tensor along the first dimension

235

*

Gian Marco Iodice

2018-08-10 09:34:11 +0100

[diff] [blame]

236

* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

237

* @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16

Gian Marco Iodice

2018-08-10 09:34:11 +0100

[diff] [blame]

238

* @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. i.e. -DWIDTH_OFFSET=128

Michele Di Giorgio

27400b9

2018-11-01 13:44:05 +0000

[diff] [blame]

239

* @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16

Gian Marco Iodice

2018-08-10 09:34:11 +0100

[diff] [blame]

240

*

241

* @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

242

* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)

243

* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

244

* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)

245

* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

246

* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)

247

* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

248

* @param[in] src_stride_w Stride of the first source tensor in Z dimension (in bytes)

249

* @param[in] src_step_w src_stride_z * number of elements along Z processed per workitem(in bytes)

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

250

* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor

251

* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr

252

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

253

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

254

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

255

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

256

* @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)

257

* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

258

* @param[in] dst_stride_w Stride of the destination tensor in Z dimension (in bytes)

259

* @param[in] dst_step_w output_stride_z * number of elements along Z processed per workitem(in bytes)

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

260

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

261

*/

262

__kernel void concatenate_width(

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

263

TENSOR4D_DECLARATION(src),

264

TENSOR4D_DECLARATION(dst))

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

265

{

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

266

Tensor4D src = CONVERT_TO_TENSOR4D_STRUCT(src, DEPTH);

267

Tensor4D dst = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

268

269

VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

270

source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src.ptr);

271

272

VSTORE(VEC_SIZE)

Gian Marco Iodice

2018-08-10 09:34:11 +0100

[diff] [blame]

273

(source_values, 0, (__global DATA_TYPE *)(dst.ptr) + WIDTH_OFFSET);

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

274

}

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

275

#endif /* defined(WIDTH_OFFSET) && defined(DEPTH) */

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

276

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

277

/** This kernel concatenates the input tensor into the output tensor along the third dimension

278

*

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

279

* @note The data type has to be passed at compile time using -DDATA_TYPE. i.e. -DDATA_TYPE=float

280

* @note Vector size has to be passed at compile time using -DVEC_SIZE. i.e. -DVEC_SIZE=16

281

*

Vidhya Sudhan Loganathan

7485d5a

2018-07-04 09:34:00 +0100

[diff] [blame]

282

* @param[in] src_ptr Pointer to the source tensor. Supported data types: F16, F32

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

283

* @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)

284

* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

285

* @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)

286

* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

287

* @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)

288

* @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

289

* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

290

* @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

291

* @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)

292

* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

293

* @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)

294

* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

295

* @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes)

296

* @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

297

* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

298

* @param[in] offsets The offsets to the first valid element of the output tensor in bytes

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

299

*/

300

__kernel void concatenate_depth(

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

301

TENSOR3D_DECLARATION(src),

302

TENSOR3D_DECLARATION(dst),

303

int3 offsets)

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

304

{

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

305

Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);

306

Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

307

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

308

VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

309

source_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(&src, -offsets.x, -offsets.y, 0));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

310

Georgios Pinitas

2017-07-05 17:02:25 +0100

[diff] [blame]

311

VSTORE(VEC_SIZE)

312

(source_values, 0, (__global DATA_TYPE *)(dst.ptr + offsets.z));

Anthony Barbier

2017-09-04 18:44:23 +0100

[diff] [blame]

313

}

Michele Di Giorgio