Blame - src/core/CL/cl_kernels/concatenate.cl - ml/ComputeLibrary

2019-02-07 15:53:19 +0000

[diff] [blame]

130

VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

131

src1_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in1_ptr);

132

VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)

133

src2_values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in2_ptr);

Michele Di Giorgio

2018-11-01 13:44:05 +0000

[diff] [blame]

134

Georgios Pinitas

6631ac2

2019-04-17 12:12:56 +0100

[diff] [blame]

135

#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT)

Pablo Tello

2019-02-07 15:53:19 +0000

[diff] [blame]

136

src1_values = requantize(src1_values, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);

137

src2_values = requantize(src2_values, OFFSET_IN2, OFFSET_OUT, SCALE_IN2, SCALE_OUT);

Georgios Pinitas

6631ac2

2019-04-17 12:12:56 +0100

[diff] [blame]

138

#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */

Michele Di Giorgio

2018-11-01 13:44:05 +0000

[diff] [blame]

139

const VEC_DATA_TYPE(int, VEC_SIZE) x_coords = SEQ + (VEC_DATA_TYPE(int, VEC_SIZE))(x);

140

const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) cond = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(INPUT1_WIDTH), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

141

const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) values = select(src2_values, src1_values, cond);

142

143

VSTORE(VEC_SIZE)

144

(values, 0, (__global DATA_TYPE *)dst.ptr);

145

}

146

147

#if defined(INPUT2_WIDTH) && defined(INPUT3_WIDTH)
/** This kernel concatenates four input tensors into the output tensor along the first dimension
 *
 * Every work-item loads one vector of VEC_SIZE elements from each of the four inputs (at an X
 * coordinate clamped per input), optionally requantizes them, and then picks, lane by lane, the
 * value of the input the output X coordinate belongs to.
 *
 * @note The data type has to be passed at compile time using -DDATA_TYPE. e.g. -DDATA_TYPE=float
 * @note Vector size has to be passed at compile time using -DVEC_SIZE. e.g. -DVEC_SIZE=16
 * @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
 * @note First input tensor width should be given as a preprocessor argument using -DINPUT1_WIDTH=width. e.g. -DINPUT1_WIDTH=8
 * @note Second input tensor width should be given as a preprocessor argument using -DINPUT2_WIDTH=width. e.g. -DINPUT2_WIDTH=8
 * @note Third input tensor width should be given as a preprocessor argument using -DINPUT3_WIDTH=width. e.g. -DINPUT3_WIDTH=8
 * @note Requantization is applied only when OFFSET_IN1..4, SCALE_IN1..4, OFFSET_OUT and SCALE_OUT are all defined
 * @note The kernel also relies on SEQ and COND_DATA_TYPE being defined before this point
 *
 * @param[in]  src1_ptr                           Pointer to the first source tensor. Supported data types: All
 * @param[in]  src1_stride_x                      Stride of the first source tensor in X dimension (in bytes)
 * @param[in]  src1_step_x                        src1_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src1_stride_y                      Stride of the first source tensor in Y dimension (in bytes)
 * @param[in]  src1_step_y                        src1_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src1_stride_z                      Stride of the first source tensor in Z dimension (in bytes)
 * @param[in]  src1_step_z                        src1_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src1_stride_w                      Stride of the first source tensor in W dimension (in bytes)
 * @param[in]  src1_step_w                        src1_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src1_offset_first_element_in_bytes The offset of the first element in the first source tensor
 * @param[in]  src2_ptr                           Pointer to the second source tensor. Supported data types: same as @p src1_ptr
 * @param[in]  src2_stride_x                      Stride of the second source tensor in X dimension (in bytes)
 * @param[in]  src2_step_x                        src2_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src2_stride_y                      Stride of the second source tensor in Y dimension (in bytes)
 * @param[in]  src2_step_y                        src2_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src2_stride_z                      Stride of the second source tensor in Z dimension (in bytes)
 * @param[in]  src2_step_z                        src2_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src2_stride_w                      Stride of the second source tensor in W dimension (in bytes)
 * @param[in]  src2_step_w                        src2_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src2_offset_first_element_in_bytes The offset of the first element in the second source tensor
 * @param[in]  src3_ptr                           Pointer to the third source tensor. Supported data types: same as @p src1_ptr
 * @param[in]  src3_stride_x                      Stride of the third source tensor in X dimension (in bytes)
 * @param[in]  src3_step_x                        src3_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src3_stride_y                      Stride of the third source tensor in Y dimension (in bytes)
 * @param[in]  src3_step_y                        src3_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src3_stride_z                      Stride of the third source tensor in Z dimension (in bytes)
 * @param[in]  src3_step_z                        src3_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src3_stride_w                      Stride of the third source tensor in W dimension (in bytes)
 * @param[in]  src3_step_w                        src3_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src3_offset_first_element_in_bytes The offset of the first element in the third source tensor
 * @param[in]  src4_ptr                           Pointer to the fourth source tensor. Supported data types: same as @p src1_ptr
 * @param[in]  src4_stride_x                      Stride of the fourth source tensor in X dimension (in bytes)
 * @param[in]  src4_step_x                        src4_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src4_stride_y                      Stride of the fourth source tensor in Y dimension (in bytes)
 * @param[in]  src4_step_y                        src4_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src4_stride_z                      Stride of the fourth source tensor in Z dimension (in bytes)
 * @param[in]  src4_step_z                        src4_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src4_stride_w                      Stride of the fourth source tensor in W dimension (in bytes)
 * @param[in]  src4_step_w                        src4_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src4_offset_first_element_in_bytes The offset of the first element in the fourth source tensor
 * @param[out] dst_ptr                            Pointer to the destination tensor. Supported data types: same as @p src1_ptr
 * @param[in]  dst_stride_x                       Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  dst_step_x                         dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  dst_stride_y                       Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  dst_step_y                         dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  dst_stride_z                       Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  dst_step_z                         dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  dst_stride_w                       Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  dst_step_w                         dst_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  dst_offset_first_element_in_bytes  The offset of the first element in the destination tensor
 * @param[in]  src1_pad_right                     Right paddings of the first input tensor in unit of elements
 * @param[in]  src2_pad_left                      Left paddings of the second input tensor in unit of elements
 * @param[in]  src2_pad_right                     Right paddings of the second input tensor in unit of elements
 * @param[in]  src3_pad_left                      Left paddings of the third input tensor in unit of elements
 * @param[in]  src3_pad_right                     Right paddings of the third input tensor in unit of elements
 * @param[in]  src4_pad_left                      Left paddings of the fourth input tensor in unit of elements
 */
__kernel void concatenate_width_x4(
    TENSOR4D_DECLARATION(src1),
    TENSOR4D_DECLARATION(src2),
    TENSOR4D_DECLARATION(src3),
    TENSOR4D_DECLARATION(src4),
    TENSOR4D_DECLARATION(dst),
    uint src1_pad_right,
    uint src2_pad_left,
    uint src2_pad_right,
    uint src3_pad_left,
    uint src3_pad_right,
    uint src4_pad_left)
{
    Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

    // Output coordinates of the first element handled by this work-item
    const int x = get_global_id(0) * (int)VEC_SIZE;
    const int y = get_global_id(1);
    const int z = get_global_id(2) % (int)DEPTH;
    const int w = get_global_id(2) / (int)DEPTH;

    // Output X coordinate at which the second, third and fourth input start
    const int in2_begin = (int)INPUT1_WIDTH;
    const int in3_begin = in2_begin + (int)INPUT2_WIDTH;
    const int in4_begin = in3_begin + (int)INPUT3_WIDTH;

    // Per-input X coordinate, clamped using the left/right paddings passed by the caller
    const int x1 = min(x, in2_begin + (int)src1_pad_right - (int)VEC_SIZE);
    const int x2 = min(max(x - in2_begin, -(int)src2_pad_left), (int)INPUT2_WIDTH + (int)src2_pad_right - (int)VEC_SIZE);
    const int x3 = min(max(x - in3_begin, -(int)src3_pad_left), (int)INPUT3_WIDTH + (int)src3_pad_right - (int)VEC_SIZE);
    const int x4 = max(x - in4_begin, -(int)src4_pad_left);

    // Byte address of the vector to load from each input
    const __global uchar *in1_ptr = src1_ptr + (int)src1_offset_first_element_in_bytes
                                    + x1 * (int)src1_stride_x + y * (int)src1_stride_y + z * (int)src1_stride_z + w * (int)src1_stride_w;
    const __global uchar *in2_ptr = src2_ptr + (int)src2_offset_first_element_in_bytes
                                    + x2 * (int)src2_stride_x + y * (int)src2_stride_y + z * (int)src2_stride_z + w * (int)src2_stride_w;
    const __global uchar *in3_ptr = src3_ptr + (int)src3_offset_first_element_in_bytes
                                    + x3 * (int)src3_stride_x + y * (int)src3_stride_y + z * (int)src3_stride_z + w * (int)src3_stride_w;
    const __global uchar *in4_ptr = src4_ptr + (int)src4_offset_first_element_in_bytes
                                    + x4 * (int)src4_stride_x + y * (int)src4_stride_y + z * (int)src4_stride_z + w * (int)src4_stride_w;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in1_vec = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in1_ptr);
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in2_vec = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in2_ptr);
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in3_vec = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in3_ptr);
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in4_vec = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in4_ptr);

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) && defined(OFFSET_IN2) && defined(SCALE_IN2) && defined(OFFSET_IN3) && defined(SCALE_IN3) && defined(OFFSET_IN4) && defined(SCALE_IN4)
    // Bring every input to the output quantization before mixing lanes
    in1_vec = requantize(in1_vec, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
    in2_vec = requantize(in2_vec, OFFSET_IN2, OFFSET_OUT, SCALE_IN2, SCALE_OUT);
    in3_vec = requantize(in3_vec, OFFSET_IN3, OFFSET_OUT, SCALE_IN3, SCALE_OUT);
    in4_vec = requantize(in4_vec, OFFSET_IN4, OFFSET_OUT, SCALE_IN4, SCALE_OUT);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) && defined(OFFSET_IN2) && defined(SCALE_IN2) && defined(OFFSET_IN3) && defined(SCALE_IN3) && defined(OFFSET_IN4) && defined(SCALE_IN4) */

    // Output X coordinate of every lane
    const VEC_DATA_TYPE(int, VEC_SIZE) x_coords = SEQ + (VEC_DATA_TYPE(int, VEC_SIZE))(x);

    // Lane masks: set where the lane lies before the start of input 2 / 3 / 4
    const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) before_in2 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(in2_begin), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));
    const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) before_in3 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(in3_begin), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));
    const VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE) before_in4 = CONVERT(x_coords < (VEC_DATA_TYPE(int, VEC_SIZE))(in4_begin), VEC_DATA_TYPE(COND_DATA_TYPE, VEC_SIZE));

    // Cascade the selection: input 1 vs 2, then vs 3, then vs 4
    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) result = select(in2_vec, in1_vec, before_in2);
    result = select(in3_vec, result, before_in3);
    result = select(in4_vec, result, before_in4);

    VSTORE(VEC_SIZE)(result, 0, (__global DATA_TYPE *)output.ptr);
}
#endif /* defined(INPUT2_WIDTH) && defined(INPUT3_WIDTH) */

278

#endif /* defined(INPUT1_WIDTH) */

279

#endif /* defined(DEPTH) && defined(ELEMENT_SIZE) */

280

Michele Di Giorgio

2018-10-19 15:46:19 +0100

[diff] [blame]

281

#if defined(WIDTH_OFFSET) && defined(DEPTH)
/** This kernel concatenates the input tensor into the output tensor along the first dimension
 *
 * Each work-item loads one vector of VEC_SIZE elements from the source and stores it in the
 * destination, WIDTH_OFFSET elements further along X. When the quantization macros are defined
 * the vector is requantized before being stored.
 *
 * @note The data type has to be passed at compile time using -DDATA_TYPE. e.g. -DDATA_TYPE=float
 * @note Vector size has to be passed at compile time using -DVEC_SIZE. e.g. -DVEC_SIZE=16
 * @note The offset for the first spatial dimension has to be passed at compile time using -DWIDTH_OFFSET. e.g. -DWIDTH_OFFSET=128
 * @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
 * @note Requantization is applied only when OFFSET_IN1, OFFSET_OUT, SCALE_IN1 and SCALE_OUT are all defined
 *
 * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
 * @param[in]  src_stride_x                      Stride of the source tensor in X dimension (in bytes)
 * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src_stride_w                      Stride of the source tensor in W dimension (in bytes)
 * @param[in]  src_step_w                        src_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[out] dst_ptr                           Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in]  dst_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  dst_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  dst_stride_w                      Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  dst_step_w                        dst_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */
__kernel void concatenate_width(
    TENSOR4D_DECLARATION(src),
    TENSOR4D_DECLARATION(dst))
{
    Tensor4D input  = CONVERT_TO_TENSOR4D_STRUCT(src, DEPTH);
    Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

    // Destination of this work-item's vector: WIDTH_OFFSET elements past the matching output position
    __global DATA_TYPE *store_addr = (__global DATA_TYPE *)(output.ptr) + WIDTH_OFFSET;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) loaded = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
    // Quantized path: convert to the output quantization before storing
    const VEC_QUANT requantized = requantize(loaded, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
    VSTORE(VEC_SIZE)(requantized, 0, store_addr);
#else  /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
    VSTORE(VEC_SIZE)(loaded, 0, store_addr);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
}

#endif /* defined(WIDTH_OFFSET) && defined(DEPTH) */

Michalis Spyrou

2018-05-09 09:59:23 +0100

[diff] [blame]

332

Pablo Tello

6a14adb

2019-03-05 17:33:08 +0000

[diff] [blame]

333

#if defined(HEIGHT_OFFSET) && defined(DEPTH) && defined(VEC_SIZE)
/** This kernel concatenates the input tensor into the output tensor along the second dimension
 *
 * Each work-item loads one vector of VEC_SIZE elements from the source and stores it in the
 * destination, HEIGHT_OFFSET rows (HEIGHT_OFFSET * dst_stride_y bytes) further along Y. When the
 * quantization macros are defined the vector is requantized before being stored.
 *
 * @note The data type has to be passed at compile time using -DDATA_TYPE. e.g. -DDATA_TYPE=float
 * @note Vector size has to be passed at compile time using -DVEC_SIZE. e.g. -DVEC_SIZE=16
 * @note Vector sizes supported are 2,4,8 and 16.
 * @note The offset for the second spatial dimension has to be passed at compile time using -DHEIGHT_OFFSET. e.g. -DHEIGHT_OFFSET=128
 * @note Tensor depth should be given as a preprocessor argument using -DDEPTH=size. e.g. -DDEPTH=16
 * @note Requantization is applied only when OFFSET_IN1, OFFSET_OUT, SCALE_IN1 and SCALE_OUT are all defined
 *
 * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/F32
 * @param[in]  src_stride_x                      Stride of the source tensor in X dimension (in bytes)
 * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src_stride_w                      Stride of the source tensor in W dimension (in bytes)
 * @param[in]  src_step_w                        src_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[out] dst_ptr                           Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in]  dst_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  dst_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  dst_stride_w                      Stride of the destination tensor in W dimension (in bytes)
 * @param[in]  dst_step_w                        dst_stride_w * number of elements along W processed per workitem(in bytes)
 * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
 */

__kernel void concatenate_height(
    TENSOR4D_DECLARATION(src),
    TENSOR4D_DECLARATION(dst))
{
    Tensor4D input  = CONVERT_TO_TENSOR4D_STRUCT(src, DEPTH);
    Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(dst, DEPTH);

    // Destination of this work-item's vector: HEIGHT_OFFSET rows below the matching output position
    __global DATA_TYPE *store_addr = (__global DATA_TYPE *)(output.ptr + HEIGHT_OFFSET * dst_stride_y);

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) loaded = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
    // Quantized path: convert to the output quantization before storing
    const VEC_QUANT requantized = requantize(loaded, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
    VSTORE(VEC_SIZE)(requantized, 0, store_addr);
#else  /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
    VSTORE(VEC_SIZE)(loaded, 0, store_addr);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */
}

#endif /* defined(HEIGHT_OFFSET) && defined(DEPTH) && defined(VEC_SIZE) */

385

Giorgio Arena

2020-10-07 16:03:43 +0100

[diff] [blame]

386

#if defined(VEC_SIZE_LEFTOVER)

/** This kernel concatenates the input tensor into the output tensor along the third dimension
 *
 * Each work-item copies one vector of VEC_SIZE elements from the source to the destination, with
 * @p offset bytes added to the destination address. The first work-item along X stores only
 * VEC_SIZE_LEFTOVER elements when VEC_SIZE_LEFTOVER is non-zero.
 *
 * @note The data type has to be passed at compile time using -DDATA_TYPE. e.g. -DDATA_TYPE=float
 * @note Vector size has to be passed at compile time using -DVEC_SIZE. e.g. -DVEC_SIZE=16
 * @note Leftover vector size has to be passed at compile time using -DVEC_SIZE_LEFTOVER. e.g. -DVEC_SIZE_LEFTOVER=3. It is defined as the remainder between the input's first dimension and VEC_SIZE
 * @note Requantization is applied only when OFFSET_IN1, OFFSET_OUT, SCALE_IN1 and SCALE_OUT are all defined
 *
 * @param[in]  src_ptr                           Pointer to the source tensor. Supported data types: All
 * @param[in]  src_stride_x                      Stride of the source tensor in X dimension (in bytes)
 * @param[in]  src_step_x                        src_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  src_stride_y                      Stride of the source tensor in Y dimension (in bytes)
 * @param[in]  src_step_y                        src_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  src_stride_z                      Stride of the source tensor in Z dimension (in bytes)
 * @param[in]  src_step_z                        src_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  src_offset_first_element_in_bytes The offset of the first element in the source tensor
 * @param[out] dst_ptr                           Pointer to the destination tensor. Supported data types: same as @p src_ptr
 * @param[in]  dst_stride_x                      Stride of the destination tensor in X dimension (in bytes)
 * @param[in]  dst_step_x                        dst_stride_x * number of elements along X processed per workitem(in bytes)
 * @param[in]  dst_stride_y                      Stride of the destination tensor in Y dimension (in bytes)
 * @param[in]  dst_step_y                        dst_stride_y * number of elements along Y processed per workitem(in bytes)
 * @param[in]  dst_stride_z                      Stride of the destination tensor in Z dimension (in bytes)
 * @param[in]  dst_step_z                        dst_stride_z * number of elements along Z processed per workitem(in bytes)
 * @param[in]  dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
 * @param[in]  offset                            The offset to the first valid element of the output tensor in bytes
 */
__kernel void concatenate(
    TENSOR3D_DECLARATION(src),
    TENSOR3D_DECLARATION(dst),
    int offset)
{
    // Size of one full vector, and how far (in bytes) every work-item but the first is moved back along X
    const size_t vec_bytes    = VEC_SIZE * sizeof(DATA_TYPE);
    const size_t rewind_bytes = (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE);

    // Byte offset along X; the first work-item would go negative, so it is clamped to 0
    uint x_offs = max((int)(get_global_id(0) * vec_bytes - rewind_bytes), 0);

    const size_t row   = get_global_id(1);
    const size_t plane = get_global_id(2);

    __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x_offs + row * src_stride_y + plane * src_stride_z;
    __global uchar *dst_addr = dst_ptr + dst_offset_first_element_in_bytes + x_offs + row * dst_stride_y + plane * dst_stride_z;

    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) source_values0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)src_addr);

#if defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT)
    // Quantized path: convert to the output quantization before storing
    source_values0 = requantize(source_values0, OFFSET_IN1, OFFSET_OUT, SCALE_IN1, SCALE_OUT);
#endif /* defined(OFFSET_IN1) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_OUT) */

    // NOTE(review): the macro is handed the base name "source_values" while the variable is
    // source_values0 - presumably it appends the row index itself; confirm against its definition.
    STORE_VECTOR_SELECT(source_values, DATA_TYPE, dst_addr + offset, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
}

#endif /* defined(VEC_SIZE_LEFTOVER) */

Michele Di Giorgio