Blame - src/core/CL/cl_kernels/scale.cl - ml/ComputeLibrary

*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT)));

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

205

}

206

207

/** Performs scale on an image interpolating with the BILINEAR method. (NHWC)

208

*

209

* @note Sampling policy to be used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT

210

* @note If border mode replicate is used, it should be passed as -DBORDER_MODE_REPLICATE

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

211

* @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

212

*

213

* @param[in] in_ptr Pointer to the source image. Supported data types: U8/S16/F16/F32.

214

* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)

215

* @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

216

* @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)

217

* @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

218

* @param[in] in_stride_z Stride of the source image in Z dimension (in bytes)

219

* @param[in] in_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

220

* @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image

221

* @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in_ptr

222

* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)

223

* @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

224

* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)

225

* @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

226

* @param[in] out_stride_z Stride of the destination image in Z dimension (in bytes)

227

* @param[in] out_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

228

* @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image

229

* @param[in] input_width Input image width

230

* @param[in] input_height Input image height

231

* @param[in] scale_x The scale factor along x dimension

232

* @param[in] scale_y The scale factor along y dimension

233

*/

234

__kernel void scale_bilinear_nhwc(

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

235

TENSOR4D_DECLARATION(in),

236

TENSOR4D_DECLARATION(out),

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

237

const float input_width,

238

const float input_height,

239

const float scale_x,

240

const float scale_y)

241

{

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

242

Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0);

243

Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

244

245

#ifdef SAMPLING_POLICY_TOP_LEFT

246

const float new_x = get_global_id(1) * scale_x;

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

247

const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

248

#elif SAMPLING_POLICY_CENTER

249

const float new_x = (get_global_id(1) + 0.5f) * scale_x - 0.5f;

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

250

const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y - 0.5f;

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

251

#else /* SAMPLING_POLICY */

252

#error("Unsupported sampling policy");

253

#endif /* SAMPLING_POLICY */

254

255

const float new_xf = floor(new_x);

256

const float new_yf = floor(new_y);

257

float clamped_x = clamp(new_xf, 0.0f, input_width - 1);

258

float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);

259

float clamped_x_ = clamped_x;

260

float clamped_x1_ = clamped_x1;

261

const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);

262

const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);

263

264

#ifndef BORDER_MODE_REPLICATE

265

clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);

266

clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);

267

clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);

268

clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);

269

#endif /* BORDER_MODE_REPLICATE */

270

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

271

float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),

272