Blame - src/core/CL/cl_kernels/scale.cl - ml/ComputeLibrary

*((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT)));

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

181

}

182

183

/** Performs scale on an image interpolating with the BILINEAR method. (NHWC)

184

*

185

* @note Sampling policy to be used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT

186

* @note If border mode replicate is used, it should be passed as -DBORDER_MODE_REPLICATE

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

187

* @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

188

*

189

* @param[in] in_ptr Pointer to the source image. Supported data types: U8/S16/F16/F32.

190

* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)

191

* @param[in] in_step_x src_stride_x * number of elements along X processed per workitem(in bytes)

192

* @param[in] in_stride_y Stride of the source image in Y dimension (in bytes)

193

* @param[in] in_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)

194

* @param[in] in_stride_z Stride of the source image in Z dimension (in bytes)

195

* @param[in] in_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)

196

* @param[in] in_offset_first_element_in_bytes The offset of the first element in the source image

197

* @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in_ptr

198

* @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)

199

* @param[in] out_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)

200

* @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)

201

* @param[in] out_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)

202

* @param[in] out_stride_z Stride of the destination image in Z dimension (in bytes)

203

* @param[in] out_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)

204

* @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image

205

* @param[in] input_width Input image width

206

* @param[in] input_height Input image height

207

* @param[in] scale_x The scale factor along x dimension

208

* @param[in] scale_y The scale factor along y dimension

209

*/

210

__kernel void scale_bilinear_nhwc(

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

211

TENSOR4D_DECLARATION(in),

212

TENSOR4D_DECLARATION(out),

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

213

const float input_width,

214

const float input_height,

215

const float scale_x,

216

const float scale_y)

217

{

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

218

Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(in, 0);

219

Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(out, DEPTH_OUT);

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

220

221

#ifdef SAMPLING_POLICY_TOP_LEFT

222

const float new_x = get_global_id(1) * scale_x;

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

223

const float new_y = (get_global_id(2) % DEPTH_OUT) * scale_y;

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

224

#elif SAMPLING_POLICY_CENTER

225

const float new_x = (get_global_id(1) + 0.5f) * scale_x - 0.5f;

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

226

const float new_y = ((get_global_id(2) % DEPTH_OUT) + 0.5f) * scale_y - 0.5f;

Michalis Spyrou

46da23f

2018-04-10 13:41:30 +0100

[diff] [blame]

227

#else /* SAMPLING_POLICY */

228

#error("Unsupported sampling policy");

229

#endif /* SAMPLING_POLICY */

230

231

const float new_xf = floor(new_x);

232

const float new_yf = floor(new_y);

233

float clamped_x = clamp(new_xf, 0.0f, input_width - 1);

234

float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);

235

float clamped_x_ = clamped_x;

236

float clamped_x1_ = clamped_x1;

237

const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);

238

const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);

239

240

#ifndef BORDER_MODE_REPLICATE

241

clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);

242

clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);

243

clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);

244

clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);

245

#endif /* BORDER_MODE_REPLICATE */

246

Michalis Spyrou

1f8db2b

2018-12-10 16:19:20 +0000

[diff] [blame]

247

float4 ins = (float4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),

248