Blame - src/core/NEON/kernels/NECropKernel.cpp - ml/ComputeLibrary

const std::array<uint32_t, 2> &rows_out_of_bounds, const std::array<uint32_t, 2> &cols_out_of_bounds, NECropKernel::InBoundsCropFunction *in_bounds_crop_function)

George Wort

05398a9

2019-01-25 15:38:33 +0000

[diff] [blame]

182

{

183

// Output is always float.

184

const int window_step_x = 16 / sizeof(float);

185

auto *output_ptr = reinterpret_cast<float *>(output->buffer());

186

// Output window:

187

// --------------------------------

188

// | Out of bounds |

189

// | rows before |

190

// |------------------------------|

191

// | Out of | In | Out of |

192

// | bounds | bounds | bounds |

193

// | cols | elements | cols |

194

// | before | copied | after |

195

// | | from input | |

196

// --------------------------------

197

// | Out of bounds |

198

// | rows after |

199

// |------------------------------|

200

// Fill all output rows that have no elements that are within the input bounds with the extrapolation value.

201

// First for the rows before the in bounds rows.

202

out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[0] * output->info()->dimension(1));

203

output_ptr += rows_out_of_bounds[0] * output->info()->dimension(1) * output->info()->dimension(0);

204

// Iterate through each row that has any elements within the input bounds.

205

for(uint32_t row = rows_out_of_bounds[0]; static_cast<int32_t>(row) < static_cast<int32_t>(output->info()->dimension(2) - rows_out_of_bounds[1]);

206

++row, is_height_flipped ? --input_offset[2] : ++input_offset[2])

207

{

208

// Fill all elements in the row that are out of bounds with the extrapolation value.

209

// First for the elements before the in bounds elements.

210

if(has_cols_out_of_bounds_before)

211

{

212

out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, cols_out_of_bounds[0]);

213

}

214

// Copy all elements within the input bounds from the input tensor.

215

if(has_cols_in_bounds)

216

{

217

(*in_bounds_crop_function)(input, output, output_ptr, input_offset, window_step_x, cols_out_of_bounds[0], output->info()->dimension(1) - cols_out_of_bounds[1]);

218

}

219

// Fill all elements after the in bounds elements with the extrapolation value.

220

if(has_cols_out_of_bounds_after)

221

{

222

out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, output->info()->dimension(1) - cols_out_of_bounds[1], output->info()->dimension(1));

223

}

224

output_ptr += output->info()->dimension(1) * output->info()->dimension(0);

225

}

226

// Fill all rows after the in bounds elements with the extrapolation value.

227

out_of_bounds_crop_window(output, output_ptr, extrapolation_value, window_step_x, 0, rows_out_of_bounds[1] * output->info()->dimension(1));

}

} // namespace

// Default constructor: every tensor and function pointer starts out null, the crop box
// index and extrapolation value start at zero, and the remaining members are value-initialized.
// Nothing is usable until configure() and configure_output_shape() have been called.
NECropKernel::NECropKernel()
    : _input(nullptr),
      _crop_boxes(nullptr),
      _box_ind(nullptr),
      _output(nullptr),
      _start(),
      _end(),
      _crop_box_ind(0),
      _extrapolation_value(0),
      _rows_out_of_bounds(),
      _cols_out_of_bounds(),
      _in_bounds_crop_functions(),
      _in_bounds_crop_function(nullptr),
      _crop_function(nullptr)
{
}

void NECropKernel::configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind, float extrapolation_value)

238

{

239

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

240

ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), crop_boxes->info(), box_ind->info(), output->info(), crop_box_ind, extrapolation_value));

241

242

_input = input;

243

_crop_boxes = crop_boxes;

244

_box_ind = box_ind;

245

_output = output;

246

_crop_box_ind = crop_box_ind;

247

_extrapolation_value = extrapolation_value;

248

249

const static std::map<std::pair<DataType, bool>, std::pair<NECropKernel::InBoundsCropFunction *, NECropKernel::InBoundsCropFunction *>> in_map_function =

250

{

251

{ { DataType::F32, false }, { &in_bounds_crop_window<float, false, false>, &in_bounds_crop_window<float, false, true> } },

252

{ { DataType::F32, true }, { &in_bounds_crop_window<float, true, false>, &in_bounds_crop_window<float, true, true> } },

253

{ { DataType::U16, false }, { &in_bounds_crop_window<uint16_t, false, false>, &in_bounds_crop_window<uint16_t, false, true> } },

254

{ { DataType::U16, true }, { &in_bounds_crop_window<uint16_t, true, false>, &in_bounds_crop_window<uint16_t, true, true> } },

255

{ { DataType::S16, false }, { &in_bounds_crop_window<int16_t, false, false>, &in_bounds_crop_window<int16_t, false, true> } },

256

{ { DataType::S16, true }, { &in_bounds_crop_window<int16_t, true, false>, &in_bounds_crop_window<int16_t, true, true> } },

257

{ { DataType::U32, false }, { &in_bounds_crop_window<uint32_t, false, false>, &in_bounds_crop_window<uint32_t, false, true> } },

258

{ { DataType::U32, true }, { &in_bounds_crop_window<uint32_t, true, false>, &in_bounds_crop_window<uint32_t, true, true> } },

259

{ { DataType::S32, false }, { &in_bounds_crop_window<int32_t, false, false>, &in_bounds_crop_window<int32_t, false, true> } },

260

{ { DataType::S32, true }, { &in_bounds_crop_window<int32_t, true, false>, &in_bounds_crop_window<int32_t, true, true> } },

261

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

262

{ { DataType::F16, false }, { &in_bounds_crop_window<float16_t, false, false>, &in_bounds_crop_window<float16_t, false, true> } },

263

{ { DataType::F16, false }, { &in_bounds_crop_window<float16_t, true, false>, &in_bounds_crop_window<float16_t, true, true> } }

264

#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */

265

};

266

267

auto in_it = in_map_function.find({ input->info()->data_type(), input->info()->dimension(0) == 1 });

268

269

if(in_it != in_map_function.end())

270

{

271

_in_bounds_crop_functions = in_it->second;

}

}

// Checks whether the given tensor infos and crop box index form a configuration this
// kernel accepts. Returns an empty Status on success; each failing check returns early
// through its ARM_COMPUTE_RETURN_ERROR_* macro.
//
// input               Info of the tensor the crop is read from.
// crop_boxes          Info of the crop boxes tensor.
// box_ind             Info of the per-box batch index tensor.
// output              Info of the destination tensor; it is only inspected when its total size is non-zero.
// crop_box_ind        Index of the crop box to use; must be smaller than the number of boxes.
// extrapolation_value Unused by the validation.
Status NECropKernel::validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind, float extrapolation_value)
{
    ARM_COMPUTE_UNUSED(extrapolation_value);
    // All four infos are dereferenced below; report null arguments as an error status instead of crashing.
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, crop_boxes, box_ind, output);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NHWC);
    ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().num_dimensions() > 4);
    // Dimension 0 of crop_boxes must hold exactly four values per box.
    ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[0] != 4);
    // There must be one box_ind entry per crop box, and crop_box_ind must address an existing one.
    ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] != box_ind->tensor_shape()[0]);
    ARM_COMPUTE_RETURN_ERROR_ON(crop_boxes->tensor_shape()[1] <= crop_box_ind);
    ARM_COMPUTE_RETURN_ERROR_ON(box_ind->tensor_shape()[0] <= crop_box_ind);
    if(output->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(output, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() != 3);
        ARM_COMPUTE_RETURN_ERROR_ON(output->has_padding());
    }
    return Status{};
}

void NECropKernel::configure_output_shape()

297

{

298

// _crop_box_ind is used to index _crop_boxes and retrieve the appropriate crop box.

299

// The crop box is specified by normalized coordinates [y0, x0, y1, x1].

300

const float x0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(1, _crop_box_ind)));

301

const float y0 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(0, _crop_box_ind)));

302

const float x1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(3, _crop_box_ind)));

303

const float y1 = *reinterpret_cast<const float *>(_crop_boxes->ptr_to_element(Coordinates(2, _crop_box_ind)));

304

// The normalized coordiantes are scaled to retrieve the floating point image coordinates which are rounded to integers.

305

_start = Coordinates(std::floor(x0 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),

306

std::floor(y0 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));

307

_end = Coordinates(std::floor(x1 * (_input->info()->tensor_shape()[1] - 1) + 0.5f),

308

std::floor(y1 * (_input->info()->tensor_shape()[2] - 1) + 0.5f));

309

const TensorShape out_shape(_input->info()->tensor_shape()[0], abs(_end[0] - _start[0]) + 1, abs(_end[1] - _start[1]) + 1);

310

_output->info()->set_tensor_shape(out_shape);

311

312

_in_bounds_crop_function = _start[0] <= _end[0] ? _in_bounds_crop_functions.first : _in_bounds_crop_functions.second;

313

314

bool is_width_flipped = _end[0] < _start[0];

315

bool is_height_flipped = _end[1] < _start[1];

316

if(is_height_flipped)

317

{

318

_rows_out_of_bounds[0] = _start[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_start[1] - _input->info()->dimension(2) + 1),

319

static_cast<uint32_t>(_output->info()->dimension(2))) :

320

0;

321

_rows_out_of_bounds[1] = _end[1] < 0 ? std::min(static_cast<uint32_t>(-_end[1]),

322

static_cast<uint32_t>(_output->info()->dimension(2))) :

0;

}

else

{

_rows_out_of_bounds[0] = _start[1] < 0 ? std::min(static_cast<uint32_t>(-_start[1]),

328

static_cast<uint32_t>(_output->info()->dimension(2))) :

329

0;

330

_rows_out_of_bounds[1] = _end[1] >= static_cast<int32_t>(_input->info()->dimension(2)) ? std::min(static_cast<uint32_t>(_end[1] - _input->info()->dimension(2) + 1),

331

static_cast<uint32_t>(_output->info()->dimension(2))) :

0;

}

if(is_width_flipped)

{

_cols_out_of_bounds[0] = _start[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_start[0] - _input->info()->dimension(1) + 1),

337

static_cast<uint32_t>(_output->info()->dimension(1))) :

338

0;

339

_cols_out_of_bounds[1] = _end[0] < 0 ? std::min(static_cast<uint32_t>(-_end[0]),

340

static_cast<uint32_t>(_output->info()->dimension(1))) :

0;

}

else

{

_cols_out_of_bounds[0] = _start[0] < 0 ? std::min(static_cast<uint32_t>(-_start[0]),

346

static_cast<uint32_t>(_output->info()->dimension(1))) :

347

0;

348

_cols_out_of_bounds[1] = _end[0] >= static_cast<int32_t>(_input->info()->dimension(1)) ? std::min(static_cast<uint32_t>(_end[0] - _input->info()->dimension(1) + 1),

349

static_cast<uint32_t>(_output->info()->dimension(1))) :

0;

}

const static std::map<std::tuple<bool, bool, bool, bool>, NECropKernel::CropFunction *> map_function =

354

{

355

{ std::make_tuple(false, false, false, false), &execute_window<false, false, false, false> },

356

{ std::make_tuple(false, false, false, true), &execute_window<false, false, false, true> },

357

{ std::make_tuple(false, false, true, false), &execute_window<false, false, true, false> },

358

{ std::make_tuple(false, false, true, true), &execute_window<false, false, true, true> },

359

{ std::make_tuple(false, true, false, false), &execute_window<false, true, false, false> },

360

{ std::make_tuple(false, true, false, true), &execute_window<false, true, false, true> },

361

{ std::make_tuple(false, true, true, false), &execute_window<false, true, true, false> },

362

{ std::make_tuple(false, true, true, true), &execute_window<false, true, true, true> },

363

{ std::make_tuple(true, false, false, false), &execute_window<true, false, false, false> },

364

{ std::make_tuple(true, false, false, true), &execute_window<true, false, false, true> },

365

{ std::make_tuple(true, false, true, false), &execute_window<true, false, true, false> },

366

{ std::make_tuple(true, false, true, true), &execute_window<true, false, true, true> },

367

{ std::make_tuple(true, true, false, false), &execute_window<true, true, false, false> },

368

{ std::make_tuple(true, true, false, true), &execute_window<true, true, false, true> },

369

{ std::make_tuple(true, true, true, false), &execute_window<true, true, true, false> },

370

{ std::make_tuple(true, true, true, true), &execute_window<true, true, true, true> },

371

};

372

373

auto it = map_function.find(std::make_tuple(is_height_flipped,

374

_cols_out_of_bounds[0] + _cols_out_of_bounds[1] < _output->info()->dimension(1),

375

_cols_out_of_bounds[0] > 0,

376

_cols_out_of_bounds[1] > 0));

377

378

if(it != map_function.end())

379

{

380

_crop_function = it->second;

381

}

382

383

INEKernel::configure(calculate_max_window(*_output->info()));

384

}

385

386

void NECropKernel::run(const Window &window, const ThreadInfo &info)

387

{

388

ARM_COMPUTE_UNUSED(window, info);

389

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);

390

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);

391

392

ARM_COMPUTE_ERROR_ON(_input->info()->has_padding());

393

ARM_COMPUTE_ERROR_ON(_output->info()->has_padding());

394

395

uint32_t batch_index = *(reinterpret_cast<int32_t *>(_box_ind->ptr_to_element(Coordinates(_crop_box_ind))));

396

Coordinates input_offset(0, _end[0] < _start[0] ? _start[0] - _cols_out_of_bounds[0] : _start[0] + _cols_out_of_bounds[0],

397

_end[1] < _start[1] ? _start[1] - _rows_out_of_bounds[0] : _start[1] + _rows_out_of_bounds[0], batch_index);

398

(*_crop_function)(_input, _output, input_offset, _extrapolation_value, _rows_out_of_bounds, _cols_out_of_bounds, _in_bounds_crop_function);

399

}

400

} // namespace arm_compute