Blame - src/core/NEON/kernels/NEPermuteKernel.cpp - ml/ComputeLibrary

return (permutations3.end() != std::find(permutations3.begin(), permutations3.end(), v)) || (permutations4.end() != std::find(permutations4.begin(), permutations4.end(), v));

90

}

91

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

92

/** Static checks shared by configure() and validate().
 *
 * Verifies the input data type, verifies that the permutation vector is one the kernel supports and,
 * when the output has already been given a non-empty shape, verifies that the output agrees with the
 * input (permuted shape, quantization info and data type).
 *
 * @param[in] input  Source tensor info.
 * @param[in] output Destination tensor info. Only compared against the input when its total size is non-zero.
 * @param[in] perm   Permutation vector to apply.
 *
 * @return An empty Status when no check triggers an early return.
 */
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
    //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1,
                                                         DataType::U8, DataType::S8, DataType::QASYMM8,
                                                         DataType::U16, DataType::S16,
                                                         DataType::U32, DataType::S32,
                                                         DataType::F16, DataType::F32);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_permutation_supported(perm), "PermutationVector not supported.");

    // Shape the output must have once the permutation is applied to the input
    const TensorShape expected_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm);

    // An output that has not been given a shape yet has nothing to be compared against
    if(output->total_size() == 0)
    {
        return Status{};
    }

    // Validate configured output
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), expected_shape);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);

    return Status{};
}

} // namespace

template <typename T>

117

void NEPermuteKernel::run_permute(const Window &window)

118

{

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

119

const DataLayout input_layout = _input->info()->data_layout();

120

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

121

// Input window

122

Window window_in = window;

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

123

124

// we only support these two configs in arm_compute/core/NEON/kernels/convolution/common/shims.hpp, for all others

125

// we have to fall back to C++

126

if((input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U }) || (input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U }))

127

{

128

window_in.set(Window::DimX, Window::Dimension(window.x().start(), window.x().end(), window.x().end() - window.x().start()));

129

window_in.set(Window::DimY, Window::Dimension(window.y().start(), window.y().end(), window.y().end() - window.y().start()));

130

window_in.set(Window::DimZ, Window::Dimension(window.z().start(), window.z().end(), window.z().end() - window.z().start()));

131

window_in.set(3, Window::Dimension(window[3].start(), window[3].end(), window[3].end() - window[3].start()));

132

}

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

133

134

// Output window

135

Window window_out(window);

136

const Window::Dimension zero_window = Window::Dimension(0, 0, 0);

137

for(size_t d = 0; d <= _perm.num_dimensions(); ++d)

138

{

139

window_out.set(d, zero_window);

}

// Create iterators

Iterator in(_input, window_in);

144

Iterator out(_output, window_out);

145

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

146

int in_row_stride = 0;

147

int in_col_stride = 0;

148

int in_channel_stride = 0;

149

int in_batch_stride = 0;

int n_cols = 0;

int n_rows = 0;

int n_channels = 0;

int n_batches = 0;

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

154

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

155

switch(input_layout)

156

{

157

case DataLayout::NCHW:

158

{

159

in_row_stride = _input->info()->strides_in_bytes().y() / sizeof(T);

160

in_channel_stride = _input->info()->strides_in_bytes().z() / sizeof(T);

161

in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);

162

n_cols = _input->info()->tensor_shape().x();

163

n_rows = window_in.y().step();

164

n_channels = _input->info()->tensor_shape().z();

165

n_batches = _input->info()->tensor_shape()[3];

166

break;

167

}

168

case DataLayout::NHWC:

169

{

170

in_col_stride = _input->info()->strides_in_bytes().y() / sizeof(T);

171

in_row_stride = _input->info()->strides_in_bytes().z() / sizeof(T);

172

in_batch_stride = _input->info()->strides_in_bytes()[3] / sizeof(T);

173

n_channels = _input->info()->tensor_shape().x();

174

n_cols = window_in.y().step();

175

n_rows = _input->info()->tensor_shape().z();

176

n_batches = _input->info()->tensor_shape()[3];

break;

}

default:

{

ARM_COMPUTE_ERROR("Invalid input data layout.");

break;

}

}

// CHW -> HWC

if(input_layout == DataLayout::NCHW && _perm == PermutationVector{ 2U, 0U, 1U })

188

{

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

189

const int out_channel_stride = _output->info()->strides_in_bytes().x() / sizeof(T);

190

const int out_col_stride = _output->info()->strides_in_bytes().y() / sizeof(T);

191

const int out_row_stride = _output->info()->strides_in_bytes().z() / sizeof(T);

192

const int out_batch_stride = _output->info()->strides_in_bytes()[3] / sizeof(T);

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

193

execute_window_loop(window_in, [&](const Coordinates & id)

194

{

195

const int idx = id[0] * out_col_stride + id[1] * out_row_stride + id[2] * out_channel_stride;

196

reorder::nchw_to_nhwc(reinterpret_cast<const T *>(in.ptr()), reinterpret_cast<T *>(out.ptr()) + idx,

197

n_batches, n_channels, n_rows, n_cols,

198

in_batch_stride, in_channel_stride, in_row_stride,

199

out_batch_stride, out_row_stride, out_col_stride);

},

in, out);

}

// HWC -> CHW

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

204

else if(input_layout == DataLayout::NHWC && _perm == PermutationVector{ 1U, 2U, 0U })

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

205

{

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

206

const int out_col_stride = _output->info()->strides_in_bytes().x() / sizeof(T);

207

const int out_row_stride = _output->info()->strides_in_bytes().y() / sizeof(T);

208

const int out_channel_stride = _output->info()->strides_in_bytes().z() / sizeof(T);

209

const int out_batch_stride = _output->info()->strides_in_bytes()[3] / sizeof(T);

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

210

execute_window_loop(window_in, [&](const Coordinates & id)

211

{

212

const int idx = id[0] * out_channel_stride + id[1] * out_col_stride + id[2] * out_row_stride;

213

reorder::nhwc_to_nchw(reinterpret_cast<const T *>(in.ptr()), reinterpret_cast<T *>(out.ptr()) + idx,

214

n_batches, n_rows, n_cols, n_channels,

215

in_batch_stride, in_row_stride, in_col_stride,

216

out_batch_stride, out_channel_stride, out_row_stride);

},

in, out);

}

else

{

Pablo Tello

35767bc

2018-12-05 17:36:30 +0000

[diff] [blame]

222

// All other cases fall back to C++

223

// Permute strides

224

Strides strides = _output->info()->strides_in_bytes();

225

Strides perm_strides = strides;

226

permute_strides(perm_strides, _perm);

227

const int perm_stride_3 = _input->info()->num_dimensions() >= 4 ? perm_strides[3] : 0;

228

execute_window_loop(window, [&](const Coordinates & id)

229

{

230

const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] + id[3] * perm_stride_3;

231

*(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));

232

},

233

in, out);

Georgios Pinitas

284cfe2

2018-02-13 12:15:13 +0000

[diff] [blame]

}

}

/** Default constructor: no run function selected, no tensors attached, default-constructed permutation vector. */
NEPermuteKernel::NEPermuteKernel()
    : _func(nullptr),
      _input(nullptr),
      _output(nullptr),
      _perm()
{
}

void NEPermuteKernel::configure(const ITensor *input, ITensor *output, const PermutationVector &perm)

243

{

244

ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

245

const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input->info(), perm);

246

// Output auto inizialitation if not yet initialized

247

auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));

248

249

// Perform validation step

250

ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm));

_input = input;

_output = output;

_perm = perm;

switch(input->info()->element_size())

257

{

258

case 1:

259

_func = &NEPermuteKernel::run_permute<uint8_t>;

260

break;

261

case 2:

262

_func = &NEPermuteKernel::run_permute<uint16_t>;

263

break;

264

case 4:

265

_func = &NEPermuteKernel::run_permute<uint32_t>;

266

break;

267

default:

268

ARM_COMPUTE_ERROR("Element size not supported");

break;

}

// Configure kernel window

273

Window win = calculate_max_window(*input->info(), Steps());

274

275

// The NEPermute doesn't need padding so update_window_and_padding() can be skipped

276

Coordinates coord;

277

coord.set_num_dimensions(output->info()->num_dimensions());

278

output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));

279

280

ICPPKernel::configure(win);

281

}

282

283

/** Static check of whether the given tensor infos and permutation form a valid kernel configuration.
 *
 * @param[in] input  Source tensor info.
 * @param[in] output Destination tensor info.
 * @param[in] perm   Permutation vector to apply.
 *
 * @return An empty Status when validate_arguments() reports no error.
 */
Status NEPermuteKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
{
    const Status arguments_status = validate_arguments(input, output, perm);
    ARM_COMPUTE_RETURN_ON_ERROR(arguments_status);

    return Status{};
}

void NEPermuteKernel::run(const Window &window, const ThreadInfo &info)

290

{

291

ARM_COMPUTE_UNUSED(info);

292

ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);

293

ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);

if(_func != nullptr)

{

(this->*_func)(window);

298

}

299

}