Blame - src/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.cpp - ml/ComputeLibrary

n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);

100

case 2:

101

return arm_compute::support::cpp14::make_unique<depthwise::QAsymm8DepthwiseConvolution<2, 2, 3, 3, 2, 2>>(

102

n_batches, in_rows, in_cols, n_channels, activation, wqinfo, iqinfo, oqinfo, rescale_params, padding_top, padding_left, padding_bottom, padding_right);

default:

return nullptr;

}

}

else

{

// Create float convolver

110

switch(data_type)

111

{

112

#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

case DataType::F16:

{

switch(stride_x)

{

case 1:

return arm_compute::support::cpp14::make_unique<depthwise::DepthwiseConvolution<3, 3, 3, 3, 1, 1, float16_t, float16_t, float16_t>>(

119

n_batches, in_rows, in_cols, n_channels, activation, padding_top, padding_left, padding_bottom, padding_right);

120

case 2:

121

return arm_compute::support::cpp14::make_unique<depthwise::DepthwiseConvolution<3, 3, 3, 3, 2, 2, float16_t, float16_t, float16_t>>(

122

n_batches, in_rows, in_cols, n_channels, activation, padding_top, padding_left, padding_bottom, padding_right);

default:

return nullptr;

}

break;

}

#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC

case DataType::F32:

{

switch(stride_x)

{

case 1:

return arm_compute::support::cpp14::make_unique<depthwise::DepthwiseConvolution<4, 4, 3, 3, 1, 1, float, float, float>>(

135

n_batches, in_rows, in_cols, n_channels, activation, padding_top, padding_left, padding_bottom, padding_right);

136

case 2:

137

return arm_compute::support::cpp14::make_unique<depthwise::DepthwiseConvolution<3, 3, 3, 3, 2, 2, float, float, float>>(

138

n_batches, in_rows, in_cols, n_channels, activation, padding_top, padding_left, padding_bottom, padding_right);

default:

return nullptr;

}

break;

}

default:

return nullptr;

}

}

}

} // namespace

#ifndef DOXYGEN_SKIP_THIS

152

// Builds the function in its unconfigured state: the memory manager is moved
// into the memory group, all tensor pointers and the assembly kernel start as
// nullptr, and the prepared flag is cleared.
NEDepthwiseConvolutionAssemblyDispatch::NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr<arm_compute::IMemoryManager> memory_manager)
    : _memory_group(std::move(memory_manager)),
      _input(nullptr),
      _weights(nullptr),
      _bias(nullptr),
      _output(nullptr),
      _packed_weights(),
      _workspace(),
      _is_prepared(false),
      _dwc_assembly_kernel(nullptr),
      _dwc_acl_kernel()
{
}

#endif /* DOXYGEN_SKIP_THIS */

158

159

void NEDepthwiseConvolutionAssemblyDispatch::configure(const ITensor *input,

160

const ITensor *weights,

161

const ITensor *bias,

162

ITensor *output,

163

const PadStrideInfo &conv_info,

164

unsigned int depth_multiplier,

165

const ActivationLayerInfo &act_info)

166

{

167

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);

168

ARM_COMPUTE_UNUSED(depth_multiplier);

169

ARM_COMPUTE_ERROR_THROW_ON(NEDepthwiseConvolutionAssemblyDispatch::validate(input->info(),

170

weights->info(),

171

bias != nullptr ? bias->info() : nullptr,

output->info(),

conv_info,

depth_multiplier,

act_info));

// Output auto inizialitation if not yet initialized

178

const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier);

179

auto_init_if_empty(*output->info(), input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape));

_input = input;

_weights = weights;

_bias = bias;

_output = output;

_is_prepared = false;

186

187

// Create convolver

188

_dwc_assembly_kernel = create_convolver(input, weights, output, conv_info, act_info);

189

ARM_COMPUTE_ERROR_ON(_dwc_assembly_kernel == nullptr);

190

191

// Create assembly kernel wrapper

192

_dwc_acl_kernel.configure(_dwc_assembly_kernel.get());

193

194

constexpr size_t alignment = 128;

195

196

// Create workspace

197

const unsigned int num_threads = NEScheduler::get().num_threads();

198

const size_t workspace_size = _dwc_assembly_kernel->get_working_space_size(num_threads);

199

ARM_COMPUTE_ERROR_ON_MSG(workspace_size == 0, "Workspace size cannot be 0 !");

200

_workspace.allocator()->init(TensorInfo(TensorShape{ workspace_size }, 1, DataType::S8), alignment);

201

_memory_group.manage(&_workspace);

202

_workspace.allocator()->allocate();

203

204

// Create packing tensor

205

const size_t pack_tensor_size = _dwc_assembly_kernel->get_packed_params_size();

206

ARM_COMPUTE_ERROR_ON_MSG(pack_tensor_size == 0, "Pack tensor size cannot be 0 !");

207

_packed_weights.allocator()->init(TensorInfo(TensorShape{ pack_tensor_size }, 1, DataType::S8), alignment);

208

}

209

210

/** Check whether the given configuration can be handled by this function.
 *
 * The checks performed are: non-null input/weights/output, supported data type
 * (QASYMM8, F16 or F32, single channel), matching data type and layout between
 * input and weights, 3x3 weights, equal x/y strides of 1 or 2, a depth
 * multiplier of 1, an activation that is either disabled or relu/relu6, a
 * one-dimensional bias matching the weights channel count (when a bias is
 * given), and - when the output is already initialised - matching output shape
 * and data type.
 *
 * @param[in] input            Source tensor info. Must not be nullptr.
 * @param[in] weights          Weights tensor info. Must not be nullptr.
 * @param[in] bias             Optional bias tensor info. May be nullptr.
 * @param[in] output           Destination tensor info. Must not be nullptr; may be uninitialised (total size 0).
 * @param[in] conv_info        Padding and stride information.
 * @param[in] depth_multiplier Depth multiplier. Only 1 is accepted.
 * @param[in] act_info         Activation information.
 *
 * @return An empty Status on success, an error Status otherwise.
 */
Status NEDepthwiseConvolutionAssemblyDispatch::validate(const ITensorInfo         *input,
                                                        const ITensorInfo         *weights,
                                                        const ITensorInfo         *bias,
                                                        const ITensorInfo         *output,
                                                        const PadStrideInfo       &conv_info,
                                                        unsigned int               depth_multiplier,
                                                        const ActivationLayerInfo &act_info)
{
    // Reject null arguments up front: output is dereferenced further down
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights);

    const auto       strides     = conv_info.stride();
    const DataLayout data_layout = input->data_layout();
    unsigned int     width_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
    unsigned int     height_idx  = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
    // Only 3x3 kernels with equal strides of 1 or 2 and no depth multiplication are accepted
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != 3 || weights->dimension(height_idx) != 3);
    ARM_COMPUTE_RETURN_ERROR_ON(!((strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2))));
    ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier != 1);

    // An enabled activation must be relu or relu6
    const bool is_relu  = arm_compute::utils::info_helpers::is_relu(act_info);
    const bool is_relu6 = arm_compute::utils::info_helpers::is_relu6(act_info);
    ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled() && !(is_relu || is_relu6));

    // Check bias
    if(bias != nullptr)
    {
        unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != weights->dimension(channel_idx));
    }

    // Check output (only when it has already been initialised)
    if(output->total_size() != 0)
    {
        const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
    }

    return Status{};
}

bool NEDepthwiseConvolutionAssemblyDispatch::is_optimized_supported(const ITensorInfo *input,

255

const ITensorInfo *weights,

256

PadStrideInfo conv_info,

Usama Arif

881f2de

2019-04-12 10:29:17 +0100

[diff] [blame^]

257

unsigned int depth_multiplier,

258

const Size2D &dilation)

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

259

{

260

ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights);

261

262

// Reshape input shape if in NHWC format

263

const DataLayout data_layout = input->data_layout();

264

TensorShape in_shape{ input->tensor_shape() };

265

if(data_layout == DataLayout::NHWC)

266

{

267

in_shape.set(Window::DimX, input->tensor_shape().y());

268

in_shape.set(Window::DimY, input->tensor_shape().z());

269

in_shape.set(Window::DimZ, input->tensor_shape().x());

}

// Check data type

const DataType data_type = weights->data_type();

274

bool is_data_type_valid = is_data_type_float(data_type) || is_data_type_quantized_asymmetric(data_type);

275

276

// Check weighs size

277

const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);

278

const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);

279

bool weights_supported = (weights->dimension(width_idx) == 3) && (weights->dimension(height_idx) == 3);

280

281

// Check for supported strides

282

const auto &strides = conv_info.stride();

283

bool supported_strides = (strides.first == strides.second) && ((strides.first == 1) || (strides.first == 2));

284

285

// Check for supported padding

286

const auto pad_top = conv_info.pad_top();

287

const auto pad_right = conv_info.pad_right();

288

const auto pad_bottom = conv_info.pad_bottom();

289

const auto pad_left = conv_info.pad_left();

290

PadStrideInfo same_pad = calculate_same_pad(in_shape, TensorShape(3U, 3U), conv_info);

291

bool is_same_padding = (pad_top == same_pad.pad_top()) && (pad_right == same_pad.pad_right()) && (pad_bottom == same_pad.pad_bottom()) && (pad_left == same_pad.pad_left());

292

bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0);

293

bool supported_padding = is_same_padding || is_valid_padding;

Usama Arif

881f2de

2019-04-12 10:29:17 +0100

[diff] [blame^]

294

bool is_dilation_1 = dilation.x() == 1 && dilation.y() == 1;

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

295

Usama Arif

881f2de

2019-04-12 10:29:17 +0100

[diff] [blame^]

296

return is_data_type_valid && weights_supported && supported_strides && supported_padding && (depth_multiplier == 1) && is_dilation_1;

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

297

}

298

299

void NEDepthwiseConvolutionAssemblyDispatch::run()

300

{

301

// Prepare assembly kernel

302

prepare();

303

Georgios Pinitas

da953f2

2019-04-02 17:27:03 +0100

[diff] [blame]

304

MemoryGroupResourceScope scope_mg(_memory_group);

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

305

306

// Setup inputs/outputs

307

ARM_COMPUTE_ERROR_ON(_workspace.buffer() == nullptr);

308

_dwc_assembly_kernel->set_working_space(static_cast<void *>(_workspace.buffer()));

309

310

ARM_COMPUTE_ERROR_ON(_input->buffer() == nullptr);

311

const int input_element_size = _input->info()->element_size();

312

const int input_batch_stride = _input->info()->strides_in_bytes()[3] / input_element_size;

313

const int input_row_stride = _input->info()->strides_in_bytes().z() / input_element_size;

314

const int input_col_stride = _input->info()->strides_in_bytes().y() / input_element_size;

315

const void *input_ptr = _input->buffer() + _input->info()->offset_first_element_in_bytes();

316

_dwc_assembly_kernel->set_input(input_ptr, input_batch_stride, input_row_stride, input_col_stride);

317

318

ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);

319

const int output_element_size = _output->info()->element_size();

320

const int output_batch_stride = _output->info()->strides_in_bytes()[3] / output_element_size;

321

const int output_row_stride = _output->info()->strides_in_bytes().z() / output_element_size;

322

const int output_col_stride = _output->info()->strides_in_bytes().y() / output_element_size;

323

void *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes();

324

_dwc_assembly_kernel->set_output(output_ptr, output_batch_stride, output_row_stride, output_col_stride);

325

326

// Schedule assembly kernel

327

NEScheduler::get().schedule(&_dwc_acl_kernel, Window::DimX);

Georgios Pinitas

47d39dc

2019-03-11 14:03:23 +0000

[diff] [blame]

328

}

329

330

void NEDepthwiseConvolutionAssemblyDispatch::prepare()

{

if(!_is_prepared)

{

_packed_weights.allocator()->allocate();

335

ARM_COMPUTE_ERROR_ON(_packed_weights.buffer() == nullptr);

336

337

// Pack weights and bias

338

const int weights_element_size = _weights->info()->element_size();

339

const int weights_row_stride = _weights->info()->strides_in_bytes().z() / weights_element_size;

340

const int weights_col_stride = _weights->info()->strides_in_bytes().y() / weights_element_size;

341

_dwc_assembly_kernel->pack_params(_packed_weights.buffer(),

342

_weights->buffer() + _weights->info()->offset_first_element_in_bytes(),

343

weights_row_stride,

344

weights_col_stride,

345

(_bias != nullptr) ? _bias->buffer() : nullptr);

346

_dwc_assembly_kernel->set_packed_params_buffer(_packed_weights.buffer());

347

348

_weights->mark_as_unused();

349

if(_bias != nullptr)

350

{

351

_bias->mark_as_unused();

}

_is_prepared = true;

}

}

} // namespace arm_compute