/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/runtime/cpu/operators/CpuDepthwiseConvolution.h"

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.h"

namespace arm_compute
{
namespace cpu
{
namespace
{
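// Validates the arguments for the assembly-backed (optimized) path: supported
// data types, a known layout, positive dilation, the dilated kernel fitting
// inside the padded input and a 1D bias matching the channel count, before
// deferring to the assembly dispatch's own validation.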
Status validate_arguments_optimized(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
    if(!is_data_type_quantized_per_channel(weights->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
    }
    ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
    ARM_COMPUTE_RETURN_ERROR_ON(info.dilation.x() < 1 || info.dilation.y() < 1);
    const size_t idx_w = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
    const size_t idx_h = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) + (weights->dimension(idx_w) - 1) * (info.dilation.x() - 1) > input->dimension(idx_w) + info.pad_stride_info.pad_left() +
                                info.pad_stride_info.pad_right());
    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) + (weights->dimension(idx_h) - 1) * (info.dilation.y() - 1) > input->dimension(idx_h) + info.pad_stride_info.pad_top() +
                                info.pad_stride_info.pad_bottom());

    if(biases != nullptr)
    {
        const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
        ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != weights->dimension(channel_idx));
    }

    ARM_COMPUTE_RETURN_ON_ERROR(CpuDepthwiseConvolutionAssemblyDispatch::validate(input, weights, biases, output, info));

    // Validate Activation Layer
    if(info.act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
    }
    return Status{};
}
} // namespace

CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::CpuDepthwiseConvolutionOptimizedInternal()
    : _dwc_optimized_func(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _has_bias(false), _is_quantized(false),
      _is_nchw(true), _permute(false), _is_activationlayer_enabled(false), _is_prepared(false)
{
}

void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::configure(ITensorInfo           *input,
                                                                                  const ITensorInfo     *weights,
                                                                                  const ITensorInfo     *biases,
                                                                                  ITensorInfo           *output,
                                                                                  const ConvolutionInfo &info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    // Perform validation step
    ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info));

    _is_quantized = is_data_type_quantized_asymmetric(input->data_type());
    _has_bias     = biases != nullptr;
    _is_nchw      = input->data_layout() == DataLayout::NCHW;
    _permute      = _is_nchw;
    _is_prepared  = false;

    // Configure pipeline
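    // A standalone activation stage is created only for functions other than
    // (bounded) ReLU; plain ReLU and ReLU6 are expected to be handled by the
    // optimized assembly path itself.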
    ActivationLayerInfo act_info_to_use = ActivationLayerInfo();
    const bool          is_relu         = arm_compute::utils::info_helpers::is_relu(info.act_info);
    const bool          is_relu6        = arm_compute::utils::info_helpers::is_relu6(info.act_info);
    _is_activationlayer_enabled         = info.act_info.enabled() && !(is_relu || is_relu6);

    if(!_is_activationlayer_enabled)
    {
        act_info_to_use = info.act_info;
    }

    _dwc_optimized_func = std::make_unique<CpuDepthwiseConvolutionAssemblyDispatch>();
    if(_is_nchw)
    {
        _permute_input   = std::make_unique<cpu::CpuPermute>();
        _permute_weights = std::make_unique<cpu::CpuPermute>();
        _permute_output  = std::make_unique<cpu::CpuPermute>();

        auto input_perm   = std::make_unique<TensorInfo>();
        auto weights_perm = std::make_unique<TensorInfo>();
        auto output_perm  = std::make_unique<TensorInfo>();

        // Configure the function to transform the input tensor from NCHW -> NHWC
        _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
        input_perm->set_data_layout(DataLayout::NHWC);

        // Configure the function to transform the weights tensor from IHW -> HWI
        _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
        weights_perm->set_data_layout(DataLayout::NHWC);

        output_perm->set_data_layout(DataLayout::NHWC);
        output_perm->set_quantization_info(output->quantization_info());

        // Configure optimized depthwise
        _dwc_optimized_func->configure(input_perm.get(), weights_perm.get(), biases, output_perm.get(), info);

        // Configure the function to transform the convolved output back to ACL's native NCHW format
        _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
    }
    else
    {
        _dwc_optimized_func->configure(input, weights, biases, output, info);
    }

    // Configure activation
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function = std::make_unique<cpu::CpuActivation>();
        _activationlayer_function->configure(output, nullptr, info.act_info);
    }
}

Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::validate(const ITensorInfo     *input,
                                                                                   const ITensorInfo     *weights,
                                                                                   const ITensorInfo     *biases,
                                                                                   const ITensorInfo     *output,
                                                                                   const ConvolutionInfo &info)
{
    return validate_arguments_optimized(input, weights, biases, output, info);
}

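// Expected tensor pack slots for the optimized path:
//   ACL_SRC_0: input, ACL_SRC_1: weights, ACL_SRC_2: bias, ACL_DST_0: output,
//   ACL_INT_0/1/2: permuted input/weights/output (NCHW only),
//   ACL_INT_3: workspace, ACL_INT_4: packed weights.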
void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::run(ITensorPack &tensors)
{
    ARM_COMPUTE_ERROR_ON_MSG(tensors.empty(), "No inputs provided");
    prepare(tensors);

    auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
    auto dst            = tensors.get_tensor(TensorType::ACL_DST_0);
    auto workspace      = tensors.get_tensor(TensorType::ACL_INT_3);
    auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);

    // Permute input
    if(_permute)
    {
        ITensorPack pack;
        auto        src      = tensors.get_const_tensor(TensorType::ACL_SRC_0);
        auto        src_perm = tensors.get_tensor(TensorType::ACL_INT_0);
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, src_perm);
        _permute_input->run(pack);
    }

    // Run assembly function
    if(_is_nchw)
    {
        auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
        auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
        auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC_0, src_perm);
        pack.add_tensor(TensorType::ACL_SRC_1, weights_perm);
        pack.add_tensor(TensorType::ACL_SRC_2, bias);
        pack.add_tensor(TensorType::ACL_INT_0, workspace);
        pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
        pack.add_tensor(TensorType::ACL_DST, dst_perm);
        _dwc_optimized_func->run(pack);
    }
    else
    {
        auto src     = tensors.get_tensor(TensorType::ACL_SRC_0);
        auto weights = tensors.get_tensor(TensorType::ACL_SRC_1);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC_0, src);
        pack.add_tensor(TensorType::ACL_SRC_1, weights);
        pack.add_tensor(TensorType::ACL_SRC_2, bias);
        pack.add_tensor(TensorType::ACL_INT_0, workspace);
        pack.add_tensor(TensorType::ACL_INT_1, packed_weights);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _dwc_optimized_func->run(pack);
    }

    // Permute output
    if(_is_nchw)
    {
        ITensorPack pack;
        auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
        pack.add_tensor(TensorType::ACL_SRC, dst_perm);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _permute_output->run(pack);
    }

    // Run activation
    if(_is_activationlayer_enabled)
    {
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, dst);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _activationlayer_function->run(pack);
    }
}

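// One-shot preparation: permute the weights to NHWC when the source layout is
// NCHW, then let the assembly dispatch pack them into its internal format. In
// the permuted case the original weights are marked as unused afterwards so
// their backing memory can be released.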
void CpuDepthwiseConvolution::CpuDepthwiseConvolutionOptimizedInternal::prepare(ITensorPack &tensors)
{
    if(!_is_prepared)
    {
        auto weights        = tensors.get_const_tensor(TensorType::ACL_SRC_1);
        auto bias           = tensors.get_const_tensor(TensorType::ACL_SRC_2);
        auto packed_weights = tensors.get_tensor(TensorType::ACL_INT_4);

        // Permute weights
        if(_permute)
        {
            auto permuted_weights = tensors.get_tensor(TensorType::ACL_INT_1);

            ITensorPack pack;
            pack.add_tensor(TensorType::ACL_SRC, weights);
            pack.add_tensor(TensorType::ACL_DST, permuted_weights);
            _permute_weights->run(pack);

            weights->mark_as_unused();

            ITensorPack pack_opt;
            pack_opt.add_const_tensor(TensorType::ACL_SRC_1, permuted_weights);
            pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
            pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);

            // Prepare optimized function
            _dwc_optimized_func->prepare(pack_opt);
        }
        else
        {
            ITensorPack pack_opt;
            pack_opt.add_tensor(TensorType::ACL_SRC_1, weights);
            pack_opt.add_tensor(TensorType::ACL_SRC_2, bias);
            pack_opt.add_tensor(TensorType::ACL_INT_1, packed_weights);

            // Prepare optimized function
            _dwc_optimized_func->prepare(pack_opt);
        }

        _is_prepared = true;
    }
}

CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::CpuDepthwiseConvolutionGeneric()
    : _depthwise_conv_kernel(nullptr), _permute_input(nullptr), _permute_weights(nullptr), _permute_output(nullptr), _activationlayer_function(nullptr), _is_nchw(true), _is_prepared(false),
      _is_activationlayer_enabled(false)
{
}

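// The native kernel works on NHWC data, so in the generic path NCHW tensors
// are permuted on the way in and the result is permuted back on the way out.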
void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
    ARM_COMPUTE_ERROR_THROW_ON(CpuDepthwiseConvolution::validate(input, weights, biases, output, info));

    _is_nchw     = input->data_layout() == DataLayout::NCHW;
    _is_prepared = !_is_nchw;

    ITensorInfo       *input_to_use   = input;
    const ITensorInfo *weights_to_use = weights;
    ITensorInfo       *output_to_use  = output;

    auto input_perm   = std::make_unique<TensorInfo>();
    auto weights_perm = std::make_unique<TensorInfo>();
    auto output_perm  = std::make_unique<TensorInfo>(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(TensorShape()));

    if(_is_nchw)
    {
        _permute_input   = std::make_unique<cpu::CpuPermute>();
        _permute_weights = std::make_unique<cpu::CpuPermute>();

        _permute_input->configure(input, input_perm.get(), PermutationVector(2U, 0U, 1U));
        input_perm->set_data_layout(DataLayout::NHWC);
        input_to_use = input_perm.get();

        _permute_weights->configure(weights, weights_perm.get(), PermutationVector(2U, 0U, 1U));
        weights_perm->set_data_layout(DataLayout::NHWC);
        weights_to_use = weights_perm.get();

        output_to_use = output_perm.get();
    }

    _depthwise_conv_kernel = std::make_unique<cpu::kernels::CpuDepthwiseConvolutionNativeKernel>();
    _depthwise_conv_kernel->configure(input_to_use, weights_to_use, biases, output_to_use, info);

    if(_is_nchw)
    {
        _permute_output = std::make_unique<cpu::CpuPermute>();
        _permute_output->configure(output_perm.get(), output, PermutationVector(1U, 2U, 0U));
        output_perm->set_data_layout(DataLayout::NHWC);
    }

    // Configure Activation Layer
    _is_activationlayer_enabled = info.act_info.enabled();
    if(_is_activationlayer_enabled)
    {
        _activationlayer_function = std::make_unique<cpu::CpuActivation>();
        _activationlayer_function->configure(output, nullptr, info.act_info);
    }
}

Status CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                         const ConvolutionInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
    if(input->data_layout() == DataLayout::NCHW)
    {
        TensorShape permuted_input_shape   = input->tensor_shape();
        TensorShape permuted_weights_shape = weights->tensor_shape();
        TensorShape permuted_output_shape  = misc::shape_calculator::compute_depthwise_convolution_shape(*input, *weights, info);
        permute(permuted_input_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_weights_shape, PermutationVector(2U, 0U, 1U));
        permute(permuted_output_shape, PermutationVector(2U, 0U, 1U));

        const TensorInfo permuted_input   = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_input_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_weights_shape).set_data_layout(DataLayout::NHWC));
        const TensorInfo permuted_output  = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(permuted_output_shape).set_data_layout(DataLayout::NCHW));

        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(input, &permuted_input, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(weights, &permuted_weights, PermutationVector(2U, 0U, 1U)));
        ARM_COMPUTE_RETURN_ON_ERROR(CpuPermute::validate(&permuted_output, output, PermutationVector(1U, 2U, 0U)));

        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(&permuted_input, &permuted_weights, biases, &permuted_output, info));
    }
    else
    {
        ARM_COMPUTE_RETURN_ON_ERROR(cpu::kernels::CpuDepthwiseConvolutionNativeKernel::validate(input, weights, biases, output, info));
    }

    // Validate Activation Layer
    if(info.act_info.enabled())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CpuActivation::validate(output, nullptr, info.act_info));
    }

    return Status{};
}

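// Expected tensor pack slots for the generic path: ACL_SRC_0/1/2 (input,
// weights, bias), ACL_DST_0 (output) and, for NCHW inputs, ACL_INT_0/1/2
// holding the permuted input/weights/output intermediates.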
void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::run(ITensorPack &tensors)
{
    auto src     = tensors.get_const_tensor(TensorType::ACL_SRC_0);
    auto weights = tensors.get_const_tensor(TensorType::ACL_SRC_1);
    auto biases  = tensors.get_const_tensor(TensorType::ACL_SRC_2);
    auto dst     = tensors.get_tensor(TensorType::ACL_DST_0);

    if(_is_nchw)
    {
        prepare(tensors);
        auto src_perm     = tensors.get_tensor(TensorType::ACL_INT_0);
        auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);
        auto dst_perm     = tensors.get_tensor(TensorType::ACL_INT_2);

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, src);
        pack.add_tensor(TensorType::ACL_DST, src_perm);
        _permute_input->run(pack);

        ITensorPack pack_depth;
        pack_depth.add_const_tensor(TensorType::ACL_SRC_0, src_perm);
        pack_depth.add_const_tensor(TensorType::ACL_SRC_1, weights_perm);
        pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
        pack_depth.add_tensor(TensorType::ACL_DST, dst_perm);
        NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
    }
    else
    {
        ITensorPack pack_depth;
        pack_depth.add_tensor(TensorType::ACL_SRC_0, src);
        pack_depth.add_tensor(TensorType::ACL_SRC_1, weights);
        pack_depth.add_tensor(TensorType::ACL_SRC_2, biases);
        pack_depth.add_tensor(TensorType::ACL_DST, dst);
        NEScheduler::get().schedule_op(_depthwise_conv_kernel.get(), Window::DimY, _depthwise_conv_kernel->window(), pack_depth);
    }

    if(_is_nchw)
    {
        ITensorPack pack;
        auto        dst_perm = tensors.get_tensor(TensorType::ACL_INT_2);
        pack.add_tensor(TensorType::ACL_SRC, dst_perm);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _permute_output->run(pack);
    }

    if(_is_activationlayer_enabled)
    {
        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, dst);
        pack.add_tensor(TensorType::ACL_DST, dst);
        _activationlayer_function->run(pack);
    }
}

void CpuDepthwiseConvolution::CpuDepthwiseConvolutionGeneric::prepare(ITensorPack &tensors)
{
    if(!_is_prepared)
    {
        auto weights      = tensors.get_const_tensor(TensorType::ACL_SRC_1);
        auto weights_perm = tensors.get_tensor(TensorType::ACL_INT_1);

        ARM_COMPUTE_ERROR_ON(!weights->is_used());

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, weights);
        pack.add_tensor(TensorType::ACL_DST, weights_perm);

        _permute_weights->run(pack);
        weights->mark_as_unused();
        _is_prepared = true;
    }
}

CpuDepthwiseConvolution::CpuDepthwiseConvolution()
    : _depth_conv_func(DepthwiseConvolutionFunction::GENERIC), _func_optimized(), _func_generic()
{
}

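// Illustrative usage sketch (hypothetical tensors; a real caller, e.g. the
// runtime wrapper, must also provide the ACL_INT_* intermediates listed above):
//   CpuDepthwiseConvolution op;
//   op.configure(src.info(), weights.info(), bias.info(), dst.info(), conv_info);
//   ITensorPack pack;
//   pack.add_tensor(TensorType::ACL_SRC_0, &src);
//   pack.add_tensor(TensorType::ACL_SRC_1, &weights);
//   pack.add_tensor(TensorType::ACL_SRC_2, &bias);
//   pack.add_tensor(TensorType::ACL_DST_0, &dst);
//   op.prepare(pack);
//   op.run(pack);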
void CpuDepthwiseConvolution::configure(ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ConvolutionInfo &info)
{
    _depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info);
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.configure(input, weights, biases, output, info);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.configure(input, weights, biases, output, info);
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

Status CpuDepthwiseConvolution::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const ConvolutionInfo &info)
{
    DepthwiseConvolutionFunction depth_conv_func = get_depthwiseconvolution_function(input, weights, biases, output, info);
    switch(depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            return CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info);
        case DepthwiseConvolutionFunction::GENERIC:
            return CpuDepthwiseConvolutionGeneric::validate(input, weights, biases, output, info);
        default:
            ARM_COMPUTE_ERROR("Unsupported DepthwiseConvolutionFunction");
    }
}

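// Prefer the assembly-backed path whenever its validate() succeeds; Status
// converts to true on success, so a truthy result selects OPTIMIZED.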
DepthwiseConvolutionFunction CpuDepthwiseConvolution::get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
                                                                                        const ConvolutionInfo &info)
{
    if(bool(CpuDepthwiseConvolutionOptimizedInternal::validate(input, weights, biases, output, info)))
    {
        return DepthwiseConvolutionFunction::OPTIMIZED;
    }
    else
    {
        return DepthwiseConvolutionFunction::GENERIC;
    }
}

void CpuDepthwiseConvolution::run(ITensorPack &tensors)
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.run(tensors);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.run(tensors);
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}

void CpuDepthwiseConvolution::prepare(ITensorPack &tensors)
{
    switch(_depth_conv_func)
    {
        case DepthwiseConvolutionFunction::OPTIMIZED:
            _func_optimized.prepare(tensors);
            break;
        case DepthwiseConvolutionFunction::GENERIC:
            _func_generic.prepare(tensors);
            break;
        default:
            ARM_COMPUTE_ERROR("DepthwiseConvolutionFunction not properly configured");
    }
}
} // namespace cpu
} // namespace arm_compute