/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cpu/kernels/CpuAddMulAddKernel.h"
25
26#include "arm_compute/core/ITensor.h"
27#include "arm_compute/core/TensorInfo.h"
28#include "arm_compute/core/Validate.h"
29
30#include "src/core/CPP/Validate.h"
31#include "src/core/common/Registrars.h"
32#include "src/core/helpers/AutoConfiguration.h"
33#include "src/core/helpers/WindowHelpers.h"
34#include "src/cpu/kernels/addmuladd/list.h"
35
36namespace arm_compute
37{
38namespace cpu
39{
40namespace kernels
41{
42namespace
43{
// Registry of available micro-kernel implementations for the fused
// add + batchnorm-multiply + batchnorm-add operation, one entry per data type.
// Each entry pairs a human-readable name with a selector predicate (matched at
// runtime against the input data type) and the function pointer to dispatch to.
// NOTE(review): the validate path checks uk->ukernel == nullptr, which suggests
// a REGISTER_* macro can yield a null pointer when that data type's support is
// compiled out — confirm against src/core/common/Registrars.h.
// Only AArch64 builds register any implementation.
static const std::vector<CpuAddMulAddKernel::AddMulAddKernel> available_kernels =
{
#ifdef __aarch64__
    {
        "neon_fp32_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
        REGISTER_FP32_NEON(arm_compute::cpu::add_mul_add_fp32_neon)
    },
    {
        "neon_fp16_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16); },
        REGISTER_FP16_NEON(arm_compute::cpu::add_mul_add_fp16_neon)
    },
    {
        "neon_qasymm8_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::add_mul_add_u8_neon)
    },
    {
        "neon_qasymm8_signed_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_mul_add_s8_neon)
    }
#endif // __aarch64__
};
69
// Validates tensor metadata for the fused add + batchnorm(mul, add) + activation kernel.
//
// Checks performed, in order (the first failure is reported):
//  - mandatory tensors are non-null (add_output is optional and may be nullptr);
//  - only ConvertPolicy::SATURATE is accepted;
//  - activation must be IDENTITY or one of the RELU family;
//  - input1 is QASYMM8 / QASYMM8_SIGNED / F16 / F32, and input2 matches it;
//  - batchnorm coefficients are F32 for quantized inputs, otherwise they match
//    the input data type;
//  - input1 and input2 have identical shapes (no broadcasting), bn_mul and
//    bn_add match each other, and bn_mul is a 1D array whose length equals the
//    inputs' first dimension;
//  - outputs are only cross-checked against input1 when already initialized
//    (total_size() > 0), so uninitialized outputs pass through for auto-init;
//  - a micro-kernel implementation exists for this data type / ISA.
//
// Returns an empty Status on success, an error Status describing the first
// violated constraint otherwise.
Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
                          const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
                          const ITensorInfo *add_output, const ITensorInfo *final_output,
                          ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(policy != ConvertPolicy::SATURATE, "Only Saturate Policy is supported");

    // Restrict activations to the set the micro-kernels implement.
    using ActFunction = ActivationLayerInfo::ActivationFunction;
    const ActFunction act_func = act_info.activation();
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
        (act_func != ActFunction::BOUNDED_RELU && act_func != ActFunction::RELU && act_func != ActFunction::LU_BOUNDED_RELU && act_func != ActFunction::IDENTITY),
        "Only RELU Family activations, or no activation, is supported");

    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                         DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);

    // Quantized inputs use F32 batchnorm coefficients; float inputs require
    // coefficients of the same type as the inputs.
    if(is_data_type_quantized(input1->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_mul, 1, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_add, 1, DataType::F32);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_mul);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_add);
    }

    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2); // No broadcasting
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mul, bn_add);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->num_dimensions() != 1, "BatchNorm coefficients should be 1D array");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->tensor_shape()[0] != input1->tensor_shape()[0], "First dimensions of inputs and batchNorm coefs should match");

    // Validate in case we have add layer's output (intermediate) initialized
    if(add_output != nullptr && add_output->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, add_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, add_output);
    }

    // Validate in case final output has been initialized
    if(final_output->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, final_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, final_output);
    }

    // Ensure a micro-kernel exists for this data type / ISA combination.
    const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
    ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);

    return Status{};
}
125} // namespace
126
// Configures the kernel: validates the argument metadata, selects the
// micro-kernel matching input1's data type / ISA, auto-initializes any
// uninitialized outputs from input1's metadata, and sets the execution window
// over the final output. add_output (the intermediate result of the first
// addition) is optional and may be nullptr.
void CpuAddMulAddKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2,
                                   const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
                                   ITensorInfo *add_output, ITensorInfo *final_output,
                                   ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
    // These parameters are consumed only through validate_arguments below —
    // presumably ERROR_THROW_ON compiles out in release builds, leaving them
    // otherwise unused there; TODO(review) confirm against Error.h.
    ARM_COMPUTE_UNUSED(bn_mul, bn_add, input2);
    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, bn_add, bn_mul, final_output);
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));

    // Pick the micro-kernel for this data type / ISA; validate_arguments has
    // already confirmed one exists, these guards are defense-in-depth.
    const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
    ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
    ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);

    _policy     = policy;
    _act_info   = act_info;
    _run_method = uk->ukernel;
    _name       = std::string("CpuAddMulAddKernel/").append(uk->name);

    // Auto initialize outputs if not initialized
    set_shape_if_empty(*final_output, input1->tensor_shape());
    set_data_type_if_unknown(*final_output, input1->data_type());

    if(add_output != nullptr)
    {
        set_shape_if_empty(*add_output, input1->tensor_shape());
        set_data_type_if_unknown(*add_output, input1->data_type());
    }

    // Configure kernel window
    Window win;
    win = calculate_max_window(*final_output, Steps());
    ICpuKernel::configure(win);
}
160
161Status CpuAddMulAddKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
162 const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
163 const ITensorInfo *add_output, const ITensorInfo *final_output,
164 ConvertPolicy policy, const ActivationLayerInfo &act_info)
165{
166 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);
167
168 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));
169
170 return Status{};
171}
172
173void CpuAddMulAddKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
174{
175 ARM_COMPUTE_UNUSED(info);
176
177 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
178 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
179
180 ARM_COMPUTE_ERROR_ON(tensors.empty());
181 ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
182
183 const ITensor *input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
184 const ITensor *input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
185 const ITensor *bn_mul = tensors.get_const_tensor(TensorType::ACL_SRC_2);
186 const ITensor *bn_add = tensors.get_const_tensor(TensorType::ACL_SRC_3);
187 ITensor *add_output = tensors.get_tensor(TensorType::ACL_DST_0);
188 ITensor *final_output = tensors.get_tensor(TensorType::ACL_DST_1);
189
190 _run_method(input1, input2, bn_mul, bn_add, add_output, final_output, _policy, _act_info, window);
191}
192
// Returns the kernel's descriptive name ("CpuAddMulAddKernel/<ukernel name>",
// assembled in configure()).
const char *CpuAddMulAddKernel::name() const
{
    return _name.c_str();
}
197
// Exposes the static registry of micro-kernel implementations (used e.g. for
// introspection of the supported data types on this build).
const std::vector<CpuAddMulAddKernel::AddMulAddKernel> &CpuAddMulAddKernel::get_available_kernels()
{
    return available_kernels;
}
202} // namespace kernels
203} // namespace cpu
204} // namespace arm_compute