Blame - src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp - ml/ComputeLibrary

blob: 6a2802a991f769c09a7f98505a9bf1f9763a1a2a [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
Georgios Pinitas	ddb93bb	2020-10-02 16:38:59 +0100	[diff] [blame]	2	* Copyright (c) 2016-2020 Arm Limited.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Michalis Spyrou	ebcebf1	2020-10-21 00:04:14 +0100	[diff] [blame]	24	#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	25
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/Helpers.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	28	#include "arm_compute/core/Types.h"
				29	#include "arm_compute/core/Validate.h"
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	30	#include "src/core/CPP/Validate.h"
Georgios Pinitas	ddb93bb	2020-10-02 16:38:59 +0100	[diff] [blame]	31	#include "src/core/NEON/NEFixedPoint.h"
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	32	#include "src/core/helpers/AutoConfiguration.h"
				33	#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	34
				35	#include <arm_neon.h>
				36
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37	namespace arm_compute
				38	{
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	39	namespace
				40	{
// Number of elements each kernel iteration consumes: the F32 path loads four
// 4-lane vectors (vld4q_f32) and the F16 path two 8-lane vectors (vld2q_f16),
// i.e. 16 elements in both cases.
constexpr unsigned int num_elems_processed_per_iteration{ 16 };
				42
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	43	Status validate_arguments(const ITensorInfo input, const ITensorInfo output, float beta)
				44	{
				45	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
				46	ARM_COMPUTE_UNUSED(beta);
				47
				48	ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
				49	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
				50
				51	if(output->total_size() > 0)
				52	{
				53	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				54	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
				55	}
				56
				57	return Status{};
				58	}
				59
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	60	void matrix_addition_f32(const ITensor input, ITensor output, const Window &window, float beta)
				61	{
				62	const float32x4_t beta_f32 = vdupq_n_f32(beta);
				63
				64	Iterator in(input, window);
				65	Iterator out(output, window);
				66
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	67	execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	68	{
				69	const auto in_ptr = reinterpret_cast<const float *>(in.ptr());
				70	const auto out_ptr = reinterpret_cast<float *>(out.ptr());
				71
Pablo Tello	221f381	2017-06-28 17:27:56 +0100	[diff] [blame]	72	float32x4x4_t alpha_ab = vld4q_f32(out_ptr);
				73	const float32x4x4_t c = vld4q_f32(in_ptr);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	74
				75	// Multiply matrix C by its weight and accumulate
				76	alpha_ab.val[0] = vmlaq_f32(alpha_ab.val[0], c.val[0], beta_f32);
				77	alpha_ab.val[1] = vmlaq_f32(alpha_ab.val[1], c.val[1], beta_f32);
				78	alpha_ab.val[2] = vmlaq_f32(alpha_ab.val[2], c.val[2], beta_f32);
				79	alpha_ab.val[3] = vmlaq_f32(alpha_ab.val[3], c.val[3], beta_f32);
				80
Pablo Tello	221f381	2017-06-28 17:27:56 +0100	[diff] [blame]	81	vst4q_f32(out_ptr, alpha_ab);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	82	},
				83	in, out);
				84	}
				85
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** In-place accumulation output += beta * input for F16 tensors.
 *
 * Only compiled when FP16 vector arithmetic is available. Each window step reads
 * 16 half-precision values from both tensors, multiplies the input by @p beta,
 * adds it to the output values and writes the 16 results back to @p output.
 *
 * @param[in]      input  Tensor whose elements are scaled by @p beta.
 * @param[in, out] output Tensor that is read, accumulated into and written back.
 * @param[in]      window Region over which both tensors are iterated.
 * @param[in]      beta   Weight applied to every element of @p input; it is converted
 *                        to half precision when broadcast into the vector.
 */
void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta)
{
    const float16x8_t beta_f16 = vdupq_n_f16(beta);

    Iterator in(input, window);
    Iterator out(output, window);

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto in_ptr  = reinterpret_cast<const float16_t *>(in.ptr());
        const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());

        // Same de-interleaving load on both tensors, matching interleaving store below.
        float16x8x2_t       alpha_ab = vld2q_f16(out_ptr);
        const float16x8x2_t c        = vld2q_f16(in_ptr);

        // Multiply matrix C by its weight and accumulate
        for(int lane = 0; lane < 2; ++lane)
        {
            alpha_ab.val[lane] = vaddq_f16(alpha_ab.val[lane], vmulq_f16(c.val[lane], beta_f16));
        }

        vst2q_f16(out_ptr, alpha_ab);
    },
    in, out);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	110
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	111	} // namespace
				112
				113	NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
				114	: INESimpleKernel(), _func(nullptr), _beta(0.0f)
				115	{
				116	}
				117
				118	void NEGEMMMatrixAdditionKernel::configure(const ITensor input, ITensor output, float beta)
				119	{
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	120	ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
				121
				122	// Perform validation step
				123	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	124
				125	switch(input->info()->data_type())
				126	{
				127	case DataType::F32:
				128	_func = &matrix_addition_f32;
				129	break;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	130	case DataType::F16:
Ioan-Cristian Szabo	5edbd1c	2017-11-13 13:34:08 +0000	[diff] [blame]	131	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	132	_func = &matrix_addition_f16;
				133	break;
Ioan-Cristian Szabo	5edbd1c	2017-11-13 13:34:08 +0000	[diff] [blame]	134	#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	135	default:
				136	ARM_COMPUTE_ERROR("Data type not supported");
				137	break;
				138	}
				139
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	140	// Configure kernel window
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	141	INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
				142
				143	_beta = beta;
				144	}
				145
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	146	Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo input, const ITensorInfo output, float beta)
				147	{
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	148	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta));
				149	ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration));
				150	return Status{};
				151	}
				152
Moritz Pflanzer	c186b57	2017-09-07 09:48:04 +0100	[diff] [blame]	153	void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	154	{
Moritz Pflanzer	c186b57	2017-09-07 09:48:04 +0100	[diff] [blame]	155	ARM_COMPUTE_UNUSED(info);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	156	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				157	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
				158
				159	if(_beta != 0.0f)
				160	{
				161	(*_func)(_input, _output, window, _beta);
				162	}
				163	}
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	164	} // namespace arm_compute