Blame - src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp - ml/ComputeLibrary

blob: 86bea849e4c2ef0aba18e189b0856694daefa001 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	2	* Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
				25
Anthony Barbier	eaefd00	2018-07-20 17:49:35 +0100	[diff] [blame]	26	#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	27	#include "arm_compute/core/Error.h"
				28	#include "arm_compute/core/Helpers.h"
				29	#include "arm_compute/core/NEON/NEFixedPoint.h"
				30	#include "arm_compute/core/Types.h"
				31	#include "arm_compute/core/Validate.h"
				32
				33	#include <arm_neon.h>
				34
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	35	namespace arm_compute
				36	{
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37	namespace
				38	{
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	39	Status validate_arguments(const ITensorInfo input, const ITensorInfo output, float beta)
				40	{
				41	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
				42	ARM_COMPUTE_UNUSED(beta);
				43
				44	ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
				45	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
				46
				47	if(output->total_size() > 0)
				48	{
				49	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				50	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
				51	}
				52
				53	return Status{};
				54	}
				55
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	56	void matrix_addition_f32(const ITensor input, ITensor output, const Window &window, float beta)
				57	{
				58	const float32x4_t beta_f32 = vdupq_n_f32(beta);
				59
				60	Iterator in(input, window);
				61	Iterator out(output, window);
				62
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	63	execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	64	{
				65	const auto in_ptr = reinterpret_cast<const float *>(in.ptr());
				66	const auto out_ptr = reinterpret_cast<float *>(out.ptr());
				67
Pablo Tello	221f381	2017-06-28 17:27:56 +0100	[diff] [blame]	68	float32x4x4_t alpha_ab = vld4q_f32(out_ptr);
				69	const float32x4x4_t c = vld4q_f32(in_ptr);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	70
				71	// Multiply matrix C by its weight and accumulate
				72	alpha_ab.val[0] = vmlaq_f32(alpha_ab.val[0], c.val[0], beta_f32);
				73	alpha_ab.val[1] = vmlaq_f32(alpha_ab.val[1], c.val[1], beta_f32);
				74	alpha_ab.val[2] = vmlaq_f32(alpha_ab.val[2], c.val[2], beta_f32);
				75	alpha_ab.val[3] = vmlaq_f32(alpha_ab.val[3], c.val[3], beta_f32);
				76
Pablo Tello	221f381	2017-06-28 17:27:56 +0100	[diff] [blame]	77	vst4q_f32(out_ptr, alpha_ab);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	78	},
				79	in, out);
				80	}
				81
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Accumulate a weighted F16 matrix into the output in place: output += beta * input.
 *
 * Each iteration of the window loop loads 16 half-precision values from both tensors,
 * multiplies the input by the weight, adds the product to the output values and stores
 * the 16 results back to the output. Only compiled when FP16 vector arithmetic is available.
 *
 * @param[in]      input  Tensor holding the matrix that is scaled by @p beta.
 * @param[in, out] output Tensor that is read, accumulated into and written back.
 * @param[in]      window Region over which the loop is executed.
 * @param[in]      beta   Weight applied to every element of @p input; passed as float and
 *                        broadcast into a half-precision vector.
 */
void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &window, float beta)
{
    // Broadcast the weight once, outside the loop.
    const float16x8_t beta_f16 = vdupq_n_f16(beta);

    Iterator in(input, window);
    Iterator out(output, window);

    execute_window_loop(window, [&](const Coordinates &)
    {
        const auto in_ptr  = reinterpret_cast<const float16_t *>(in.ptr());
        const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());

        // vld2q de-interleaves on load and vst2q re-interleaves on store, so every element
        // ends up back at its original position; the operation itself is element-wise.
        float16x8x2_t       alpha_ab = vld2q_f16(out_ptr);
        const float16x8x2_t c        = vld2q_f16(in_ptr);

        // Multiply matrix C by its weight and accumulate
        alpha_ab.val[0] = vaddq_f16(alpha_ab.val[0], vmulq_f16(c.val[0], beta_f16));
        alpha_ab.val[1] = vaddq_f16(alpha_ab.val[1], vmulq_f16(c.val[1], beta_f16));

        vst2q_f16(out_ptr + 0, alpha_ab);
    },
    in, out);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	106
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	107	} // namespace
				108
				109	NEGEMMMatrixAdditionKernel::NEGEMMMatrixAdditionKernel()
				110	: INESimpleKernel(), _func(nullptr), _beta(0.0f)
				111	{
				112	}
				113
				114	void NEGEMMMatrixAdditionKernel::configure(const ITensor input, ITensor output, float beta)
				115	{
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	116	ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
				117
				118	// Perform validation step
				119	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), beta));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	120
				121	switch(input->info()->data_type())
				122	{
				123	case DataType::F32:
				124	_func = &matrix_addition_f32;
				125	break;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	126	case DataType::F16:
Ioan-Cristian Szabo	5edbd1c	2017-11-13 13:34:08 +0000	[diff] [blame]	127	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	128	_func = &matrix_addition_f16;
				129	break;
Ioan-Cristian Szabo	5edbd1c	2017-11-13 13:34:08 +0000	[diff] [blame]	130	#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	131	default:
				132	ARM_COMPUTE_ERROR("Data type not supported");
				133	break;
				134	}
				135
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	136	// Configure kernel window
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	137	constexpr unsigned int num_elems_processed_per_iteration = 16;
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	138	INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
				139
				140	_beta = beta;
				141	}
				142
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	143	Status NEGEMMMatrixAdditionKernel::validate(const ITensorInfo input, const ITensorInfo output, float beta)
				144	{
				145	constexpr unsigned int num_elems_processed_per_iteration = 16;
				146	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, beta));
				147	ARM_COMPUTE_RETURN_ON_ERROR(INESimpleKernel::validate(input->clone().get(), output->clone().get(), num_elems_processed_per_iteration));
				148	return Status{};
				149	}
				150
Moritz Pflanzer	c186b57	2017-09-07 09:48:04 +0100	[diff] [blame]	151	void NEGEMMMatrixAdditionKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	152	{
Moritz Pflanzer	c186b57	2017-09-07 09:48:04 +0100	[diff] [blame]	153	ARM_COMPUTE_UNUSED(info);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	154	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				155	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
				156
				157	if(_beta != 0.0f)
				158	{
				159	(*_func)(_input, _output, window, _beta);
				160	}
				161	}
Georgios Pinitas	ea9e0dc	2018-08-28 16:24:56 +0100	[diff] [blame]	162	} // namespace arm_compute