Blame - arm_compute/core/NEON/kernels/NEAccumulateKernel.h - ml/ComputeLibrary

blob: 12be3e4e7a711c2e45b15b86e892af0d04f63a6b [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2016, 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEACCUMULATEKERNEL_H__
				25	#define __ARM_COMPUTE_NEACCUMULATEKERNEL_H__
				26
				27	#include "arm_compute/core/NEON/INESimpleKernel.h"
				28
				29	#include <cstdint>
				30
				31	namespace arm_compute
				32	{
				33	class ITensor;
				34
				35	/** Interface for the accumulate kernel
				36	*
				37	* Accumulation is computed by:
				38	* @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
				39	*/
				40	class NEAccumulateKernel : public INESimpleKernel
				41	{
				42	public:
				43	/** Set the input and accumulation tensors
				44	*
				45	* @param[in] input Source tensor. Data type supported: U8.
				46	* @param[out] accum Destination tensor. Data type supported: S16.
				47	*/
				48	void configure(const ITensor input, ITensor accum);
				49
				50	// Inherited methods overridden:
				51	void run(const Window &window) override;
				52	};
				53
				54	/** Interface for the accumulate weighted kernel
				55	*
				56	* Weighted accumulation is computed:
				57	* @f[ accum(x,y) = (1 - \alpha)accum(x,y) + \alphainput(x,y) @f]
				58	*
				59	* Where @f$ 0 \le \alpha \le 1 @f$
				60	* Conceptually, the rounding for this is defined as:
				61	* @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
				62	*/
				63	class NEAccumulateWeightedKernel : public INESimpleKernel
				64	{
				65	public:
				66	/** Default constructor */
				67	NEAccumulateWeightedKernel();
				68	/** Set the input and accumulation tensors, and the scale value
				69	*
				70	* @param[in] input Source tensor. Data type supported: U8.
				71	* @param[in] alpha Scalar value in the range [0.0f, 1.0f]
				72	* @param[in,out] accum Accumulated tensor. Data type supported: U8.
				73	*/
				74	void configure(const ITensor input, float alpha, ITensor accum);
				75
				76	// Inherited methods overridden:
				77	void run(const Window &window) override;
				78
				79	protected:
				80	float _alpha;
				81	};
				82
				83	#ifdef ARM_COMPUTE_ENABLE_FP16
				84	/** Interface for the accumulate weighted kernel using F16 */
				85	class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel
				86	{
				87	public:
				88	// Inherited methods overridden:
				89	void run(const Window &window) override;
				90	};
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame]	91	#else /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	92	using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel;
Anthony Barbier	ac69aa1	2017-07-03 17:39:37 +0100	[diff] [blame]	93	#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	94
				95	/** Interface for the accumulate squared kernel
				96	*
				97	* The accumulation of squares is computed:
				98	* @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
				99	*
				100	* Where @f$ 0 \le shift \le 15 @f$
				101	*/
				102	class NEAccumulateSquaredKernel : public INESimpleKernel
				103	{
				104	public:
				105	/** Default constructor */
				106	NEAccumulateSquaredKernel();
				107	/** Set the input and accumulation tensors and the shift value.
				108	*
				109	* @param[in] input Source tensor. Data type supported: U8.
				110	* @param[in] shift Shift value in the range of [0, 15]
				111	* @param[in,out] accum Accumulated tensor. Data type supported: S16.
				112	*/
				113	void configure(const ITensor input, uint32_t shift, ITensor accum);
				114
				115	// Inherited methods overridden:
				116	void run(const Window &window) override;
				117
				118	private:
				119	uint32_t _shift;
				120	};
				121	}
				122	#endif /* __ARM_COMPUTE_NEACCUMULATEKERNEL_H__ */