Blame - arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h - ml/ComputeLibrary

blob: 675c462c95d73545f1a2c36b71e519a5f3adbd6c [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
				25	#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
				26
steniu01	0d523cc	2017-07-13 14:24:23 +0100	[diff] [blame]	27	#include "arm_compute/core/CL/ICLSimple3DKernel.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	28
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame^]	29	#include <tuple>
				30
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	31	namespace arm_compute
				32	{
				33	class ICLTensor;
				34
				35	/** Interface for identifying the max value of 1D Logits */
steniu01	0d523cc	2017-07-13 14:24:23 +0100	[diff] [blame]	36	class CLLogits1DMaxKernel : public ICLSimple3DKernel
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37	{
				38	public:
				39	/** Set the input and output tensors.
				40	*
Georgios Pinitas	0979675	2017-07-10 16:05:21 +0100	[diff] [blame]	41	* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitas	e5f8fd6	2017-06-23 18:03:44 +0100	[diff] [blame]	42	* @param[out] output Destination tensor. Data types supported: same as @p input
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	43	*/
				44	void configure(const ICLTensor *input, ICLTensor *output);
				45	};
				46
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame^]	47	/** Interface for shifting, exponentiating and summing the logits */
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	48	class CLLogits1DShiftExpSumKernel : public ICLKernel
				49	{
				50	public:
				51	/** Default constructor */
				52	CLLogits1DShiftExpSumKernel();
				53	/** Prevent instances of this class from being copied (As this class contains pointers) */
				54	CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
				55	/** Prevent instances of this class from being copied (As this class contains pointers) */
				56	CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
				57	/** Allow instances of this class to be moved */
				58	CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
				59	/** Allow instances of this class to be moved */
				60	CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
				61	/** Set the input and output tensors.
				62	*
Georgios Pinitas	0979675	2017-07-10 16:05:21 +0100	[diff] [blame]	63	* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitas	e5f8fd6	2017-06-23 18:03:44 +0100	[diff] [blame]	64	* @param[in] max Max values tensor. Data types supported: same as @p input
				65	* @param[out] output Destination tensor. Data types supported: same as @p input
				66	* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame^]	67	* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	68	*/
Pablo Palmier	48a60f9	2017-10-18 11:03:08 +0100	[diff] [blame]	69	void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	70
				71	// Inherited methods overridden:
				72	void run(const Window &window, cl::CommandQueue &queue) override;
				73
				74	private:
				75	const ICLTensor *_input;
				76	const ICLTensor *_max;
				77	ICLTensor *_output;
				78	ICLTensor *_sum;
				79	};
				80
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame^]	81	/** Interface for max, shifting, exponentiating and summing the logits */
				82	class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
				83	{
				84	public:
				85	using ParallelReductionInfo = std::tuple<bool, unsigned int>;
				86
				87	public:
				88	/** Default constructor */
				89	CLLogits1DMaxShiftExpSumKernel();
				90	/** Prevent instances of this class from being copied (As this class contains pointers) */
				91	CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
				92	/** Prevent instances of this class from being copied (As this class contains pointers) */
				93	CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
				94	/** Allow instances of this class to be moved */
				95	CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
				96	/** Allow instances of this class to be moved */
				97	CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
				98	/** Set the input and output tensors.
				99	*
				100	* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
				101	* @param[in,out] max Max values tensor. Data types supported: same as @p input
				102	* @param[out] output Destination tensor. Data types supported: same as @p input
				103	* @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
				104	* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
				105	*/
				106	void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
				107	/** Checks if the given size is eligible for parallel reduction
				108	*
				109	* @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
				110	* @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
				111	*
				112	* @param[in] size Size to check
				113	*
				114	* @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
				115	* while the second element is the vector size of the execution.
				116	*/
				117	static ParallelReductionInfo is_parallel_reduction(size_t size);
				118
				119	// Inherited methods overridden:
				120	void run(const Window &window, cl::CommandQueue &queue) override;
				121
				122	private:
				123	const ICLTensor *_input;
				124	ICLTensor *_max;
				125	ICLTensor *_output;
				126	ICLTensor *_sum;
				127
				128	private:
				129	static const unsigned int _grid_size;
				130	static const unsigned int _serial_vector_size;
				131	static const unsigned int _parallel_vector_size;
				132	};
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	133	/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
				134	class CLLogits1DNormKernel : public ICLKernel
				135	{
				136	public:
				137	/** Default constructor */
				138	CLLogits1DNormKernel();
				139	/** Prevent instances of this class from being copied (As this class contains pointers) */
				140	CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
				141	/** Prevent instances of this class from being copied (As this class contains pointers) */
				142	CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
				143	/** Allow instances of this class to be moved */
				144	CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
				145	/** Allow instances of this class to be moved */
				146	CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
				147	/** Set the input and output tensors.
				148	*
Georgios Pinitas	0979675	2017-07-10 16:05:21 +0100	[diff] [blame]	149	* @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitas	e5f8fd6	2017-06-23 18:03:44 +0100	[diff] [blame]	150	* @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
				151	* @param[out] output Destination tensor. Data types supported: same as @p input
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	152	*/
				153	void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output);
				154
				155	// Inherited methods overridden:
				156	void run(const Window &window, cl::CommandQueue &queue) override;
				157
				158	private:
				159	const ICLTensor *_input;
				160	const ICLTensor *_sum;
				161	ICLTensor *_output;
				162	};
Gian Marco Iodice	f670a0a	2017-09-18 12:20:45 +0100	[diff] [blame]	163	} // namespace arm_compute
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	164	#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */