blob: 675c462c95d73545f1a2c36b71e519a5f3adbd6c [file] [log] [blame]
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
25#define __ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__
26
steniu010d523cc2017-07-13 14:24:23 +010027#include "arm_compute/core/CL/ICLSimple3DKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028
Chunosovd6afedc2017-11-06 22:09:45 +070029#include <tuple>
30
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031namespace arm_compute
32{
33class ICLTensor;
34
35/** Interface for the identifying the max value of 1D Logits */
steniu010d523cc2017-07-13 14:24:23 +010036class CLLogits1DMaxKernel : public ICLSimple3DKernel
Anthony Barbier6ff3b192017-09-04 18:44:23 +010037{
38public:
39 /** Set the input and output tensors.
40 *
Georgios Pinitas09796752017-07-10 16:05:21 +010041 * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010042 * @param[out] output Destination tensor. Data types supported: same as @p input
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043 */
44 void configure(const ICLTensor *input, ICLTensor *output);
45};
46
Chunosovd6afedc2017-11-06 22:09:45 +070047/** Interface for shifting, exponentiating and summing the logits */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010048class CLLogits1DShiftExpSumKernel : public ICLKernel
49{
50public:
51 /** Default constructor */
52 CLLogits1DShiftExpSumKernel();
53 /** Prevent instances of this class from being copied (As this class contains pointers) */
54 CLLogits1DShiftExpSumKernel(const CLLogits1DShiftExpSumKernel &) = delete;
55 /** Prevent instances of this class from being copied (As this class contains pointers) */
56 CLLogits1DShiftExpSumKernel &operator=(const CLLogits1DShiftExpSumKernel &) = delete;
57 /** Allow instances of this class to be moved */
58 CLLogits1DShiftExpSumKernel(CLLogits1DShiftExpSumKernel &&) = default;
59 /** Allow instances of this class to be moved */
60 CLLogits1DShiftExpSumKernel &operator=(CLLogits1DShiftExpSumKernel &&) = default;
61 /** Set the input and output tensors.
62 *
Georgios Pinitas09796752017-07-10 16:05:21 +010063 * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitase5f8fd62017-06-23 18:03:44 +010064 * @param[in] max Max values tensor. Data types supported: same as @p input
65 * @param[out] output Destination tensor. Data types supported: same as @p input
66 * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
Chunosovd6afedc2017-11-06 22:09:45 +070067 * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
Anthony Barbier6ff3b192017-09-04 18:44:23 +010068 */
Pablo Palmier48a60f92017-10-18 11:03:08 +010069 void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010070
71 // Inherited methods overridden:
72 void run(const Window &window, cl::CommandQueue &queue) override;
73
74private:
75 const ICLTensor *_input;
76 const ICLTensor *_max;
77 ICLTensor *_output;
78 ICLTensor *_sum;
79};
80
Chunosovd6afedc2017-11-06 22:09:45 +070081/** Interface for max, shifting, exponentiating and summing the logits */
82class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
83{
84public:
85 using ParallelReductionInfo = std::tuple<bool, unsigned int>;
86
87public:
88 /** Default constructor */
89 CLLogits1DMaxShiftExpSumKernel();
90 /** Prevent instances of this class from being copied (As this class contains pointers) */
91 CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
92 /** Prevent instances of this class from being copied (As this class contains pointers) */
93 CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
94 /** Allow instances of this class to be moved */
95 CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
96 /** Allow instances of this class to be moved */
97 CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
98 /** Set the input and output tensors.
99 *
100 * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
101 * @param[in,out] max Max values tensor. Data types supported: same as @p input
102 * @param[out] output Destination tensor. Data types supported: same as @p input
103 * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
104 * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.f
105 */
106 void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
107 /** Checks if the given size is eligible for parallel reduction
108 *
109 * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
110 * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
111 *
112 * @param[in] size Size to check
113 *
114 * @return A two-element tuple where the first element is a boolean specifying is a parallel reduction will be run,
115 * while the second elements is the vector size of the execution.
116 */
117 static ParallelReductionInfo is_parallel_reduction(size_t size);
118
119 // Inherited methods overridden:
120 void run(const Window &window, cl::CommandQueue &queue) override;
121
122private:
123 const ICLTensor *_input;
124 ICLTensor *_max;
125 ICLTensor *_output;
126 ICLTensor *_sum;
127
128private:
129 static const unsigned int _grid_size;
130 static const unsigned int _serial_vector_size;
131 static const unsigned int _parallel_vector_size;
132};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100133/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
134class CLLogits1DNormKernel : public ICLKernel
135{
136public:
137 /** Default constructor */
138 CLLogits1DNormKernel();
139 /** Prevent instances of this class from being copied (As this class contains pointers) */
140 CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
141 /** Prevent instances of this class from being copied (As this class contains pointers) */
142 CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
143 /** Allow instances of this class to be moved */
144 CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
145 /** Allow instances of this class to be moved */
146 CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
147 /** Set the input and output tensors.
148 *
Georgios Pinitas09796752017-07-10 16:05:21 +0100149 * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32
Georgios Pinitase5f8fd62017-06-23 18:03:44 +0100150 * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
151 * @param[out] output Destination tensor. Data types supported: same as @p input
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152 */
153 void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output);
154
155 // Inherited methods overridden:
156 void run(const Window &window, cl::CommandQueue &queue) override;
157
158private:
159 const ICLTensor *_input;
160 const ICLTensor *_sum;
161 ICLTensor *_output;
162};
Gian Marco Iodicef670a0a2017-09-18 12:20:45 +0100163} // namespace arm_compute
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100164#endif /*__ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H__ */