blob: 184d827d4bd8cf6c3d30d2af01319a3681b8cfaa [file] [log] [blame]
Gian Marco05288a22017-11-21 10:57:50 +00001/*
Giorgio Arena1856ff72020-02-07 13:46:45 +00002 * Copyright (c) 2017-2020 ARM Limited.
Gian Marco05288a22017-11-21 10:57:50 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Manuel Bottini1f332d42019-11-29 17:25:25 +000024#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
25#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
Gian Marco05288a22017-11-21 10:57:50 +000026
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"

#include <cstdint>
#include <limits>
28
29/** This file contains all available output stages for GEMMLowp on OpenCL.
30 *
31 * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
Manuel Bottini1f332d42019-11-29 17:25:25 +000032 * and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
Gian Marco05288a22017-11-21 10:57:50 +000033 *
34 * More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md
35 */
36
37namespace arm_compute
38{
39class ITensor;
40
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8Scale on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToUint8Scale depends on 3 parameters: result_offset, result_mult_int, result_shift
 *  The final result is:
 *
 *  ((input[i][k] + result_offset) * result_mult_int) >> result_shift
 *
 *  In case the bias tensor is provided, the final result is:
 *
 *  ((input[i][k] + bias[k] + result_offset) * result_mult_int) >> result_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8Scale : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input           Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output          Output tensor. Data type supported: QASYMM8
     * @param[in]  result_offset   Offset to be added to each element of the input matrix
     * @param[in]  result_mult_int Value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift    Number of bits to shift right the result before converting back to QASYMM8
     * @param[in]  min             (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max             (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                             Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    ARM_COMPUTE_DEPRECATED_REL(20.05)
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    ARM_COMPUTE_DEPRECATED_REL(20.05)
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
93
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 *  The final result is:
 *
 *  (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 *  where FixedPointMul(x, y) is the nearest integer to the following
 *  mathematical expression, evaluated without overflow or intermediate rounding:
 *
 *  (x * y) / 2^31
 *
 *  For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 *  In case the bias tensor is provided, the final result is:
 *
 *  ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
Georgios Pinitas51e53a32018-10-22 13:49:08 +0100154
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 *  The final result is:
 *
 *  (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 *  where FixedPointMul(x, y) is the nearest integer to the following
 *  mathematical expression, evaluated without overflow or intermediate rounding:
 *
 *  (x * y) / 2^31
 *
 *  For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 *  In case the bias tensor is provided, the final result is:
 *
 *  ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
215
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input      Input tensor. Data type supported: S32
     * @param[in]  bias       Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                        Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output     Output tensor. Data type supported: QASYMM8
     * @param[in]  multiplier Float multiplier to be multiplied to each element of the input matrix
     * @param[in]  offset     Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max        (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
 *
 *  CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
 *
 *  result_fixedpoint_multiplier, result_shift
 *
 *  The final result is:
 *
 *  (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
 *
 *  where FixedPointMul(x, y) is the nearest integer to the following
 *  mathematical expression, evaluated without overflow or intermediate rounding:
 *
 *  (x * y) / 2^31
 *
 *  For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 *  In case the bias tensor is provided, the final result is:
 *
 *  ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 *  This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
     *
     * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on CL.
 *
 *  This function calls the following CL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
*/
class CLGEMMLowpOutputStage : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input  Input tensor. Data type supported: S32
     * @param[in]  bias   Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                    Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info   GEMMLowp output stage metadata.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOutputStage
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in] info   GEMMLowp output stage metadata.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
};
Georgios Pinitas932491f2018-09-21 16:33:15 +0100347} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000348#endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */