/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H

#include "arm_compute/runtime/CL/ICLSimpleFunction.h"

/** This file contains all available output stages for GEMMLowp on OpenCL.
 *
 * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
 * and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
 *
 * More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md
 */

namespace arm_compute
{
class ITensor;
41/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8Scale on OpenCL.
42 *
43 * CLGEMMLowpQuantizeDownInt32ToUint8Scale depends on 3 parameters: result_offset, result_mult_int, result_shift
44 * The final result is:
45 *
46 * ((input[i][k] + result_offset) * result_mult_int) >> result_shift
47 *
48 * In case the bias tensor is provided, the final result is:
49 *
Gian Marco58c57942017-11-28 09:10:03 +000050 * ((input[i][k] + bias[k] + result_offset) * result_mult_int) >> result_shift
Gian Marco05288a22017-11-21 10:57:50 +000051 *
52 * This function calls the following OpenCL kernels:
53 *
54 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleKernel
55 *
56 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
Gian Marco58c57942017-11-28 09:10:03 +000057 * after the result is shifted right by result_shift
Gian Marco05288a22017-11-21 10:57:50 +000058*/
59class CLGEMMLowpQuantizeDownInt32ToUint8Scale : public ICLSimpleFunction
60{
61public:
62 /** Initialise the kernel's inputs, output
63 *
64 * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
65 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
66 * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
Sheri Zhang0cdbda52020-02-25 15:57:21 +000067 * @param[out] output Output tensor. Data type supported: QASYMM8
Gian Marco05288a22017-11-21 10:57:50 +000068 * @param[in] result_offset Offset to be added to each element of the input matrix
69 * @param[in] result_mult_int Value to be multiplied to each element of the input matrix when once the result_offset has been add
70 * @param[in] result_shift Number of bits to shift right the result before converting back to QASYMM8
Giorgio Arena1856ff72020-02-07 13:46:45 +000071 * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
Gian Marco05288a22017-11-21 10:57:50 +000072 * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
Giorgio Arena1856ff72020-02-07 13:46:45 +000073 * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
Gian Marco05288a22017-11-21 10:57:50 +000074 */
Giorgio Arena1856ff72020-02-07 13:46:45 +000075 void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_offset, int result_mult_int, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
76 int max = std::numeric_limits<int32_t>::max());
Gian Marco58c57942017-11-28 09:10:03 +000077 /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale
78 *
79 * @param[in] input Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
80 * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
81 * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
Sheri Zhang0cdbda52020-02-25 15:57:21 +000082 * @param[in] output Output tensor. Data type supported: QASYMM8
Giorgio Arena1856ff72020-02-07 13:46:45 +000083 * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
Gian Marco58c57942017-11-28 09:10:03 +000084 * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
Giorgio Arena1856ff72020-02-07 13:46:45 +000085 * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
Gian Marco58c57942017-11-28 09:10:03 +000086 *
Georgios Pinitas631c41a2017-12-06 11:53:03 +000087 * @return a status
Gian Marco58c57942017-11-28 09:10:03 +000088 */
Giorgio Arena1856ff72020-02-07 13:46:45 +000089 static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
Gian Marco58c57942017-11-28 09:10:03 +000090};
91
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
 *
 * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
 *
 * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};

/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat on OpenCL.
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloatKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input      Input tensor. Data type supported: S32
     * @param[in]  bias       Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                        Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output     Output tensor. Data type supported: QASYMM8
     * @param[in]  multiplier Float multiplier to be multiplied to each element of the input matrix
     * @param[in]  offset     Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min        (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max        (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
     *                        Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, float multiplier, int offset, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8,
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
 *
 * result_fixedpoint_multiplier, result_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift)
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
     *
     * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16,
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on CL.
 *
 * This function calls the following CL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
*/
class CLGEMMLowpOutputStage : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input  Input tensor. Data type supported: S32
     * @param[in]  bias   Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                    Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info   GEMMLowp output stage metadata.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOutputStage
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in] info   GEMMLowp output stage metadata.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */