/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Manuel Bottini1f332d42019-11-29 17:25:25 +000024#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
25#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
Gian Marco05288a22017-11-21 10:57:50 +000026
27#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
28
29/** This file contains all available output stages for GEMMLowp on OpenCL.
30 *
31 * In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
Manuel Bottini1f332d42019-11-29 17:25:25 +000032 * and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
Gian Marco05288a22017-11-21 10:57:50 +000033 *
34 * More information about the GEMMLowp output stage can be found at https://github.com/google/gemmlowp/blob/master/doc/output.md
35 */
36
37namespace arm_compute
38{
39class ITensor;
40
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
 *
 * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
Georgios Pinitas51e53a32018-10-22 13:49:08 +0100118
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
 *
 * result_fixedpoint_multiplier, result_shift, result_offset_after_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  result_offset_after_shift    Offset to be applied to result before converting it back to QASYMM8_SIGNED
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int result_offset_after_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8_SIGNED
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
196
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on OpenCL.
 *
 * CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
 *
 * result_fixedpoint_multiplier, result_shift
 *
 * The final result is:
 *
 * (FixedPointMul(input[i][k], result_fixedpoint_multiplier) >> result_shift)
 *
 * where FixedPointMul(x, y) is the nearest integer to the following
 * mathematical expression, evaluated without overflow or intermediate rounding:
 *
 * (x * y) / 2^31
 *
 * For more information: https://github.com/google/gemmlowp/blob/master/public/output_stages.h#L68
 *
 * In case the bias tensor is provided, the final result is:
 *
 * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
 *
 * This function calls the following OpenCL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
 *
 * @note The function accepts also 2 optional input arguments (min and max) which can be used to implement "rectified linear unit" activation functions
 *       after the result is shifted right by result_shift
*/
class CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = std::numeric_limits<int32_t>::lowest(),
                   int max = std::numeric_limits<int32_t>::max());
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  compile_context              The compile context to be used.
     * @param[in]  input                        Input tensor. Data type supported: S32
     * @param[in]  bias                         Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                                          Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output                       Output tensor. Data type supported: QSYMM16
     * @param[in]  result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
     * @param[in]  result_shift                 Number of bits to shift right the result after the fixed point multiplication
     * @param[in]  min                          (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in]  max                          (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                                          Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift,
                   int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint
     *
     * @param[in] input  Input tensor info. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor info. Data type supported: QSYMM16
     * @param[in] min    (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to the minimum possible 32-bit signed integer.
     * @param[in] max    (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
     *                   Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to the maximum possible 32-bit signed integer.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
};
/** Basic function to execute GEMMLowpQuantizeDown kernels on CL.
 *
 * This function calls the following CL kernels:
 *
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
*/
class CLGEMMLowpOutputStage : public ICLSimpleFunction
{
public:
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  input  Input tensor. Data type supported: S32
     * @param[in]  bias   Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                    Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info   GEMMLowp output stage metadata.
     */
    void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Initialise the kernel's inputs, output
     *
     * @param[in]  compile_context The compile context to be used.
     * @param[in]  input           Input tensor. Data type supported: S32
     * @param[in]  bias            Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
     *                             Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in]  info            GEMMLowp output stage metadata.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo &info);
    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOutputStage
     *
     * @param[in] input  Input tensor. It is the output of @ref CLGEMMLowpMatrixMultiplyCore function. Data type supported: S32
     * @param[in] bias   Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
     *                   Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
     * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
     * @param[in] info   GEMMLowp output stage metadata.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo &info);
};
Georgios Pinitas932491f2018-09-21 16:33:15 +0100315} // namespace arm_compute
Sang-Hoon Parka45abfd2020-08-17 13:50:15 +0100316#endif /*ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H */