/*
 * Copyright (c) 2019-2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Jonathan Deakina668f9f2024-01-24 09:15:38 +000024#ifndef ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
25#define ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010026
#include "arm_compute/core/Rounding.h"
#include "arm_compute/core/utils/misc/Utility.h"

#include "support/ToolchainSupport.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <utility>
#include <vector>

34namespace arm_compute
35{
using qasymm8_signed_t = int8_t;   /**< 8 bit signed quantized asymmetric scalar value */
using qasymm8_t        = uint8_t;  /**< 8 bit quantized asymmetric scalar value */
using qsymm16_t        = int16_t;  /**< 16 bit quantized symmetric scalar value */
using qasymm16_t       = uint16_t; /**< 16 bit quantized asymmetric scalar value */

/** Quantization info when assuming per layer quantization */
struct UniformQuantizationInfo
{
    /** Default constructor
     *
     * Initializes scale and offset to zero, so that empty() returns true.
     */
    UniformQuantizationInfo() : scale(0.f), offset(0)
    {
    }
    /** Constructor
     *
     * @param[in] scale  Quantization scale
     * @param[in] offset Quantization offset
     */
    UniformQuantizationInfo(float scale, int32_t offset) : scale(scale), offset(offset)
    {
    }
    /** Checks if the scale and offset are both zero
     *
     * @return True if both the scale and the offset compare equal to zero.
     */
    bool empty() const
    {
        return (scale == 0) && (offset == 0);
    }

    float   scale;  /**< Quantization scale */
    int32_t offset; /**< Quantization offset */
};

66/** Quantization information */
Georgios Pinitas3d13af82019-06-04 13:04:16 +010067class QuantizationInfo
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010068{
Georgios Pinitas3d13af82019-06-04 13:04:16 +010069public:
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010070 /** Default constructor */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010071 QuantizationInfo() noexcept : _scale(), _offset()
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010072 {
73 }
74 /** Construct quantization info.
75 *
76 * @note Used for symmetric quantization
77 *
78 * @param[in] scale Scale.
79 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010080 QuantizationInfo(float scale) : _scale(1, scale), _offset()
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010081 {
82 }
83 /** Construct quantization info.
84 *
85 * @note Used for asymmetric quantization
86 *
Jonathan Deakina668f9f2024-01-24 09:15:38 +000087 * @param[in] scale Scale.
88 * @param[in] offset Offset.
89 * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010090 */
Jonathan Deakina668f9f2024-01-24 09:15:38 +000091 QuantizationInfo(float scale, int offset, bool is_dynamic = false)
92 : _scale(1, scale), _offset(1, offset), _is_dynamic(is_dynamic)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +010093 {
94 }
95 /** Construct quantization info.
96 *
97 * @note Used for symmetric per channel quantization
98 *
99 * @param[in] scale Scale.
100 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101 QuantizationInfo(std::vector<float> scale) : _scale(scale), _offset()
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100102 {
103 }
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100104 /** Construct quantization info.
105 *
106 * @note Used for asymmetric per channel quantization
107 *
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000108 * @param[in] scale Scale.
109 * @param[in] offset Offset.
110 * @param[in] is_dynamic Whether this QuantizationInfo is dynamic, i.e. the scale and offset may change.
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100111 */
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000112 QuantizationInfo(std::vector<float> scale, std::vector<int32_t> offset, bool is_dynamic = false)
113 : _scale(scale), _offset(offset), _is_dynamic(is_dynamic)
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100114 {
115 }
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100116 /** Scale vector accessor
117 *
118 * @return A reference to quantization scale metadata
119 */
120 const std::vector<float> &scale() const
121 {
122 return _scale;
123 }
124 /** Offset vector accessor
125 *
126 * @return A reference to quantization offset metadata
127 */
128 const std::vector<int32_t> &offset() const
129 {
130 return _offset;
131 }
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000132 /** is_dynamic accessor
133 *
134 * @return If true, the scale and offset may change, so operators will need to read on every run
135 */
136 bool is_dynamic() const
137 {
138 return _is_dynamic;
139 }
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100140 /** Indicates whether this QuantizationInfo has valid settings or not
141 *
142 * @return True if the this has invalid settings.
143 */
144 bool empty() const
145 {
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100146 return _scale.empty() && _offset.empty();
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100147 }
148 /** Return per layer quantization info
149 *
150 * @return Uniform quantization information in case of empty information zero is returned in the respective fields
151 */
152 UniformQuantizationInfo uniform() const
153 {
154 UniformQuantizationInfo uqinfo;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100155 uqinfo.scale = _scale.empty() ? 0 : _scale[0];
156 uqinfo.offset = _offset.empty() ? 0 : _offset[0];
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100157
158 return uqinfo;
159 }
160
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100161private:
162 std::vector<float> _scale; /**< Vector containing scaling factors */
163 std::vector<int32_t> _offset; /**< Vector containing zero offsets */
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000164 bool _is_dynamic =
165 false; /**< If true, the scale and offset may change, so operators will need to read on every run */
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100166};
167
168/** Check whether two quantization info are equal.
169 *
170 * @param[in] lhs RHS quantization info.
171 * @param[in] rhs LHS quantization info.
172 *
173 * @return True if the given quantization info is the same.
174 */
175inline bool operator==(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
176{
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100177 return (lhs.scale() == rhs.scale()) && (lhs.offset() == rhs.offset());
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100178}
179
180/** Check whether two quantization info are not equal.
181 *
182 * @param[in] lhs RHS quantization info.
183 * @param[in] rhs LHS quantization info.
184 *
185 * @return True if the given quantization info is the same.
186 */
187inline bool operator!=(const QuantizationInfo &lhs, const QuantizationInfo &rhs)
188{
189 return !(operator==(lhs, rhs));
190}
191
192/** Check whether two quantization info are equal.
193 *
194 * @param[in] lhs RHS quantization info.
195 * @param[in] rhs LHS quantization info.
196 *
197 * @return True if the given quantization info is the same.
198 */
199inline bool operator==(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
200{
201 return (lhs.scale == rhs.scale) && (lhs.offset == rhs.offset);
202}
203
204/** Check whether two quantization info are not equal.
205 *
206 * @param[in] lhs RHS quantization info.
207 * @param[in] rhs LHS quantization info.
208 *
209 * @return True if the given quantization info is the same.
210 */
211inline bool operator!=(const UniformQuantizationInfo &lhs, const UniformQuantizationInfo &rhs)
212{
213 return !(operator==(lhs, rhs));
214}
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000215template <typename QUANTIZED_TYPE = uint8_t>
216struct Qasymm8QuantizationHelper
217{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100218 static_assert(std::is_same<QUANTIZED_TYPE, uint8_t>::value || std::is_same<QUANTIZED_TYPE, int8_t>::value,
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000219 "quantized type should be either uint8_t or int8_t.");
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100220
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000221 /** Quantize a value given a 8-bit asymmetric quantization scheme
222 *
Giorgio Arena433ea492021-05-26 15:32:50 +0100223 * @param[in] value Value to quantize
224 * @param[in] qinfo Quantization information to use for quantizing
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000225 *
226 * @return Quantized value
227 */
Giorgio Arena433ea492021-05-26 15:32:50 +0100228 static inline QUANTIZED_TYPE quantize(float value, const UniformQuantizationInfo &qinfo)
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000229 {
230 ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
Giorgio Arena433ea492021-05-26 15:32:50 +0100231 const int quantized = support::cpp11::lround(value / qinfo.scale) + qinfo.offset;
232 return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
233 }
234
235 /** Quantize a value given a 8-bit asymmetric quantization scheme using a specific rounding policy
236 *
237 * @param[in] value Value to quantize
238 * @param[in] qinfo Quantization information to use for quantizing
239 * @param[in] rounding_policy Rounding policy to use
240 *
241 * @return Quantized value
242 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100243 static inline QUANTIZED_TYPE
244 quantize(float value, const UniformQuantizationInfo &qinfo, RoundingPolicy rounding_policy)
Giorgio Arena433ea492021-05-26 15:32:50 +0100245 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100246 if (rounding_policy == RoundingPolicy::TO_NEAREST_UP)
Giorgio Arena433ea492021-05-26 15:32:50 +0100247 {
248 return quantize(value, qinfo);
249 }
250
251 ARM_COMPUTE_ERROR_ON(qinfo.scale == 0);
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000252 const int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
253 return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
254 }
255
256 /** Quantize a value given a 8-bit asymmetric quantization scheme
257 *
258 * @param[in] value Value to quantize
259 * @param[in] qinfo Quantization information to use for quantizing
260 * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
261 *
262 * @return Quantized value
263 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100264 static inline QUANTIZED_TYPE
265 quantize(float value, const QuantizationInfo &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000266 {
267 const UniformQuantizationInfo uqinfo = qinfo.uniform();
268 ARM_COMPUTE_ERROR_ON(uqinfo.scale == 0);
269 const int quantized = arm_compute::round(value / uqinfo.scale, rounding_policy) + uqinfo.offset;
270 return static_cast<QUANTIZED_TYPE>(arm_compute::utility::clamp<decltype(quantized), QUANTIZED_TYPE>(quantized));
271 }
272
273 /** Dequantize a value given a 8-bit asymmetric quantization scheme
274 *
275 * @param[in] value Value to dequantize
276 * @param[in] qinfo Quantization information to use for dequantizing
277 *
278 * @return Dequantized value
279 */
280 static inline float dequantize(QUANTIZED_TYPE value, const UniformQuantizationInfo &qinfo)
281 {
282 return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
283 }
284
285 /** Dequantize a value given a 8-bit asymmetric quantization scheme
286 *
287 * @param[in] value Value to dequantize
288 * @param[in] qinfo Quantization information to use for dequantizing
289 *
290 * @return Dequantized value
291 */
292 static inline float dequantize(QUANTIZED_TYPE value, const QuantizationInfo &qinfo)
293 {
294 const UniformQuantizationInfo uqinfo = qinfo.uniform();
295 return (static_cast<int>(value) - uqinfo.offset) * uqinfo.scale;
296 }
297};
298
299/** Quantize a value given an unsigned 8-bit asymmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100300 *
301 * @param[in] value Value to quantize
302 * @param[in] qinfo Quantization information to use for quantizing
303 * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
304 *
305 * @return Quantized value
306 */
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000307template <typename INFO_TYPE>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100308inline uint8_t
309quantize_qasymm8(float value, const INFO_TYPE &qinfo, RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100310{
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000311 return Qasymm8QuantizationHelper<uint8_t>::quantize(value, qinfo, rounding_policy);
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100312}
313
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000314/** Quantize a value given a signed 8-bit asymmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100315 *
316 * @param[in] value Value to quantize
317 * @param[in] qinfo Quantization information to use for quantizing
318 * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
319 *
320 * @return Quantized value
321 */
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000322template <typename INFO_TYPE>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100323inline int8_t quantize_qasymm8_signed(float value,
324 const INFO_TYPE &qinfo,
325 RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100326{
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000327 return Qasymm8QuantizationHelper<int8_t>::quantize(value, qinfo, rounding_policy);
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100328}
329
Michele Di Giorgio35ea9a72019-08-23 12:02:06 +0100330/** Quantize a value given a 8-bit symmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100331 *
332 * @param[in] value Value to quantize
333 * @param[in] qinfo Quantization information to use for quantizing
334 *
335 * @return Quantized value
336 */
337inline int8_t quantize_qsymm8(float value, const QuantizationInfo &qinfo)
338{
339 int quantized = arm_compute::round(value / qinfo.uniform().scale, RoundingPolicy::TO_NEAREST_UP);
340 quantized = std::max(-128, std::min(quantized, 127));
341 return quantized;
342}
343
Georgios Pinitasdbdea0d2019-10-16 19:21:40 +0100344/** Quantize a value given a 8-bit symmetric per channel quantization scheme
345 *
346 * @param[in] value Value to quantize
347 * @param[in] qinfo Quantization information to use for quantizing
348 * @param[in] channel_id channel index into the scale vector of quantization info
349 *
350 * @return Quantized value
351 */
352inline int8_t quantize_qsymm8_per_channel(float value, const QuantizationInfo &qinfo, size_t channel_id = 0)
353{
354 int quantized = arm_compute::round(value / qinfo.scale()[channel_id], RoundingPolicy::TO_NEAREST_UP);
355 quantized = std::max(-128, std::min(quantized, 127));
356 return quantized;
357}
358
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000359/** Dequantize a value given an unsigned 8-bit asymmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100360 *
361 * @param[in] value Value to dequantize
362 * @param[in] qinfo Quantization information to use for dequantizing
363 *
364 * @return Dequantized value
365 */
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000366template <typename INFO_TYPE>
367inline float dequantize_qasymm8(uint8_t value, const INFO_TYPE &qinfo)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100368{
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000369 return Qasymm8QuantizationHelper<uint8_t>::dequantize(value, qinfo);
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100370}
371
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000372/** Dequantize a value given a signed 8-bit asymmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100373 *
374 * @param[in] value Value to dequantize
375 * @param[in] qinfo Quantization information to use for dequantizing
376 *
377 * @return Dequantized value
378 */
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000379template <typename INFO_TYPE>
380inline float dequantize_qasymm8_signed(int8_t value, const INFO_TYPE &qinfo)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100381{
Sang-Hoon Parkae6ef7c2019-11-13 16:51:45 +0000382 return Qasymm8QuantizationHelper<int8_t>::dequantize(value, qinfo);
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100383}
384
/** Dequantize a value given an unsigned 8-bit asymmetric quantization scheme
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value, i.e. (value - offset) * scale
 */
inline float dequantize(uint8_t value, float scale, int32_t offset)
{
    return (static_cast<int>(value) - offset) * scale;
}

Michele Di Giorgio35ea9a72019-08-23 12:02:06 +0100398/** Dequantize a value given a 8-bit symmetric quantization scheme
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100399 *
400 * @param[in] value Value to dequantize
401 * @param[in] qinfo Quantization information to use for dequantizing
402 *
403 * @return Dequantized value
404 */
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100405inline float dequantize_qsymm8(int8_t value, const UniformQuantizationInfo &qinfo)
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100406{
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100407 return value * qinfo.scale;
408}
409
/** Dequantize a value given a 8-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value * scale
 */
inline float dequantize(int8_t value, float scale)
{
    return value * scale;
}

/** Dequantize a value given a 16-bit symmetric quantization scheme
 *
 * @param[in] value Value to dequantize
 * @param[in] scale Scale to use for dequantization
 *
 * @return Dequantized value, i.e. value * scale
 */
inline float dequantize(int16_t value, float scale)
{
    return value * scale;
}

/** Dequantize a value given a 16-bit asymmetric quantization scheme
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value, i.e. (value - offset) * scale
 */
inline float dequantize(uint16_t value, float scale, int32_t offset)
{
    return (static_cast<int>(value) - offset) * scale;
}

/** Dequantize a value given a 32-bit asymmetric quantization scheme
 *
 * @param[in] value  Value to dequantize
 * @param[in] scale  Scale to use for dequantization
 * @param[in] offset Zero-offset to use for dequantization
 *
 * @return Dequantized value, i.e. (value - offset) * scale
 */
inline float dequantize(int32_t value, float scale, int32_t offset)
{
    // Both operands span the full int32_t range, so the difference may not fit in 32 bits
    // (e.g. INT32_MAX - (-1)). Widen first: signed overflow is undefined behaviour.
    const int64_t centered = static_cast<int64_t>(value) - static_cast<int64_t>(offset);
    return static_cast<float>(centered) * scale;
}

Manuel Bottini3689fcd2019-06-14 17:18:12 +0100460/** Quantize a value given a 16-bit symmetric quantization scheme
461 *
462 * @param[in] value Value to quantize
463 * @param[in] qinfo Quantization information to use for quantizing
464 * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
465 *
466 * @return Quantized value
467 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100468inline int16_t quantize_qsymm16(float value,
469 const UniformQuantizationInfo &qinfo,
470 RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
Manuel Bottini3689fcd2019-06-14 17:18:12 +0100471{
472 int quantized = arm_compute::round(value / qinfo.scale, rounding_policy);
473 quantized = arm_compute::utility::clamp<int, int16_t>(quantized);
474 return quantized;
475}
476
477/** Dequantize a value given a 16-bit symmetric quantization scheme
478 *
479 * @param[in] value Value to dequantize
480 * @param[in] qinfo Quantization information to use for dequantizing
481 *
482 * @return Dequantized value
483 */
484inline float dequantize_qsymm16(int16_t value, const UniformQuantizationInfo &qinfo)
485{
486 return value * qinfo.scale;
487}
488
489/** Quantize a value given a 16-bit symmetric quantization scheme
490 *
491 * @param[in] value Value to quantize
492 * @param[in] qinfo Quantization information to use for quantizing
493 *
494 * @return Quantized value
495 */
496inline int16_t quantize_qsymm16(float value, const QuantizationInfo &qinfo)
497{
498 return quantize_qsymm16(value, qinfo.uniform());
499}
500
501/** Dequantize a value given a 16-bit symmetric quantization scheme
502 *
503 * @param[in] value Value to dequantize
504 * @param[in] qinfo Quantization information to use for dequantizing
505 *
506 * @return Dequantized value
507 */
508inline float dequantize_qsymm16(int16_t value, const QuantizationInfo &qinfo)
509{
510 return dequantize_qsymm16(value, qinfo.uniform());
511}
Michele Di Giorgio35ea9a72019-08-23 12:02:06 +0100512
513/** Quantize a value given a 16-bit asymmetric quantization scheme
514 *
515 * @param[in] value Value to quantize
516 * @param[in] qinfo Quantization information to use for quantizing
517 * @param[in] rounding_policy (Optional) Rounding policy to use. Default: nearest up
518 *
519 * @return Quantized value
520 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100521inline uint16_t quantize_qasymm16(float value,
522 const UniformQuantizationInfo &qinfo,
523 RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_UP)
Michele Di Giorgio35ea9a72019-08-23 12:02:06 +0100524{
525 int quantized = arm_compute::round(value / qinfo.scale, rounding_policy) + qinfo.offset;
526 quantized = arm_compute::utility::clamp<int, uint16_t>(quantized);
527 return quantized;
528}
529
530/** Dequantize a value given a 16-bit asymmetric quantization scheme
531 *
532 * @param[in] value Value to dequantize
533 * @param[in] qinfo Quantization information to use for dequantizing
534 *
535 * @return Dequantized value
536 */
537inline float dequantize_qasymm16(uint16_t value, const UniformQuantizationInfo &qinfo)
538{
539 return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
540}
541
542/** Quantize a value given a 16-bit asymmetric quantization scheme
543 *
544 * @param[in] value Value to quantize
545 * @param[in] qinfo Quantization information to use for quantizing
546 *
547 * @return Quantized value
548 */
549inline uint16_t quantize_qasymm16(float value, const QuantizationInfo &qinfo)
550{
551 return quantize_qasymm16(value, qinfo.uniform());
552}
553
554/** Dequantize a value given a 16-bit asymmetric quantization scheme
555 *
556 * @param[in] value Value to dequantize
557 * @param[in] qinfo Quantization information to use for dequantizing
558 *
559 * @return Dequantized value
560 */
561inline float dequantize_qasymm16(uint16_t value, const QuantizationInfo &qinfo)
562{
563 return dequantize_qasymm16(value, qinfo.uniform());
564}
Manuel Bottini4370cff2020-02-07 16:31:59 +0000565
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000566/** Dequantize a value given a 32-bit asymmetric quantization scheme
567 *
568 * @param[in] value Value to dequantize
569 * @param[in] qinfo Quantization information to use for dequantizing
570 *
571 * @return Dequantized value
572 */
573inline float dequantize_s32(int32_t value, const UniformQuantizationInfo &qinfo)
574{
575 return (static_cast<int>(value) - qinfo.offset) * qinfo.scale;
576}
577
578/** Dequantize a value given a 32-bit asymmetric quantization scheme
579 *
580 * @param[in] value Value to dequantize
581 * @param[in] qinfo Quantization information to use for dequantizing
582 *
583 * @return Dequantized value
584 */
585
586inline float dequantize_s32(int32_t value, const QuantizationInfo &qinfo)
587{
588 return dequantize_s32(value, qinfo.uniform());
589}
590
Manuel Bottini4370cff2020-02-07 16:31:59 +0000591/*
592 * In case of requantization of a quantized input tensor to an output tensor with another quantization
593 * instead of applying dequantization and then a quantization functions, we just compute new scale and
594 * offset.
595 *
596 * Assuming:
597 * - q_i as input quantized value
598 * - q_o as output quantized value
599 * - z_i as input quantization offset value
600 * - z_o as output quantization offset value
601 * - s_i as input quantization scale value
602 * - s_o as output quantization scale value
603 * - z_n as new quantization offset value
604 * - s_n as new quantization scale value
605 *
606 * q_o = ( q_i - z_i ) * s_i / s_o + z_o
607 *
608 * We can rewrite the formula as:
609 *
610 * q_o = ( q_i * s_i / s_o ) - z_i * s_i / s_o + z_o
611 *
612 * q_o = q_i / s_n + z_n
613 *
614 * Where:
615 *
616 * s_n = s_o / s_i
617 *
618 * z_n = - z_i * s_i / s_o + z_o
619 *
620 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100621inline UniformQuantizationInfo compute_requantization_scale_offset(const UniformQuantizationInfo &uqinfo_in,
622 const UniformQuantizationInfo &uqinfo_out)
Manuel Bottini4370cff2020-02-07 16:31:59 +0000623{
624 float scale_to_apply = uqinfo_out.scale;
625 int32_t offset_to_apply = uqinfo_out.offset;
626
627 scale_to_apply /= uqinfo_in.scale;
628 // In order to minimize flooring we convert the offset to a float,
629 // then compute the new offset in the float domain,
630 // finally we convert it back as int32_t
631 offset_to_apply -= static_cast<int32_t>(static_cast<float>(uqinfo_in.offset) * uqinfo_in.scale / uqinfo_out.scale);
632 return UniformQuantizationInfo(scale_to_apply, offset_to_apply);
633}
634
Georgios Pinitas4c5469b2019-05-21 13:32:43 +0100635} // namespace arm_compute
Jonathan Deakina668f9f2024-01-24 09:15:38 +0000636#endif // ACL_ARM_COMPUTE_CORE_QUANTIZATIONINFO_H