blob: ea9ba776a941ced92cccfe4a260c34a6ad411562 [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
25
26#include <cmath>
27#include <limits>
28#include <numeric>
29
30using namespace arm_compute::quantization;
31
/* 2^31: the scale applied to the frexp() mantissa to obtain a fixed-point value with 31 fractional bits. */
constexpr int64_t fixed_point_one_Q0 = static_cast<int64_t>(1) << 31;
/* Tolerance used when testing a multiplier against 0.0 and 1.0. */
constexpr float epsilon = 1e-5f;
Chunosovf450caa2017-11-08 16:09:35 +070034
Gian Marco Iodice3139f032018-11-05 14:26:32 +000035arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_less_than_one(float multiplier,
Georgios Pinitas631c41a2017-12-06 11:53:03 +000036 int *quant_multiplier,
37 int *right_shift)
Chunosovd621bca2017-11-03 17:33:15 +070038{
39 ARM_COMPUTE_RETURN_ERROR_ON(quant_multiplier == nullptr);
40 ARM_COMPUTE_RETURN_ERROR_ON(right_shift == nullptr);
Gian Marco Iodice3139f032018-11-05 14:26:32 +000041 ARM_COMPUTE_RETURN_ERROR_ON(multiplier < -epsilon);
42 ARM_COMPUTE_RETURN_ERROR_ON(multiplier > 1.0f + epsilon);
43 if(std::fabs(1.0f - multiplier) < epsilon)
44 {
45 *quant_multiplier = 1;
46 *right_shift = 0;
47 return arm_compute::Status{};
48 }
49
50 if(std::fabs(0.0f - multiplier) < epsilon)
Chunosovd621bca2017-11-03 17:33:15 +070051 {
52 *quant_multiplier = 0;
53 *right_shift = 0;
Georgios Pinitas631c41a2017-12-06 11:53:03 +000054 return arm_compute::Status{};
Chunosovd621bca2017-11-03 17:33:15 +070055 }
Gian Marco Iodice3139f032018-11-05 14:26:32 +000056
Chunosovd621bca2017-11-03 17:33:15 +070057 const double q = std::frexp(multiplier, right_shift);
58 *right_shift *= -1;
Chunosovf450caa2017-11-08 16:09:35 +070059 auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
60 ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
61 if(q_fixed == fixed_point_one_Q0)
Chunosovd621bca2017-11-03 17:33:15 +070062 {
63 q_fixed /= 2;
64 --*right_shift;
65 }
66 ARM_COMPUTE_RETURN_ERROR_ON(*right_shift < 0);
67 ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
Chunosovf450caa2017-11-08 16:09:35 +070068 *quant_multiplier = static_cast<int32_t>(q_fixed);
Chunosovd621bca2017-11-03 17:33:15 +070069
Georgios Pinitas631c41a2017-12-06 11:53:03 +000070 return arm_compute::Status{};
Chunosovf450caa2017-11-08 16:09:35 +070071}
72
Gian Marco Iodice3139f032018-11-05 14:26:32 +000073arm_compute::Status arm_compute::quantization::calculate_quantized_multiplier_greater_than_one(float multiplier,
Georgios Pinitas631c41a2017-12-06 11:53:03 +000074 int *quantized_multiplier,
75 int *left_shift)
Chunosovf450caa2017-11-08 16:09:35 +070076{
77 ARM_COMPUTE_RETURN_ERROR_ON(quantized_multiplier == nullptr);
78 ARM_COMPUTE_RETURN_ERROR_ON(left_shift == nullptr);
79 ARM_COMPUTE_RETURN_ERROR_ON(multiplier < 1.f);
80 const double q = std::frexp(multiplier, left_shift);
81 auto q_fixed = static_cast<int64_t>(round(q * fixed_point_one_Q0));
82 ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > fixed_point_one_Q0);
83 if(q_fixed == fixed_point_one_Q0)
84 {
85 q_fixed /= 2;
86 ++*left_shift;
87 }
88 ARM_COMPUTE_RETURN_ERROR_ON(*left_shift < 0);
89 ARM_COMPUTE_RETURN_ERROR_ON(q_fixed > std::numeric_limits<int32_t>::max());
90 *quantized_multiplier = static_cast<int32_t>(q_fixed);
91
Georgios Pinitas631c41a2017-12-06 11:53:03 +000092 return arm_compute::Status{};
Chunosovf450caa2017-11-08 16:09:35 +070093}