blob: 7c1497ec41224c75facad1b471f0fcf538913aa2 [file] [log] [blame]
/*
 * Copyright (c) 2020-2021, 2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/Types.h"
25#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
26#include "arm_compute/core/WindowIterator.h"
27#include "arm_compute/runtime/NEON/NEFunctions.h"
28#include "arm_compute/runtime/NEON/NEScheduler.h"
29
30#include "support/ToolchainSupport.h"
31#include "utils/Utils.h"
32
33#include <cstdlib>
34
35using namespace arm_compute;
36using namespace utils;
37
38QuantizationInfo dynamic_qinfo(QuantizationInfo qinfo)
39{
40 return QuantizationInfo(qinfo.scale(), qinfo.offset(), true);
41}
42void set_qinfo_dynamic(Tensor &t)
43{
44 t.info()->set_quantization_info(dynamic_qinfo(t.info()->quantization_info()));
45}
46
47void quantize(Tensor &qt, const Tensor &t, float min, float max)
48{
49 DataType dt = DataType::QASYMM8_SIGNED;
50
51 // Determine the scale
52 const float scale = (max - min) / 256.0f;
53
54 // Determine the zero-point; using affine equation val = (qval-zerop) * scale
55 const float zero_point = -128.0f - min / scale;
56
57 QuantizationInfo qinfo(scale, (int32_t)round(zero_point), true);
58
59 // We now have the quantisation info and can configure the quantised tensor
60 qt.allocator()->init(TensorInfo(t.info()->tensor_shape(), 1, dt, qinfo));
61 qt.allocator()->allocate();
62 NEQuantizationLayer quantization;
63 quantization.configure(&t, &qt);
64 quantization.run();
65}
66
67void invert_qinfo_offset(Tensor &t)
68{
69 QuantizationInfo qinfo = t.info()->quantization_info();
70 t.info()->set_quantization_info(QuantizationInfo(qinfo.scale()[0], -qinfo.offset()[0], qinfo.is_dynamic()));
71}
72
73void print_quantization_info(const Tensor &t, const std::string &name_prefix)
74{
75 QuantizationInfo qinfo = t.info()->quantization_info();
76 std::cout << name_prefix << "_qinfo="
77 << "QuantizationInfo(" << qinfo.scale()[0] << ", " << qinfo.offset()[0] << ")\n";
78}
79
80int main(int argc, char **argv)
81{
82 size_t M = 4;
83 size_t N = 4;
84 size_t K = 4;
85
86 // Parse args
87 if (argc < 3) /* case default matrix sizes */
88 {
89 // Print help
90 std::cout << "Usage: ./build/neon_gemm_qasymm8 M N K\n";
91 std::cout << "Too few or no inputs provided. Using default M=4, N=4, K=4\n\n";
92 }
93 else /* case M N K arguments provided */
94 {
95 M = strtol(argv[1], nullptr, 10);
96 N = strtol(argv[2], nullptr, 10);
97 K = strtol(argv[3], nullptr, 10);
98 }
99
100 /*** Floating point matrix multiplication ***/
101
102 // Initialise input matrices
103 NEGEMM fgemm{};
104
105 Tensor src1;
106 Tensor src2;
107 Tensor dst;
108 src1.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
109 src2.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
110 dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
111 fgemm.configure(&src1, &src2, nullptr, &dst, 1, 0);
112
113 // Allocate matrices
114 src1.allocator()->allocate();
115 src2.allocator()->allocate();
116 dst.allocator()->allocate();
117
118 float min1 = 0.0f;
119 float max1 = 1.0f;
120 fill_random_tensor(src1, 0, min1, max1);
121
122 float min2 = -1.0f;
123 float max2 = 2.0f;
124 fill_random_tensor(src2, 1, min2, max2);
125
126 // Run single precision gemm and print result
127 fgemm.run();
128
129#if ARM_COMPUTE_DEBUG_ENABLED
130 std::cout << "# F32 GEMM result:\n";
131 std::cout << "src1=[ \n";
132 src1.print(std::cout);
133 std::cout << "] \n";
134 std::cout << "src2=[ \n";
135 src2.print(std::cout);
136 std::cout << "] \n";
137 std::cout << "dst=[ \n";
138 dst.print(std::cout);
139 std::cout << "] \n";
140#endif // ARM_COMPUTE_DEBUG_ENABLED
141
142 Tensor q_src1;
143 quantize(q_src1, src1, min1, max1);
144 print_quantization_info(q_src1, "src1");
145 q_src1.info()->set_are_values_constant(false);
146
147 // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset
148 // compared to NEQuantizeLayer
149 invert_qinfo_offset(q_src1);
150
151 Tensor q_src2;
152 quantize(q_src2, src2, min2, max2);
153 print_quantization_info(q_src2, "src2");
154 q_src2.info()->set_are_values_constant(false);
155
156 // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset
157 // compared to NEQuantizeLayer
158 invert_qinfo_offset(q_src2);
159
160 // q_dst will be Dequantized to F32 so it doesn't need a QuantizationInfo
161 Tensor q_dst;
162 q_dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
163
164 // Configure low precision gemm and initialise result tensor (pre-output)
165 NEGEMMLowpMatrixMultiplyCore qgemm;
166 qgemm.configure(&q_src1, &q_src2, nullptr, &q_dst);
167
168 q_dst.allocator()->allocate();
169
170 // Run low precision matrix multiply kernel
171 qgemm.run();
172
173#if ARM_COMPUTE_DEBUG_ENABLED
174 // Print quantized source matrices
175 std::cout << "q_src1=[ \n";
176 q_src1.print(std::cout);
177 std::cout << "] \n";
178 std::cout << "q_src2=[ \n";
179 q_src2.print(std::cout);
180 std::cout << "] \n";
181 std::cout << "# Lowp GEMM output (FP32):\n";
182 std::cout << "q_dst=[ \n";
183 q_dst.print(std::cout);
184 std::cout << "] \n";
185
186 // Expected result
187 std::cout << "# Expected result:\n";
188 std::cout << "dst=[ \n";
189 dst.print(std::cout);
190 std::cout << "] \n";
191#endif // ARM_COMPUTE_DEBUG_ENABLED
192
193 // Rerun to test the ability to modify the Tensor contents and QuantizationInfo (dynamic quantization)
194 min1 = -1.0f;
195 max1 = 1.0f;
196 fill_random_tensor(src1, 2, min1, max1);
197
198#if ARM_COMPUTE_DEBUG_ENABLED
199 std::cout << "# Refilled src1\n";
200 std::cout << "src1=[ \n";
201 src1.print(std::cout);
202 std::cout << "] \n";
203 std::cout << "src2=[ \n";
204 src2.print(std::cout);
205 std::cout << "] \n";
206#endif // ARM_COMPUTE_DEBUG_ENABLED
207
208 fgemm.run();
209
210 quantize(q_src1, src1, min1, max1);
211 set_qinfo_dynamic(q_src1);
212 print_quantization_info(q_src1, "src1");
213
214 // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset
215 // compared to NEQuantizeLayer
216 invert_qinfo_offset(q_src1);
217
218 qgemm.run();
219
220#if ARM_COMPUTE_DEBUG_ENABLED
221 // Print quantized source matrices
222 std::cout << "q_src1=[ \n";
223 q_src1.print(std::cout);
224 std::cout << "] \n";
225 std::cout << "q_src2=[ \n";
226 q_src2.print(std::cout);
227 std::cout << "] \n";
228 std::cout << "# Lowp GEMM output (FP32):\n";
229 std::cout << "q_dst=[ \n";
230 q_dst.print(std::cout);
231 std::cout << "] \n";
232
233 // Expected result
234 std::cout << "# Expected result:\n";
235 std::cout << "dst=[ \n";
236 dst.print(std::cout);
237 std::cout << "] \n";
238#endif // ARM_COMPUTE_DEBUG_ENABLED
239}