blob: a28ddadc682c74722d738440d657b317d49eb3fb [file] [log] [blame]
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +00001/*
Viet-Hoa Do03b29712022-06-01 11:47:14 +01002 * Copyright (c) 2017-2022 Arm Limited.
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Pablo Telloeb82fd22018-02-23 13:43:50 +000024
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000025#pragma once
26
#include "src/cpu/kernels/assembly/arm_gemm.hpp"

#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <tuple>
Georgios Pinitas421405b2018-10-26 19:05:32 +010032
// Marks a code path that must never execute (e.g. an impossible default case in a switch).
// The 'why' argument is documentation only: it is discarded by the expansion below.
#define UNREACHABLE(why) __builtin_unreachable()

// Paranoid alternative to the above, which asserts instead:
// #define UNREACHABLE(why) assert(0 && why)
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000038
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000039namespace arm_gemm {
40
// get_type_name(): Returns a printable name for type "T".
//
// The name is extracted from the compiler-generated function signature: the text following the first "cls_" marker,
// up to (but not including) the next ';' or ']'.  Returns "(unknown)" if the marker or the terminator cannot be
// found, and "(unsupported)" when __GNUC__ is not defined.
template<typename T>
std::string get_type_name() {
#ifdef __GNUC__
    const std::string signature = __PRETTY_FUNCTION__;
    const std::string marker    = "cls_";

    const auto marker_pos = signature.find(marker);

    if (marker_pos != std::string::npos) {
        // The name starts immediately after the marker and runs to the first terminator character.
        const auto name_begin = marker_pos + marker.size();
        const auto name_end   = signature.find_first_of(";]", name_begin);

        if (name_end != std::string::npos) {
            return signature.substr(name_begin, name_end - name_begin);
        }
    }

    return "(unknown)";
#else
    return "(unsupported)";
#endif
}
63
// iceildiv(): Integer division of "a" by "b", rounded up (ceiling).
//
// "b" must be positive.  The result is built from the quotient and remainder rather than from "(a + b - 1) / b",
// so it cannot overflow when "a" is close to the maximum value of T.  For non-negative "a" the result matches the
// classic formula wherever that formula does not overflow; for negative "a" (signed T) the true ceiling is returned.
template<typename T>
inline T iceildiv(const T a, const T b) {
    const T quotient = a / b;

    // A positive remainder means the truncated quotient lies below the exact value, so bump it by one.
    return (a % b > 0) ? static_cast<T>(quotient + 1) : quotient;
}
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000068
// roundup(): Rounds "a" up to the next multiple of "b".  Values that are already a multiple of "b" are returned
// unchanged.  "b" must be non-zero (it is used as a divisor).
template <typename T>
inline T roundup(const T a, const T b) {
    const T remainder = a % b;

    return (remainder == 0) ? a : static_cast<T>(a + b - remainder);
}
David Manselld93991e2018-07-06 14:52:52 +010079
// Selects how utils::get_vector_length(VLType) determines the vector length.
enum class VLType {
    None, // Handled by the default case there: 16 bytes' worth of elements.
    SVE,  // Length queried via the SVE-only utils::get_vector_length<T>().
    SME,  // Length queried via utils::sme::get_vector_length<T>() when ARM_COMPUTE_ENABLE_SME is defined.
};
85
// Describes an output buffer of "T" that is addressed either directly (base pointer + stride) or indirectly
// (array of pointers + offset).  'is_indirect' records which of the two descriptions was supplied.
template<typename T>
struct IndirectOutputArg {
    // Filled in by the direct constructor; null/zero otherwise.
    struct {
        T *base;
        size_t stride;
    } direct = {};
    // Filled in by the indirect constructor; null/zero otherwise.
    struct {
        T * const *ptr;
        size_t offset;
    } indirect = {};
    bool is_indirect;

    // Default: direct mode with a null base and zero stride (both supplied by the '= {}' member initialisers).
    IndirectOutputArg() : is_indirect(false) {
    }

    // Direct
    IndirectOutputArg(T *base, size_t stride) : direct{base, stride}, is_indirect(false) {
    }

    // Indirect
    IndirectOutputArg(T * const * ptr, size_t offset) : indirect{ptr, offset}, is_indirect(true) {
    }
};
115
116// Check that the provided Requantize32 doesn't have a left shift.
117inline bool quant_no_left_shift(const Requantize32 &qp) {
118 if (qp.per_channel_requant) {
119 return (qp.per_channel_left_shifts == nullptr);
120 } else {
121 return (qp.per_layer_left_shift == 0);
122 }
123}
124
125// Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels. These don't include row
126// sums, so the 'b_offset' has to be zero.
127inline bool quant_hybrid_symmetric(const Requantize32 &qp) {
128 return quant_no_left_shift(qp) && qp.b_offset == 0;
129}
130
131// Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels. These don't support per
132// channel quantization. Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0...
133inline bool quant_hybrid_asymmetric(const Requantize32 &qp) {
134 return quant_no_left_shift(qp) /* && qp.b_offset != 0 */ && qp.per_channel_requant==false;
135}
136
// Describes a read-only input buffer of "T" that is addressed either directly (base pointer + stride) or indirectly
// (table of pointer arrays + starting row/column).  'is_indirect' records which description was supplied.
template<typename T>
struct IndirectInputArg {
    // Filled in by the direct constructor; null/zero otherwise.
    struct {
        const T *base;
        size_t stride;
    } direct = {};
    // Filled in by the indirect constructor; null/zero otherwise.
    struct {
        const T * const * const * ptr;
        unsigned int start_row;
        unsigned int start_col;
    } indirect = {};
    bool is_indirect;

    // Default: direct mode with a null base and zero stride (both supplied by the '= {}' member initialisers).
    IndirectInputArg() : is_indirect(false) {
    }

    // Direct
    IndirectInputArg(const T *base, size_t stride) : direct{base, stride}, is_indirect(false) {
    }

    // Indirect
    IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col)
        : indirect{ptr, start_row, start_col}, is_indirect(true) {
    }
};
168
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000169namespace utils {
Georgios Pinitas8bd25562021-07-22 11:56:32 +0100170
// get_vector_length(): Returns SVE vector length for type "T", i.e. the number of "T" elements per vector.
//
// It is required that this can be compiled by a compiler in non-SVE mode, but it must be prevented from running (at
// runtime) if SVE is not enabled.  Typically this is used by switchyard/driver code which is built in normal mode
// which then calls SVE kernels (compiled accordingly) iff SVE is detected at runtime.
//
// On targets other than AArch64 a fixed 16-byte vector is assumed.
template <typename T>
inline unsigned long get_vector_length() {
#if !defined(__aarch64__)
    return 16 / sizeof(T);
#else  // defined(__aarch64__)
    uint64_t vector_bytes;

    // The instruction is emitted as a raw encoding so that no SVE support is needed from the assembler; X0 is
    // used as the scratch destination and is therefore listed as clobbered.
    __asm __volatile (
        ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
        "mov %0, X0\n"
        : "=r" (vector_bytes)
        :
        : "x0"
    );

    return vector_bytes / sizeof(T);
#endif // !defined(__aarch64__)
}
David Manselld93991e2018-07-06 14:52:52 +0100194
#ifdef ARM_COMPUTE_ENABLE_SME
namespace sme {

// Defined out of line (function from misc-sve.cpp).
extern unsigned int raw_vector_length();

// get_vector_length(): Returns the raw vector length divided by the size of "T".
// NOTE(review): presumably raw_vector_length() reports bytes, making this an element count - confirm in misc-sve.cpp.
template <typename T>
inline unsigned long get_vector_length() {
    const unsigned int raw_length = raw_vector_length();

    return raw_length / sizeof(T);
}

} // namespace sme
#endif // ARM_COMPUTE_ENABLE_SME
208
Georgios Pinitas8bd25562021-07-22 11:56:32 +0100209// get_vector_length(VLType): Returns vector length for type "T".
210//
211// This has the same requirements and constraints as the SVE-only form above, so we call into that code for SVE.
212
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000213template <typename T>
214inline unsigned long get_vector_length(VLType vl_type) {
215 switch (vl_type) {
Viet-Hoa Do03b29712022-06-01 11:47:14 +0100216#ifdef ARM_COMPUTE_ENABLE_SME
217 case VLType::SME:
218 return sme::get_vector_length<T>();
219#endif // ARM_COMPUTE_ENABLE_SME
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000220 case VLType::SVE:
Michalis Spyrou20fca522021-06-07 14:23:57 +0100221 return get_vector_length<T>();
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000222 default:
223 return 16 / sizeof(T);
224 }
225}
Gian Marco Iodice9a1e28b2021-07-29 16:24:36 +0100226
// get_default_activation_values(): Returns the default values for activation min and max for integer activation,
// as a (min, max) tuple holding std::numeric_limits<T>::min() and std::numeric_limits<T>::max().
template <typename T>
inline std::tuple<T, T> get_default_activation_values()
{
    using limits = std::numeric_limits<T>;

    return std::tuple<T, T>(limits::min(), limits::max());
}
236
237// get_default_activation_values(): Returns the default values for activation min and max for float activation.
238template <>
239inline std::tuple<float, float> get_default_activation_values()
240{
241 const float min = static_cast<float>(-std::numeric_limits<float>::infinity());
242 const float max = static_cast<float>(std::numeric_limits<float>::infinity());
243
244 return std::make_tuple(min, max);
245}
246
#if defined(__ARM_FP16_ARGS)
// get_default_activation_values(): Returns the default values for activation min and max for __fp16 activation,
// i.e. float -infinity and +infinity each converted to __fp16.
template <>
inline std::tuple<__fp16, __fp16> get_default_activation_values()
{
    const float inf = std::numeric_limits<float>::infinity();

    return std::make_tuple(static_cast<__fp16>(-inf), static_cast<__fp16>(inf));
}
#endif // defined(__ARM_FP16_ARGS)
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000258} // utils namespace
259} // arm_gemm namespace
260
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100261using namespace arm_gemm::utils;