blob: 18e124b83e818f7ca729139f208a470f676434b0 [file] [log] [blame]
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +00001/*
Georgios Pinitas4ee8b152021-07-16 16:16:43 +01002 * Copyright (c) 2017-2021 Arm Limited.
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Pablo Telloeb82fd22018-02-23 13:43:50 +000024
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000025#pragma once
26
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000027#include "arm_gemm.hpp"
28
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000029#include <cstddef>
Freddie Liardet6e90c122021-08-02 13:35:11 +010030#include <limits>
Freddie Liardetd216f572021-08-03 15:57:32 +010031#include <tuple>
Georgios Pinitas421405b2018-10-26 19:05:32 +010032
Pablo Telloeb82fd22018-02-23 13:43:50 +000033// Macro for unreachable code (e.g. impossible default cases on switch)
Anthony Barbier5f707732018-07-03 16:22:02 +010034#define UNREACHABLE(why) __builtin_unreachable()
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000035
Pablo Telloeb82fd22018-02-23 13:43:50 +000036// Paranoid option for the above with assert
37// #define UNREACHABLE(why) assert(0 && why)
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000038
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000039namespace arm_gemm {
40
// get_type_name(): Derives a short name for type "T" from the compiler's pretty-printed function signature.
//
// On GNU-compatible compilers this searches __PRETTY_FUNCTION__ for the first "cls_" marker and returns the
// characters that follow it, stopping before the next ';' or ']'.  It returns "(unknown)" when the marker is absent
// or when no terminating character follows it, and "(unsupported)" when __GNUC__ is not defined.
template<typename T>
std::string get_type_name() {
#ifdef __GNUC__
    const std::string signature = __PRETTY_FUNCTION__;

    const auto marker_pos = signature.find("cls_");
    if (marker_pos == std::string::npos) {
        return "(unknown)";
    }

    // The name starts immediately after the four-character "cls_" marker.
    const auto name_begin = marker_pos + 4;
    const auto name_end   = signature.find_first_of(";]", name_begin);
    if (name_end == std::string::npos) {
        return "(unknown)";
    }

    return signature.substr(name_begin, name_end - name_begin);
#else
    return "(unsupported)";
#endif
}
63
// iceildiv(): Integer division of "a" by "b", rounding up; computed as (a + b - 1) / b.
//
// "b" must be non-zero.  The intermediate sum is formed in whatever type the usual arithmetic conversions give for
// T, and the quotient is converted back to T on return.
template<typename T>
inline T iceildiv(const T a, const T b) {
    // Bias the numerator by (b - 1) so that the truncating division below rounds up.
    const auto biased = a + b - 1;

    return biased / b;
}
Michele Di Giorgio5b6904b2018-01-29 12:24:14 +000068
// roundup(): Returns "a" unchanged when it is an exact multiple of "b"; otherwise returns a + b - (a % b).
//
// "b" must be non-zero, as the remainder a % b is taken.
template <typename T>
inline T roundup(const T a, const T b) {
    const T remainder = a % b;

    // A zero remainder means "a" is already a multiple of "b".
    return remainder ? (a + b - remainder) : a;
}
David Manselld93991e2018-07-06 14:52:52 +010079
// VLType: Identifies how the vector length is determined for a piece of code.
//
// The explicit values below are the ones the compiler would assign by default.
enum class VLType {
    None = 0, // Not SVE: utils::get_vector_length(VLType) assumes 16-byte vectors for this value.
    SVE  = 1, // SVE: utils::get_vector_length(VLType) obtains the vector length via the SVE-only query.
};
84
// IndirectOutputArg: Describes an output location for elements of type "T" in one of two forms.
//
//  - "direct":   a base pointer and a stride.
//  - "indirect": a pointer to (const) pointers to T, and an offset.
//
// "is_indirect" records which form a constructor selected.  Both descriptors always exist; the one that was not
// selected keeps its value-initialized state (null pointer, zero).
template<typename T>
struct IndirectOutputArg {
    struct {
        T *base;
        size_t stride;
    } direct = {};
    struct {
        T * const *ptr;
        size_t offset;
    } indirect = {};
    bool is_indirect;

    // Default: direct form with a null base and a zero stride.
    IndirectOutputArg() : direct{nullptr, 0}, is_indirect(false) {
    }

    // Direct
    IndirectOutputArg(T *base, size_t stride) : direct{base, stride}, is_indirect(false) {
    }

    // Indirect
    IndirectOutputArg(T * const * ptr, size_t offset) : indirect{ptr, offset}, is_indirect(true) {
    }
};
114
115// Check that the provided Requantize32 doesn't have a left shift.
116inline bool quant_no_left_shift(const Requantize32 &qp) {
117 if (qp.per_channel_requant) {
118 return (qp.per_channel_left_shifts == nullptr);
119 } else {
120 return (qp.per_layer_left_shift == 0);
121 }
122}
123
124// Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels. These don't include row
125// sums, so the 'b_offset' has to be zero.
126inline bool quant_hybrid_symmetric(const Requantize32 &qp) {
127 return quant_no_left_shift(qp) && qp.b_offset == 0;
128}
129
130// Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels. These don't support per
131// channel quantization. Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0...
132inline bool quant_hybrid_asymmetric(const Requantize32 &qp) {
133 return quant_no_left_shift(qp) /* && qp.b_offset != 0 */ && qp.per_channel_requant==false;
134}
135
// IndirectInputArg: Describes an input location for read-only elements of type "T" in one of two forms.
//
//  - "direct":   a base pointer and a stride.
//  - "indirect": a three-level pointer to const T, together with a starting row and a starting column.
//
// "is_indirect" records which form a constructor selected.  Both descriptors always exist; the one that was not
// selected keeps its value-initialized state (null pointer, zeros).
template<typename T>
struct IndirectInputArg {
    struct {
        const T *base;
        size_t stride;
    } direct = {};
    struct {
        const T * const * const * ptr;
        unsigned int start_row;
        unsigned int start_col;
    } indirect = {};
    bool is_indirect;

    // Default: direct form with a null base and a zero stride.
    IndirectInputArg() : direct{nullptr, 0}, is_indirect(false) {
    }

    // Direct
    IndirectInputArg(const T *base, size_t stride) : direct{base, stride}, is_indirect(false) {
    }

    // Indirect
    IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col)
        : indirect{ptr, start_row, start_col}, is_indirect(true) {
    }
};
167
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000168namespace utils {
Georgios Pinitas8bd25562021-07-22 11:56:32 +0100169
// get_vector_length(): Returns the SVE vector length, counted in elements of type "T".
//
// This has to build with a compiler that is not in SVE mode, but it must never execute at runtime unless SVE is
// enabled.  The usual caller is switchyard/driver code built in normal mode, which dispatches to SVE kernels
// (compiled accordingly) iff SVE is detected at runtime.
//
// When not building for AArch64 the result is 16 / sizeof(T).
template <typename T>
inline unsigned long get_vector_length() {
#if !defined(__aarch64__)
    return 16 / sizeof(T);
#else // defined(__aarch64__)
    uint64_t vector_bytes;

    // The instruction is given as a raw encoding (see the mnemonic alongside it).  It writes X0, which is therefore
    // declared as clobbered and then copied into the output operand.
    __asm __volatile (
        ".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
        "mov %0, X0\n"
        : "=r" (vector_bytes)
        :
        : "x0"
    );

    return vector_bytes / sizeof(T);
#endif // !defined(__aarch64__)
}
David Manselld93991e2018-07-06 14:52:52 +0100193
Georgios Pinitas8bd25562021-07-22 11:56:32 +0100194// get_vector_length(VLType): Returns vector length for type "T".
195//
196// This has the same requirements and constraints as the SVE-only form above, so we call into that code for SVE.
197
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000198template <typename T>
199inline unsigned long get_vector_length(VLType vl_type) {
200 switch (vl_type) {
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000201 case VLType::SVE:
Michalis Spyrou20fca522021-06-07 14:23:57 +0100202 return get_vector_length<T>();
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000203 default:
204 return 16 / sizeof(T);
205 }
206}
Gian Marco Iodice9a1e28b2021-07-29 16:24:36 +0100207
// get_default_activation_values(): Returns the default (min, max) activation bounds for integer activation types.
//
// The bounds are std::numeric_limits<T>::min() and std::numeric_limits<T>::max(), in that order.
template <typename T>
inline std::tuple<T, T> get_default_activation_values()
{
    using limits = std::numeric_limits<T>;

    return std::tuple<T, T>(limits::min(), limits::max());
}
217
218// get_default_activation_values(): Returns the default values for activation min and max for float activation.
219template <>
220inline std::tuple<float, float> get_default_activation_values()
221{
222 const float min = static_cast<float>(-std::numeric_limits<float>::infinity());
223 const float max = static_cast<float>(std::numeric_limits<float>::infinity());
224
225 return std::make_tuple(min, max);
226}
227
#if defined(__ARM_FP16_ARGS)
// get_default_activation_values(): Specialization for __fp16 activation; the default bounds are the float
// infinities (negative, positive) converted to __fp16.
template <>
inline std::tuple<__fp16, __fp16> get_default_activation_values()
{
    const float inf = std::numeric_limits<float>::infinity();

    const __fp16 lower = static_cast<__fp16>(-inf);
    const __fp16 upper = static_cast<__fp16>(inf);

    return std::make_tuple(lower, upper);
}
#endif // defined(__ARM_FP16_ARGS)
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000239} // utils namespace
240} // arm_gemm namespace
241
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100242using namespace arm_gemm::utils;