Blame - src/core/NEON/kernels/arm_gemm/utils.hpp - ml/ComputeLibrary

blob: 18e124b83e818f7ca729139f208a470f676434b0 [file] [log] [blame]

Michele Di Giorgio	5b6904b	2018-01-29 12:24:14 +0000	[diff] [blame]	1	/*
Georgios Pinitas	4ee8b15	2021-07-16 16:16:43 +0100	[diff] [blame]	2	* Copyright (c) 2017-2021 Arm Limited.
Michele Di Giorgio	5b6904b	2018-01-29 12:24:14 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Pablo Tello	eb82fd2	2018-02-23 13:43:50 +0000	[diff] [blame]	24
Michele Di Giorgio	5b6904b	2018-01-29 12:24:14 +0000	[diff] [blame]	25	#pragma once
				26
#include "arm_gemm.hpp"

#include <cstddef>
#include <cstdint>
#include <limits>
#include <string>
#include <tuple>
Georgios Pinitas	421405b	2018-10-26 19:05:32 +0100	[diff] [blame]	32
// UNREACHABLE(why): marks a code path that can never be executed (e.g. the impossible default case of a switch).
// The "why" argument only documents the call site - the expansion discards it.
#define UNREACHABLE(why) __builtin_unreachable()

// Paranoid alternative to the above: fail an assert that carries the reason.
// #define UNREACHABLE(why) assert(0 && why)
Michele Di Giorgio	5b6904b	2018-01-29 12:24:14 +0000	[diff] [blame]	38
Georgios Pinitas	c0b6f76	2020-11-02 01:37:17 +0000	[diff] [blame]	39	namespace arm_gemm {
				40
// get_type_name<T>(): Returns a printable name for type "T".
//
// When __GNUC__ is defined the name is recovered from __PRETTY_FUNCTION__: it is the text that follows the first
// "cls_" marker, up to (but excluding) the next ';' or ']'.  "(unknown)" is returned if either the marker or the
// terminator cannot be found, and "(unsupported)" when __GNUC__ is not defined.
template<typename T>
std::string get_type_name() {
#ifdef __GNUC__
    const std::string signature = __PRETTY_FUNCTION__;

    const auto marker_pos = signature.find("cls_");

    if (marker_pos != std::string::npos) {
        // Skip over the 4-character marker itself.
        const auto name_begin = marker_pos + 4;
        const auto name_end   = signature.find_first_of(";]", name_begin);

        if (name_end != std::string::npos) {
            return signature.substr(name_begin, name_end - name_begin);
        }
    }

    return "(unknown)";
#else
    return "(unsupported)";
#endif
}
				63
				64	template<typename T>
Georgios Pinitas	1d48065	2019-01-23 11:24:50 +0000	[diff] [blame]	65	inline T iceildiv(const T a, const T b) {
Georgios Pinitas	421405b	2018-10-26 19:05:32 +0100	[diff] [blame]	66	return (a + b - 1) / b;
Pablo Tello	eb82fd2	2018-02-23 13:43:50 +0000	[diff] [blame]	67	}
Michele Di Giorgio	5b6904b	2018-01-29 12:24:14 +0000	[diff] [blame]	68
// roundup(a, b): Returns "a" rounded up to a multiple of "b" - "a" itself if it already is one.  Intended for
// non-negative "a" and positive "b".
template <typename T>
inline T roundup(const T a, const T b) {
    const T remainder = a % b;

    // A zero remainder means "a" is already aligned; otherwise step up to the next multiple.
    return remainder ? (a + b - remainder) : a;
}
David Mansell	d93991e	2018-07-06 14:52:52 +0100	[diff] [blame]	79
// VLType: Selects which vector length get_vector_length(VLType) reports.
enum class VLType {
    None = 0, // Not SVE: get_vector_length(VLType) assumes fixed 16-byte vectors.
    SVE  = 1, // SVE: get_vector_length(VLType) reads the vector length from the hardware.
};
				84
				85	template<typename T>
				86	struct IndirectOutputArg {
				87	struct {
				88	T *base;
				89	size_t stride;
				90	} direct = {};
				91	struct {
				92	T * const *ptr;
				93	size_t offset;
				94	} indirect = {};
				95	bool is_indirect;
				96
				97	// Direct
				98	IndirectOutputArg(T *base, size_t stride) : is_indirect(false) {
				99	direct.base = base;
				100	direct.stride = stride;
				101	}
				102
				103	// Indirect
				104	IndirectOutputArg(T * const * ptr, size_t offset) : is_indirect(true) {
				105	indirect.ptr = ptr;
				106	indirect.offset = offset;
				107	}
				108
				109	IndirectOutputArg() : is_indirect(false) {
				110	direct.base = nullptr;
				111	direct.stride = 0;
				112	}
				113	};
				114
				115	// Check that the provided Requantize32 doesn't have a left shift.
				116	inline bool quant_no_left_shift(const Requantize32 &qp) {
				117	if (qp.per_channel_requant) {
				118	return (qp.per_channel_left_shifts == nullptr);
				119	} else {
				120	return (qp.per_layer_left_shift == 0);
				121	}
				122	}
				123
				124	// Check that the provided Requantize32 is compatible with the "symmetric" hybrid kernels. These don't include row
				125	// sums, so the 'b_offset' has to be zero.
				126	inline bool quant_hybrid_symmetric(const Requantize32 &qp) {
				127	return quant_no_left_shift(qp) && qp.b_offset == 0;
				128	}
				129
				130	// Check that the provided Requantize32 is compatible with the "asymmetric" hybrid kernels. These don't support per
				131	// channel quantization. Technically b_offset==0 cases would work, but it is a waste to sum and then multiply by 0...
				132	inline bool quant_hybrid_asymmetric(const Requantize32 &qp) {
				133	return quant_no_left_shift(qp) /* && qp.b_offset != 0 */ && qp.per_channel_requant==false;
				134	}
				135
				136	template<typename T>
				137	struct IndirectInputArg {
				138	struct {
				139	const T *base;
				140	size_t stride;
				141	} direct = {};
				142	struct {
				143	const T * const * const * ptr;
				144	unsigned int start_row;
				145	unsigned int start_col;
				146	} indirect = {};
				147	bool is_indirect;
				148
				149	// Direct
				150	IndirectInputArg(const T *base, size_t stride) : is_indirect(false) {
				151	direct.base = base;
				152	direct.stride = stride;
				153	}
				154
				155	// Indirect
				156	IndirectInputArg(const T * const * const *ptr, unsigned int start_row, unsigned int start_col) : is_indirect(true) {
				157	indirect.ptr = ptr;
				158	indirect.start_row = start_row;
				159	indirect.start_col = start_col;
				160	}
				161
				162	IndirectInputArg() : is_indirect(false) {
				163	direct.base = nullptr;
				164	direct.stride = 0;
				165	}
				166	};
				167
Georgios Pinitas	7cd26d4	2019-01-09 18:35:17 +0000	[diff] [blame]	168	namespace utils {
Georgios Pinitas	8bd2556	2021-07-22 11:56:32 +0100	[diff] [blame]	169
				170	// get_vector_length(): Returns SVE vector length for type "T".
				171	//
				172	// It is required that this can be compiled by a compiler in non-SVE mode, but it must be prevented from running (at
				173	// runtime) if SVE is not enabled. Typically this is used by switchyard/driver code which is built in normal mode
				174	// which then calls SVE kernels (compiled accordingly) iff SVE is detected at runtime.
David Mansell	d93991e	2018-07-06 14:52:52 +0100	[diff] [blame]	175	template <typename T>
				176	inline unsigned long get_vector_length() {
Georgios Pinitas	8bd2556	2021-07-22 11:56:32 +0100	[diff] [blame]	177	#if defined(__aarch64__)
Michalis Spyrou	20fca52	2021-06-07 14:23:57 +0100	[diff] [blame]	178	uint64_t vl;
				179
				180	__asm __volatile (
				181	".inst 0x0420e3e0\n" // CNTB X0, ALL, MUL #1
				182	"mov %0, X0\n"
				183	: "=r" (vl)
				184	:
				185	: "x0"
				186	);
				187
				188	return vl / sizeof(T);
Georgios Pinitas	8bd2556	2021-07-22 11:56:32 +0100	[diff] [blame]	189	#else // !defined(__aarch64__)
Georgios Pinitas	7cd26d4	2019-01-09 18:35:17 +0000	[diff] [blame]	190	return 16 / sizeof(T);
Georgios Pinitas	8bd2556	2021-07-22 11:56:32 +0100	[diff] [blame]	191	#endif // defined(__aarch64__)
Georgios Pinitas	7cd26d4	2019-01-09 18:35:17 +0000	[diff] [blame]	192	}
David Mansell	d93991e	2018-07-06 14:52:52 +0100	[diff] [blame]	193
Georgios Pinitas	8bd2556	2021-07-22 11:56:32 +0100	[diff] [blame]	194	// get_vector_length(VLType): Returns vector length for type "T".
				195	//
				196	// This has the same requirements and constraints as the SVE-only form above, so we call into that code for SVE.
				197
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	198	template <typename T>
				199	inline unsigned long get_vector_length(VLType vl_type) {
				200	switch (vl_type) {
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	201	case VLType::SVE:
Michalis Spyrou	20fca52	2021-06-07 14:23:57 +0100	[diff] [blame]	202	return get_vector_length<T>();
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	203	default:
				204	return 16 / sizeof(T);
				205	}
				206	}
Gian Marco Iodice	9a1e28b	2021-07-29 16:24:36 +0100	[diff] [blame]	207
				208	// get_default_activation_values(): Returns the default values for activation min and max for integer activation.
				209	template <typename T>
				210	inline std::tuple<T, T> get_default_activation_values()
				211	{
				212	const T min = static_cast<T>(std::numeric_limits<T>::min());
				213	const T max = static_cast<T>(std::numeric_limits<T>::max());
				214
				215	return std::make_tuple(min, max);
				216	}
				217
				218	// get_default_activation_values(): Returns the default values for activation min and max for float activation.
				219	template <>
				220	inline std::tuple<float, float> get_default_activation_values()
				221	{
				222	const float min = static_cast<float>(-std::numeric_limits<float>::infinity());
				223	const float max = static_cast<float>(std::numeric_limits<float>::infinity());
				224
				225	return std::make_tuple(min, max);
				226	}
				227
				228	#if defined(__ARM_FP16_ARGS)
				229	// get_default_activation_values(): Returns the default values for activation min and max for __fp16 activation.
				230	template <>
				231	inline std::tuple<__fp16, __fp16> get_default_activation_values()
				232	{
				233	const __fp16 min = static_cast<__fp16>(-std::numeric_limits<float>::infinity());
				234	const __fp16 max = static_cast<__fp16>(std::numeric_limits<float>::infinity());
				235
				236	return std::make_tuple(min, max);
				237	}
				238	#endif // defined(__ARM_FP16_ARGS)
Georgios Pinitas	7cd26d4	2019-01-09 18:35:17 +0000	[diff] [blame]	239	} // utils namespace
				240	} // arm_gemm namespace
				241
Georgios Pinitas	cfa2bba	2019-06-27 17:00:52 +0100	[diff] [blame]	242	using namespace arm_gemm::utils;