blob: 0927123f7cb9714cc9ebe86cbb8f0618922a721d [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Pablo Tello99ef8402018-03-20 16:46:55 +000024
25// This can only be built if the target/compiler supports FP16 arguments.
26#ifdef __ARM_FP16_ARGS
Pablo Telloeb82fd22018-02-23 13:43:50 +000027
28#include "arm_gemm.hpp"
29
30#include "gemm_common.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000031#include "gemm_hybrid.hpp"
David Manselle39334c2018-07-06 17:53:35 +010032#include "gemm_implementation.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000033#include "gemm_interleaved.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000034#include "gemm_native.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000035
Georgios Pinitas14613832019-03-01 19:07:11 +000036#include "kernels/a32_sgemm_8x6.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000037#include "kernels/a64_hgemm_24x8.hpp"
38#include "kernels/a64_sgemm_12x8.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000039#include "kernels/sve_hybrid_fp16_mla_4VLx4.hpp"
Georgios Pinitas421405b2018-10-26 19:05:32 +010040#include "kernels/sve_interleaved_fp16_mla_3VLx8.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000041#include "kernels/sve_native_fp16_mla_4VLx4.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000042
Anthony Barbier5f707732018-07-03 16:22:02 +010043namespace arm_gemm {
44
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000045static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = {
46#if defined(__ARM_FEATURE_SVE)
47{
Georgios Pinitas14613832019-03-01 19:07:11 +000048 GemmMethod::GEMM_HYBRID,
49 "hybrid_fp16_mla_4VLx4",
50 [](const GemmArgs<__fp16> &args) { return (args._Ksize >= 8) && (args._alpha == 1.0f) && !args._trA && args._pretransposed_hint; },
51 [](const GemmArgs<__fp16> &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
52 [](const GemmArgs<__fp16> &args) { return new GemmHybrid<hybrid_fp16_mla_4VLx4, __fp16, __fp16>(args); }
53},
54{
55 GemmMethod::GEMM_NATIVE,
56 "native_fp16_mla_4VLx4",
57 [](const GemmArgs<__fp16> &args) { return (args._Ksize >= 8 && args._alpha==1.0f && !args._trA && !args._trB); },
58 [](const GemmArgs<__fp16> &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
59 [](const GemmArgs<__fp16> &args) { return new GemmNative<native_fp16_mla_4VLx4, __fp16, __fp16>(args); }
60},
61{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000062 GemmMethod::GEMM_INTERLEAVED,
63 "interleaved_fp16_mla_3VLx8",
64 [](const GemmArgs<__fp16> &args) { return (args._Ksize > 4); },
Georgios Pinitas14613832019-03-01 19:07:11 +000065 nullptr,
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000066 [](const GemmArgs<__fp16> &args) { return new GemmInterleaved<interleaved_fp16_mla_3VLx8, __fp16, __fp16>(args); }
67},
68#endif
Georgios Pinitas14613832019-03-01 19:07:11 +000069
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000070#if defined(__aarch64__) && (defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) || defined(FP16_KERNELS))
71{
72 GemmMethod::GEMM_INTERLEAVED,
73 "hgemm_24x8",
David Manselle39334c2018-07-06 17:53:35 +010074#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Georgios Pinitas14613832019-03-01 19:07:11 +000075 [](const GemmArgs<__fp16> &args) { return args._ci->has_fp16(); },
David Manselle39334c2018-07-06 17:53:35 +010076#else
Georgios Pinitas14613832019-03-01 19:07:11 +000077 nullptr,
David Manselle39334c2018-07-06 17:53:35 +010078#endif
Georgios Pinitas14613832019-03-01 19:07:11 +000079 nullptr,
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000080 [](const GemmArgs<__fp16> &args) { return new GemmInterleaved<hgemm_24x8, __fp16, __fp16>(args); }
81},
Anthony Barbier92d20812018-07-13 11:35:30 +010082#endif
Georgios Pinitas14613832019-03-01 19:07:11 +000083#ifdef __aarch64__
84{
85 GemmMethod::GEMM_INTERLEAVED,
86 "sgemm_12x8",
87 nullptr,
88 nullptr,
89 [](const GemmArgs<__fp16> &args) { return new GemmInterleaved<sgemm_12x8, __fp16, __fp16>(args); }
90},
91#elif defined(__arm__)
Georgios Pinitasa41c54b2019-01-30 18:16:43 +000092{
93 GemmMethod::GEMM_INTERLEAVED,
94 "sgemm_8x6",
Georgios Pinitas14613832019-03-01 19:07:11 +000095 nullptr,
96 nullptr,
Georgios Pinitasa41c54b2019-01-30 18:16:43 +000097 [](const GemmArgs<__fp16> &args) { return new GemmInterleaved<sgemm_8x6, __fp16, __fp16>(args); }
98},
Georgios Pinitas14613832019-03-01 19:07:11 +000099#else // not AArch64 or AArch32
100# error Unknown Architecture
Georgios Pinitasa41c54b2019-01-30 18:16:43 +0000101#endif
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000102{
103 GemmMethod::DEFAULT,
104 "",
105 nullptr,
106 nullptr,
107 nullptr,
108}
David Manselle39334c2018-07-06 17:53:35 +0100109};
110
111template<>
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000112const GemmImplementation<__fp16, __fp16> *gemm_implementation_list<__fp16, __fp16>() {
David Manselle39334c2018-07-06 17:53:35 +0100113 return gemm_fp16_methods;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000114}
115
David Manselle39334c2018-07-06 17:53:35 +0100116/* Explicitly instantiate the external functions for these types. */
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000117template UniqueGemmCommon<__fp16, __fp16> gemm<__fp16, __fp16>(const GemmArgs<__fp16> &args);
118template KernelDescription get_gemm_method<__fp16, __fp16>(const GemmArgs<__fp16> &args);
119template bool method_is_compatible<__fp16, __fp16>(GemmMethod method, const GemmArgs<__fp16> &args);
Georgios Pinitas14613832019-03-01 19:07:11 +0000120template std::vector<KernelDescription> get_compatible_kernels<__fp16, __fp16> (const GemmArgs<__fp16> &args);
David Manselle39334c2018-07-06 17:53:35 +0100121
Pablo Telloeb82fd22018-02-23 13:43:50 +0000122} // namespace arm_gemm
123
Georgios Pinitas14613832019-03-01 19:07:11 +0000124#endif // __ARM_FP16_ARGS