/*
 * Copyright (c) 2017-2020, 2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Pablo Tello99ef8402018-03-20 16:46:55 +000024
// This can only be built if the target/compiler supports FP16 arguments.
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010026#if defined(__aarch64__) && (defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC))
Pablo Telloeb82fd22018-02-23 13:43:50 +000027
28#include "arm_gemm.hpp"
29
30#include "gemm_common.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000031#include "gemm_hybrid.hpp"
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000032#include "gemm_hybrid_indirect.hpp"
David Manselle39334c2018-07-06 17:53:35 +010033#include "gemm_implementation.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000034#include "gemm_interleaved.hpp"
35
Georgios Pinitas14613832019-03-01 19:07:11 +000036#include "kernels/a32_sgemm_8x6.hpp"
Francesco Petrogalli553f6952022-06-30 10:22:01 +000037#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Francesco.Petrogalli@arm.com5fcf22d2022-04-05 10:31:08 +000038#include "kernels/a64_ffhybrid_fp16_mla_6x32.hpp"
39#include "kernels/a64_ffinterleaved_fp16_mla_8x24.hpp"
Francesco Petrogalli553f6952022-06-30 10:22:01 +000040#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000041#include "kernels/a64_hgemm_8x24.hpp"
42#include "kernels/a64_hybrid_fp16_mla_6x32.hpp"
43#include "kernels/a64_sgemm_8x12.hpp"
Francesco Petrogalli553f6952022-06-30 10:22:01 +000044#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Francesco.Petrogalli@arm.com5fcf22d2022-04-05 10:31:08 +000045#include "kernels/sve_ffhybrid_fp16_mla_6x4VL.hpp"
46#include "kernels/sve_ffinterleaved_fp16_mla_8x3VL.hpp"
Francesco Petrogalli553f6952022-06-30 10:22:01 +000047#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000048#include "kernels/sve_hybrid_fp16_mla_6x4VL.hpp"
49#include "kernels/sve_interleaved_fp16_mla_8x3VL.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000050
Anthony Barbier5f707732018-07-03 16:22:02 +010051namespace arm_gemm {
52
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000053static const GemmImplementation<__fp16, __fp16> gemm_fp16_methods[] = {
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010054#ifdef ARM_COMPUTE_ENABLE_SVE
55GemmImplementation<__fp16, __fp16>::with_estimate(
Georgios Pinitas14613832019-03-01 19:07:11 +000056 GemmMethod::GEMM_HYBRID,
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000057 "sve_hybrid_fp16_mla_6x4VL",
Pablo Marquez Telloa50f1932021-03-08 17:27:05 +000058 [](const GemmArgs &args) { return args._ci->has_sve(); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010059 [](const GemmArgs &args) { return GemmHybridIndirect<cls_sve_hybrid_fp16_mla_6x4VL, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
60 [](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp16_mla_6x4VL, __fp16, __fp16>(args); }
61),
62GemmImplementation<__fp16, __fp16>::with_estimate(
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000063 GemmMethod::GEMM_INTERLEAVED,
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000064 "sve_interleaved_fp16_mla_8x3VL",
Pablo Marquez Telloa50f1932021-03-08 17:27:05 +000065 [](const GemmArgs &args) { return args._ci->has_sve() && (args._Ksize > 4); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010066 [](const GemmArgs &args) { return GemmInterleaved<cls_sve_interleaved_fp16_mla_8x3VL, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000067 [](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp16_mla_8x3VL, __fp16, __fp16>(args); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010068),
Francesco Petrogalli553f6952022-06-30 10:22:01 +000069#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Francesco.Petrogalli@arm.com5fcf22d2022-04-05 10:31:08 +000070GemmImplementation<__fp16, __fp16>::with_estimate(
71 GemmMethod::GEMM_INTERLEAVED,
72 "sve_ffinterleaved_fp16_mla_8x3VL",
73 KernelWeightFormat::VL1VL_BL16,
74 [](const GemmArgs &args) { return args._ci->has_sve(); },
75 [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_sve_ffinterleaved_fp16_mla_8x3VL, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
76 [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_sve_ffinterleaved_fp16_mla_8x3VL, __fp16, __fp16>(args); }
77),
78GemmImplementation<__fp16, __fp16>::with_estimate(
79 GemmMethod::GEMM_HYBRID,
80 "sve_ffhybrid_fp16_mla_6x4VL",
81 KernelWeightFormat::VL1VL_BL16,
82 [](const GemmArgs &args) { return args._ci->has_sve(); },
83 [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp16_mla_6x4VL, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
84 [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp16_mla_6x4VL, __fp16, __fp16>(args); }
85),
Francesco Petrogalli553f6952022-06-30 10:22:01 +000086#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010087#endif // ARM_COMPUTE_ENABLE_SVE
88#if defined(__aarch64__)
Georgios Pinitas40943df2020-11-17 18:46:40 +000089GemmImplementation<__fp16, __fp16>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000090 GemmMethod::GEMM_HYBRID,
91 "a64_hybrid_fp16_mla_6x32",
cfRod534fdea2020-06-25 18:12:25 +010092 [](const GemmArgs &args) { return args._ci->has_fp16(); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +010093 [](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp16_mla_6x32, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000094 [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp16_mla_6x32, __fp16, __fp16>(args); }
Georgios Pinitas40943df2020-11-17 18:46:40 +000095),
96GemmImplementation<__fp16, __fp16>::with_estimate(
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000097 GemmMethod::GEMM_INTERLEAVED,
Georgios Pinitasc0b6f762020-11-02 01:37:17 +000098 "a64_hgemm_8x24",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010099 [](const GemmArgs &args) { return args._ci->has_fp16(); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100100 [](const GemmArgs &args) { return GemmInterleaved<cls_a64_hgemm_8x24, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000101 [](const GemmArgs &args) { return new GemmInterleaved<cls_a64_hgemm_8x24, __fp16, __fp16>(args); }
Georgios Pinitas40943df2020-11-17 18:46:40 +0000102),
Francesco Petrogalli553f6952022-06-30 10:22:01 +0000103#ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Francesco.Petrogalli@arm.com5fcf22d2022-04-05 10:31:08 +0000104GemmImplementation<__fp16, __fp16>::with_estimate(
105 GemmMethod::GEMM_INTERLEAVED,
106 "a64_ffinterleaved_fp16_mla_8x24",
107 KernelWeightFormat::VL128_BL16,
108 [](const GemmArgs &args) { return args._ci->has_fp16(); },
109 [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_a64_ffinterleaved_fp16_mla_8x24, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
110 [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_a64_ffinterleaved_fp16_mla_8x24, __fp16, __fp16>(args); }
111),
112GemmImplementation<__fp16, __fp16>::with_estimate(
113 GemmMethod::GEMM_HYBRID,
114 "a64_ffhybrid_fp16_mla_6x32",
115 KernelWeightFormat::VL128_BL16,
116 [](const GemmArgs &args) { return args._ci->has_fp16(); },
117 [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp16_mla_6x32, __fp16, __fp16>::estimate_cycles<__fp16>(args); },
118 [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp16_mla_6x32, __fp16, __fp16>(args); }
119),
Francesco Petrogalli553f6952022-06-30 10:22:01 +0000120#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS
Georgios Pinitas14613832019-03-01 19:07:11 +0000121{
122 GemmMethod::GEMM_INTERLEAVED,
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000123 "a64_sgemm_8x12",
Georgios Pinitas14613832019-03-01 19:07:11 +0000124 nullptr,
Georgios Pinitas40943df2020-11-17 18:46:40 +0000125 [](const GemmArgs &args) { return !args._ci->has_fp16(); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000126 [](const GemmArgs &args) { return new GemmInterleaved<cls_a64_sgemm_8x12, __fp16, __fp16>(args); }
Georgios Pinitas14613832019-03-01 19:07:11 +0000127},
128#elif defined(__arm__)
Georgios Pinitasa41c54b2019-01-30 18:16:43 +0000129{
130 GemmMethod::GEMM_INTERLEAVED,
131 "sgemm_8x6",
Georgios Pinitas14613832019-03-01 19:07:11 +0000132 nullptr,
133 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100134 [](const GemmArgs &args) { return new GemmInterleaved<sgemm_8x6, __fp16, __fp16>(args); }
Georgios Pinitasa41c54b2019-01-30 18:16:43 +0000135},
Georgios Pinitas14613832019-03-01 19:07:11 +0000136#else // not AArch64 or AArch32
137# error Unknown Architecture
Georgios Pinitasa41c54b2019-01-30 18:16:43 +0000138#endif
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000139{
140 GemmMethod::DEFAULT,
141 "",
142 nullptr,
143 nullptr,
144 nullptr,
145}
David Manselle39334c2018-07-06 17:53:35 +0100146};
147
148template<>
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000149const GemmImplementation<__fp16, __fp16> *gemm_implementation_list<__fp16, __fp16>() {
David Manselle39334c2018-07-06 17:53:35 +0100150 return gemm_fp16_methods;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000151}
152
David Manselle39334c2018-07-06 17:53:35 +0100153/* Explicitly instantiate the external functions for these types. */
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100154template UniqueGemmCommon<__fp16, __fp16> gemm<__fp16, __fp16, Nothing>(const GemmArgs &args, const Nothing &);
Francesco Petrogalli553f6952022-06-30 10:22:01 +0000155template bool has_opt_gemm<__fp16, __fp16, Nothing>(WeightFormat &weight_format, const GemmArgs &args, const Nothing &);
Francesco.Petrogalli@arm.com5fcf22d2022-04-05 10:31:08 +0000156template KernelDescription get_gemm_method<__fp16, __fp16, Nothing>(const GemmArgs &args, const Nothing &);
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100157template std::vector<KernelDescription> get_compatible_kernels<__fp16, __fp16, Nothing>(const GemmArgs &args, const Nothing &);
David Manselle39334c2018-07-06 17:53:35 +0100158
Pablo Telloeb82fd22018-02-23 13:43:50 +0000159} // namespace arm_gemm
160
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100161#endif // defined(__aarch64__) && (defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC))