/*
 * Copyright (c) 2017-2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
| 24 | #include "arm_gemm.hpp" |
| 25 | #include "gemm_common.hpp" |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 26 | #include "gemm_hybrid.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 27 | #include "gemm_hybrid_indirect.hpp" |
David Mansell | e39334c | 2018-07-06 17:53:35 +0100 | [diff] [blame] | 28 | #include "gemm_implementation.hpp" |
Pablo Tello | eb82fd2 | 2018-02-23 13:43:50 +0000 | [diff] [blame] | 29 | #include "gemm_interleaved.hpp" |
David Mansell | ce8f605 | 2018-05-17 18:51:26 +0100 | [diff] [blame] | 30 | #include "gemv_batched.hpp" |
Pablo Tello | eb82fd2 | 2018-02-23 13:43:50 +0000 | [diff] [blame] | 31 | #include "gemv_pretransposed.hpp" |
| 32 | |
Anthony Barbier | 5f70773 | 2018-07-03 16:22:02 +0100 | [diff] [blame] | 33 | #include "kernels/a32_sgemm_8x6.hpp" |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 34 | #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 35 | #include "kernels/a64_ffhybrid_fp32_mla_6x16.hpp" |
| 36 | #include "kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24.hpp" |
Milos Puzovic | 905786e | 2024-03-26 14:34:30 +0000 | [diff] [blame] | 37 | #include "kernels/a64_ffhybrid_fp32bf16fp32_mmla_6x16.hpp" |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 38 | #include "kernels/a64_ffinterleaved_bf16fp32_mmla_8x12.hpp" |
| 39 | #include "kernels/a64_ffinterleaved_fp32_mla_8x12.hpp" |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 40 | #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 41 | #include "kernels/a64_hybrid_fp32bf16fp32_mmla_4x24.hpp" |
| 42 | #include "kernels/a64_hybrid_fp32bf16fp32_mmla_6x16.hpp" |
| 43 | #include "kernels/a64_hybrid_fp32_mla_4x24.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 44 | #include "kernels/a64_hybrid_fp32_mla_6x16.hpp" |
| 45 | #include "kernels/a64_hybrid_fp32_mla_8x4.hpp" |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 46 | #include "kernels/a64_interleaved_bf16fp32_mmla_8x12.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 47 | #include "kernels/a64_sgemm_8x12.hpp" |
Michalis Spyrou | 778b95c | 2021-04-20 12:15:52 +0100 | [diff] [blame] | 48 | #include "kernels/a64_sgemm_8x6.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 49 | #include "kernels/a64_smallK_hybrid_fp32_mla_6x4.hpp" |
| 50 | #include "kernels/a64_smallK_hybrid_fp32_mla_8x4.hpp" |
Pablo Tello | eb82fd2 | 2018-02-23 13:43:50 +0000 | [diff] [blame] | 51 | |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 52 | #ifdef ARM_COMPUTE_ENABLE_SVE |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 53 | #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 54 | #include "kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp" |
| 55 | #include "kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL.hpp" |
| 56 | #include "kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp" |
| 57 | #include "kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL.hpp" |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 58 | #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 59 | #ifdef ARM_COMPUTE_ENABLE_SME2 |
| 60 | #include "kernels/sme2_gemv_fp32_mla_16VL.hpp" |
| 61 | #include "kernels/sme2_gemv_fp32bf16fp32_dot_16VL.hpp" |
| 62 | #include "kernels/sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp" |
| 63 | #include "kernels/sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp" |
| 64 | #include "kernels/sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp" |
| 65 | #include "kernels/sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp" |
| 66 | #include "kernels/sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp" |
| 67 | #include "kernels/sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp" |
| 68 | #endif // ARM_COMPUTE_ENABLE_SME2 |
| 69 | |
Michael Tyler | 74921ee | 2023-04-12 17:43:17 +0100 | [diff] [blame] | 70 | #include "kernels/sve_ffhybrid_fp32_mla_6x4VL.hpp" |
| 71 | #include "kernels/sve_ffhybrid_fp32bf16fp32_mmla_4x6VL.hpp" |
| 72 | #include "kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp" |
| 73 | #include "kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL.hpp" |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 74 | #include "kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp" |
| 75 | #include "kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 76 | #include "kernels/sve_hybrid_fp32_mla_6x4VL.hpp" |
| 77 | #include "kernels/sve_hybrid_fp32_mla_8x1VL.hpp" |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 78 | #include "kernels/sve_interleaved_bf16fp32_mmla_8x3VL.hpp" |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 79 | #include "kernels/sve_interleaved_fp32_mla_8x3VL.hpp" |
| 80 | #include "kernels/sve_interleaved_fp32_mmla_8x3VL.hpp" |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 81 | #endif // ARM_COMPUTE_ENABLE_SVE |
Georgios Pinitas | 421405b | 2018-10-26 19:05:32 +0100 | [diff] [blame] | 82 | |
Anthony Barbier | 5f70773 | 2018-07-03 16:22:02 +0100 | [diff] [blame] | 83 | namespace arm_gemm { |
| 84 | |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 85 | static const GemmImplementation<float, float> gemm_fp32_methods[] = |
| 86 | { |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 87 | // GEMV cases - starting with 'gemv_batched' wrapper to turn batched GEMV into GEMM. |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 88 | { |
| 89 | GemmMethod::GEMV_BATCHED, |
| 90 | "gemv_batched", |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 91 | [](const GemmArgs &args) { return args._Msize==1 && args._nbatches>1 && !args._indirect_input; }, |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 92 | nullptr, |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 93 | [](const GemmArgs &args) { return new GemvBatched<float, float>(args); } |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 94 | }, |
| 95 | #ifdef __aarch64__ |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 96 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 97 | // "fast mode" (BF16) kernels |
| 98 | GemmImplementation<float, float>::with_estimate( |
| 99 | GemmMethod::GEMM_INTERLEAVED, |
| 100 | "a64_interleaved_bf16fp32_mmla_8x12", |
| 101 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 102 | [](const GemmArgs &args) { return GemmInterleaved<cls_a64_interleaved_bf16fp32_mmla_8x12, float, float>::estimate_cycles<float>(args); }, |
| 103 | [](const GemmArgs &args) { return new GemmInterleaved<cls_a64_interleaved_bf16fp32_mmla_8x12, float, float>(args); } |
| 104 | ), |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 105 | |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 106 | GemmImplementation<float, float>::with_estimate( |
| 107 | GemmMethod::GEMM_HYBRID, |
| 108 | "a64_hybrid_fp32bf16fp32_mmla_6x16", |
| 109 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 110 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32bf16fp32_mmla_6x16, float, float>::estimate_cycles<float>(args); }, |
| 111 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32bf16fp32_mmla_6x16, float, float>(args); } |
| 112 | ), |
| 113 | GemmImplementation<float, float>::with_estimate( |
| 114 | GemmMethod::GEMM_HYBRID, |
| 115 | "a64_hybrid_fp32bf16fp32_mmla_4x24", |
| 116 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 117 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32bf16fp32_mmla_4x24, float, float>::estimate_cycles<float>(args); }, |
| 118 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32bf16fp32_mmla_4x24, float, float>(args); } |
| 119 | ), |
| 120 | #endif // ARM_COMPUTE_ENABLE_BF16 |
Michalis Spyrou | 20fca52 | 2021-06-07 14:23:57 +0100 | [diff] [blame] | 121 | #ifdef ARM_COMPUTE_ENABLE_SVE |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 122 | #ifdef ARM_COMPUTE_ENABLE_SME2 |
| 123 | // SME kernels |
| 124 | { |
| 125 | GemmMethod::GEMM_HYBRID, |
| 126 | "sme2_gemv_fp32bf16fp32_dot_16VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 127 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_sme2() && args._Msize==1 && args._nbatches==1 && !args._indirect_input && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 128 | nullptr, |
| 129 | [](const GemmArgs &args) { return new GemvPretransposed<cls_sme2_gemv_fp32bf16fp32_dot_16VL, float, float>(args); } |
| 130 | }, |
| 131 | { |
| 132 | GemmMethod::GEMM_HYBRID, |
| 133 | "sme2_gemv_fp32_mla_16VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 134 | [](const GemmArgs &args) { return args._ci->has_sme2() && args._Msize==1 && args._nbatches==1 && !args._indirect_input && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 135 | nullptr, |
| 136 | [](const GemmArgs &args) { return new GemvPretransposed<cls_sme2_gemv_fp32_mla_16VL, float, float>(args); } |
| 137 | }, |
| 138 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 139 | { |
| 140 | GemmMethod::GEMM_INTERLEAVED, |
| 141 | "sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 142 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 143 | [](const GemmArgs &args) { const auto VL = sme::get_vector_length<float>(); |
David Mansell | 5c76742 | 2024-03-15 16:35:13 +0000 | [diff] [blame] | 144 | return args._Nsize >= 8*VL || args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 145 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL, float, float>(args); } |
| 146 | }, |
| 147 | #endif // ARM_COMPUTE_ENABLE_BF16 |
| 148 | { |
| 149 | GemmMethod::GEMM_INTERLEAVED, |
| 150 | "sme2_interleaved_nomerge_fp32_mopa_1VLx4VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 151 | [](const GemmArgs &args) { return args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 152 | [](const GemmArgs &args) { const auto VL = sme::get_vector_length<float>(); |
David Mansell | 5c76742 | 2024-03-15 16:35:13 +0000 | [diff] [blame] | 153 | return args._Nsize >= 8*VL || args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 154 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_fp32_mopa_1VLx4VL, float, float>(args); } |
| 155 | }, |
| 156 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 157 | { |
| 158 | GemmMethod::GEMM_INTERLEAVED, |
| 159 | "sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 160 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 161 | [](const GemmArgs &args) { const auto VL = sme::get_vector_length<float>(); |
| 162 | return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); }, |
| 163 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL, float, float>(args); } |
| 164 | }, |
| 165 | #endif // ARM_COMPUTE_ENABLE_BF16 |
| 166 | { |
| 167 | GemmMethod::GEMM_INTERLEAVED, |
| 168 | "sme2_interleaved_nomerge_fp32_mopa_4VLx1VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 169 | [](const GemmArgs &args) { return args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 170 | [](const GemmArgs &args) { const auto VL = sme::get_vector_length<float>(); |
| 171 | return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); }, |
| 172 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_fp32_mopa_4VLx1VL, float, float>(args); } |
| 173 | }, |
| 174 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 175 | { |
| 176 | GemmMethod::GEMM_INTERLEAVED, |
| 177 | "sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 178 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 179 | nullptr, |
| 180 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL, float, float>(args); } |
| 181 | }, |
| 182 | #endif // ARM_COMPUTE_ENABLE_BF16 |
| 183 | { |
| 184 | GemmMethod::GEMM_INTERLEAVED, |
| 185 | "sme2_interleaved_nomerge_fp32_mopa_2VLx2VL", |
Gunes Bayir | 499b5bc | 2024-04-26 13:15:05 +0100 | [diff] [blame] | 186 | [](const GemmArgs &args) { return args._ci->has_sme2() && !args._accumulate; }, |
Viet-Hoa Do | 03b2971 | 2022-06-01 11:47:14 +0100 | [diff] [blame] | 187 | nullptr, |
| 188 | [](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_fp32_mopa_2VLx2VL, float, float>(args); } |
| 189 | }, |
| 190 | #endif // ARM_COMPUTE_ENABLE_SME2 |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 191 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 192 | GemmImplementation<float, float>::with_estimate( |
| 193 | GemmMethod::GEMM_INTERLEAVED, |
| 194 | "sve_interleaved_bf16fp32_mmla_8x3VL", |
| 195 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_svebf16(); }, |
| 196 | [](const GemmArgs &args) { return GemmInterleaved<cls_sve_interleaved_bf16fp32_mmla_8x3VL, float, float>::estimate_cycles<float>(args); }, |
| 197 | [](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_bf16fp32_mmla_8x3VL, float, float>(args); } |
| 198 | ), |
| 199 | GemmImplementation<float, float>::with_estimate( |
Georgios Pinitas | 5aa1a0b | 2020-07-02 20:02:20 +0100 | [diff] [blame] | 200 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 201 | "sve_hybrid_fp32bf16fp32_mmla_6x4VL", |
David Mansell | c22e126 | 2024-05-03 13:24:48 +0100 | [diff] [blame] | 202 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_svebf16(); }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 203 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_sve_hybrid_fp32bf16fp32_mmla_6x4VL, float, float>::estimate_cycles<float>(args); }, |
| 204 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32bf16fp32_mmla_6x4VL, float, float>(args); } |
| 205 | ), |
| 206 | GemmImplementation<float, float>::with_estimate( |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 207 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 208 | "sve_hybrid_fp32bf16fp32_mmla_4x6VL", |
David Mansell | c22e126 | 2024-05-03 13:24:48 +0100 | [diff] [blame] | 209 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_svebf16(); }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 210 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_sve_hybrid_fp32bf16fp32_mmla_4x6VL, float, float>::estimate_cycles<float>(args); }, |
| 211 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32bf16fp32_mmla_4x6VL, float, float>(args); } |
| 212 | ), |
| 213 | #endif // ARM_COMPUTE_ENABLE_BF16 |
| 214 | #ifdef ARM_COMPUTE_ENABLE_SVEF32MM |
| 215 | // MMLA next due to higher throughput (which is SVE only) |
| 216 | // Prefer this in all cases, except if fast mode is requested and BF16 is available. |
Georgios Pinitas | 5aa1a0b | 2020-07-02 20:02:20 +0100 | [diff] [blame] | 217 | { |
| 218 | GemmMethod::GEMM_INTERLEAVED, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 219 | "sve_interleaved_fp32_mmla_8x3VL", |
Michalis Spyrou | 20fca52 | 2021-06-07 14:23:57 +0100 | [diff] [blame] | 220 | [](const GemmArgs &args) { return args._ci->has_svef32mm() && (args._Ksize>4); }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 221 | [](const GemmArgs &args) { return !(args._fast_mode && args._ci->has_bf16()); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 222 | [](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mmla_8x3VL, float, float>(args); } |
Georgios Pinitas | 5aa1a0b | 2020-07-02 20:02:20 +0100 | [diff] [blame] | 223 | }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 224 | #endif // ARM_COMPUTE_ENABLE_SVEF32MM |
| 225 | // SVE kernels |
Georgios Pinitas | c7b183a | 2020-03-06 18:12:09 +0000 | [diff] [blame] | 226 | { |
| 227 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 228 | "sve_hybrid_fp32_mla_8x1VL", |
Pablo Marquez Tello | a50f193 | 2021-03-08 17:27:05 +0000 | [diff] [blame] | 229 | [](const GemmArgs &args) { return args._ci->has_sve(); }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 230 | [](const GemmArgs &args) { return (args._Nsize < 12); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 231 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_8x1VL, float, float>(args); } |
| 232 | }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 233 | GemmImplementation<float, float>::with_estimate( |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 234 | GemmMethod::GEMM_HYBRID, |
| 235 | "sve_hybrid_fp32_mla_6x4VL", |
Pablo Marquez Tello | a50f193 | 2021-03-08 17:27:05 +0000 | [diff] [blame] | 236 | [](const GemmArgs &args) { return args._ci->has_sve(); }, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 237 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_sve_hybrid_fp32_mla_6x4VL, float, float>::estimate_cycles<float>(args); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 238 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_sve_hybrid_fp32_mla_6x4VL, float, float>(args); } |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 239 | ), |
| 240 | GemmImplementation<float, float>::with_estimate( |
| 241 | GemmMethod::GEMM_INTERLEAVED, |
| 242 | "sve_interleaved_fp32_mla_8x3VL", |
| 243 | [](const GemmArgs &args) { return args._ci->has_sve(); }, |
| 244 | [](const GemmArgs &args) { return GemmInterleaved<cls_sve_interleaved_fp32_mla_8x3VL, float, float>::estimate_cycles<float>(args); }, |
| 245 | [](const GemmArgs &args) { return new GemmInterleaved<cls_sve_interleaved_fp32_mla_8x3VL, float, float>(args); } |
| 246 | ), |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 247 | #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 248 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 249 | GemmImplementation<float, float>::with_estimate( |
| 250 | GemmMethod::GEMM_INTERLEAVED, |
| 251 | "sve_ffinterleaved_bf16fp32_mmla_8x3VL", |
| 252 | KernelWeightFormat::VL2VL_BL64_BF16, |
| 253 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_svebf16(); }, |
| 254 | [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_sve_ffinterleaved_bf16fp32_mmla_8x3VL, float, float>::estimate_cycles<float>(args); }, |
| 255 | [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_sve_ffinterleaved_bf16fp32_mmla_8x3VL, float, float>(args); } |
| 256 | ), |
| 257 | GemmImplementation<float, float>::with_estimate( |
| 258 | GemmMethod::GEMM_HYBRID, |
| 259 | "sve_ffhybrid_fp32bf16fp32_mmla_4x6VL", |
| 260 | KernelWeightFormat::VL2VL_BL64_BF16, |
| 261 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_svebf16(); }, |
| 262 | [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp32bf16fp32_mmla_4x6VL, float, float>::estimate_cycles<float>(args); }, |
| 263 | [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp32bf16fp32_mmla_4x6VL, float, float>(args); } |
| 264 | ), |
| 265 | #endif |
| 266 | GemmImplementation<float, float>::with_estimate( |
| 267 | GemmMethod::GEMM_INTERLEAVED, |
| 268 | "sve_ffinterleaved_fp32_mla_8x3VL", |
| 269 | KernelWeightFormat::VL1VL_BL32, |
| 270 | [](const GemmArgs &args) { return args._ci->has_sve(); }, |
| 271 | [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_sve_ffinterleaved_fp32_mla_8x3VL, float, float>::estimate_cycles<float>(args); }, |
| 272 | [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_sve_ffinterleaved_fp32_mla_8x3VL, float, float>(args); } |
| 273 | ), |
| 274 | GemmImplementation<float, float>::with_estimate( |
| 275 | GemmMethod::GEMM_HYBRID, |
| 276 | "sve_ffhybrid_fp32_mla_6x4VL", |
| 277 | KernelWeightFormat::VL1VL_BL32, |
| 278 | [](const GemmArgs &args) { return args._ci->has_sve(); }, |
| 279 | [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp32_mla_6x4VL, float, float>::estimate_cycles<float>(args); }, |
| 280 | [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_sve_ffhybrid_fp32_mla_6x4VL, float, float>(args); } |
| 281 | ), |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 282 | #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Michalis Spyrou | 20fca52 | 2021-06-07 14:23:57 +0100 | [diff] [blame] | 283 | #endif // ARM_COMPUTE_ENABLE_SVE |
Michalis Spyrou | 778b95c | 2021-04-20 12:15:52 +0100 | [diff] [blame] | 284 | // Cortex-A35 specific kernel - use for any problem on A35, and never in any other cases. |
| 285 | { |
| 286 | GemmMethod::GEMM_INTERLEAVED, |
| 287 | "a64_sgemm_8x6", |
| 288 | nullptr, |
| 289 | [](const GemmArgs &args) { return args._ci->get_cpu_model() == CPUModel::A35; }, |
| 290 | [](const GemmArgs &args) { return new GemmInterleaved<cls_a64_sgemm_8x6, float, float>(args); } |
| 291 | }, |
Michele Di Giorgio | 33f41fa | 2021-03-09 14:09:08 +0000 | [diff] [blame] | 292 | // Arm® Neon™ hybrid methods |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 293 | { |
| 294 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 295 | "a64_smallK_hybrid_fp32_mla_8x4", |
Radu Salavat | f1f1f87 | 2024-02-27 18:32:26 +0000 | [diff] [blame] | 296 | [](const GemmArgs &args) { return args._Ksize <= 8 && (args._Nsize % 4)==0 && !args._indirect_input && !args._accumulate; }, |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 297 | nullptr, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 298 | [](const GemmArgs &args) { return new GemmHybrid<cls_a64_smallK_hybrid_fp32_mla_8x4, float, float>(args); } |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 299 | }, |
| 300 | { |
| 301 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 302 | "a64_smallK_hybrid_fp32_mla_6x4", |
Radu Salavat | f1f1f87 | 2024-02-27 18:32:26 +0000 | [diff] [blame] | 303 | [](const GemmArgs &args) { return (args._Ksize > 8 && args._Ksize <= 16) && (args._Nsize % 4)==0 && !args._indirect_input && !args._accumulate; }, |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 304 | nullptr, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 305 | [](const GemmArgs &args) { return new GemmHybrid<cls_a64_smallK_hybrid_fp32_mla_6x4, float, float>(args); } |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 306 | }, |
| 307 | { |
Georgios Pinitas | 1461383 | 2019-03-01 19:07:11 +0000 | [diff] [blame] | 308 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 309 | "a64_hybrid_fp32_mla_8x4", |
| 310 | nullptr, |
Michalis Spyrou | 71ac903 | 2019-11-14 14:31:44 +0000 | [diff] [blame] | 311 | [](const GemmArgs &args) { return (args._Nsize < 12); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 312 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32_mla_8x4, float, float>(args); } |
Michalis Spyrou | 71ac903 | 2019-11-14 14:31:44 +0000 | [diff] [blame] | 313 | }, |
David Mansell | 318c9f4 | 2020-07-08 13:28:45 +0100 | [diff] [blame] | 314 | GemmImplementation<float, float>::with_estimate( |
Michalis Spyrou | 71ac903 | 2019-11-14 14:31:44 +0000 | [diff] [blame] | 315 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 316 | "a64_hybrid_fp32_mla_4x24", |
| 317 | nullptr, |
| 318 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32_mla_4x24, float, float>::estimate_cycles<float>(args); }, |
| 319 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32_mla_4x24, float, float>(args); } |
| 320 | ), |
| 321 | GemmImplementation<float, float>::with_estimate( |
| 322 | GemmMethod::GEMM_HYBRID, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 323 | "a64_hybrid_fp32_mla_6x16", |
| 324 | nullptr, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 325 | [](const GemmArgs &args) { return GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>::estimate_cycles<float>(args); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 326 | [](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32_mla_6x16, float, float>(args); } |
David Mansell | 318c9f4 | 2020-07-08 13:28:45 +0100 | [diff] [blame] | 327 | ), |
David Mansell | 318c9f4 | 2020-07-08 13:28:45 +0100 | [diff] [blame] | 328 | GemmImplementation<float, float>::with_estimate( |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 329 | GemmMethod::GEMM_INTERLEAVED, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 330 | "a64_sgemm_8x12", |
Gian Marco Iodice | 463f976 | 2020-05-19 14:12:27 +0100 | [diff] [blame] | 331 | nullptr, |
Georgios Pinitas | 4ee8b15 | 2021-07-16 16:16:43 +0100 | [diff] [blame] | 332 | [](const GemmArgs &args) { return GemmInterleaved<cls_a64_sgemm_8x12, float, float>::estimate_cycles<float>(args); }, |
Georgios Pinitas | c0b6f76 | 2020-11-02 01:37:17 +0000 | [diff] [blame] | 333 | [](const GemmArgs &args) { return new GemmInterleaved<cls_a64_sgemm_8x12, float, float>(args); } |
David Mansell | 318c9f4 | 2020-07-08 13:28:45 +0100 | [diff] [blame] | 334 | ), |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 335 | #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 336 | #ifdef ARM_COMPUTE_ENABLE_BF16 |
| 337 | // "fast mode" (BF16) kernels |
| 338 | GemmImplementation<float, float>::with_estimate( |
| 339 | GemmMethod::GEMM_INTERLEAVED, |
| 340 | "a64_ffinterleaved_bf16fp32_mmla_8x12", |
| 341 | KernelWeightFormat::VL256_BL64_BF16, |
| 342 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 343 | [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_a64_ffinterleaved_bf16fp32_mmla_8x12, float, float>::estimate_cycles<float>(args); }, |
| 344 | [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_a64_ffinterleaved_bf16fp32_mmla_8x12, float, float>(args); } |
| 345 | ), |
| 346 | GemmImplementation<float, float>::with_estimate( |
| 347 | GemmMethod::GEMM_HYBRID, |
| 348 | "a64_ffhybrid_fp32bf16fp32_mmla_4x24", |
| 349 | KernelWeightFormat::VL256_BL64_BF16, |
| 350 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 351 | [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32bf16fp32_mmla_4x24, float, float>::estimate_cycles<float>(args); }, |
| 352 | [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32bf16fp32_mmla_4x24, float, float>(args); } |
| 353 | ), |
Milos Puzovic | 905786e | 2024-03-26 14:34:30 +0000 | [diff] [blame] | 354 | GemmImplementation<float, float>::with_estimate( |
| 355 | GemmMethod::GEMM_HYBRID, |
| 356 | "a64_ffhybrid_fp32bf16fp32_mmla_6x16", |
| 357 | KernelWeightFormat::VL256_BL64_BF16, |
| 358 | [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, |
| 359 | [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32bf16fp32_mmla_6x16, float, float>::estimate_cycles<float>(args); }, |
| 360 | [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32bf16fp32_mmla_6x16, float, float>(args); } |
| 361 | ), |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 362 | #endif // BF16 |
| 363 | GemmImplementation<float, float>::with_estimate( |
| 364 | GemmMethod::GEMM_INTERLEAVED, |
| 365 | "a64_ffinterleaved_fp32_mla_8x12", |
| 366 | KernelWeightFormat::VL128_BL32, |
| 367 | nullptr, |
| 368 | [](const GemmArgs &args) { return GemmInterleavedFixedFormat<cls_a64_ffinterleaved_fp32_mla_8x12, float, float>::estimate_cycles<float>(args); }, |
| 369 | [](const GemmArgs &args) { return new GemmInterleavedFixedFormat<cls_a64_ffinterleaved_fp32_mla_8x12, float, float>(args); } |
| 370 | ), |
| 371 | GemmImplementation<float, float>::with_estimate( |
| 372 | GemmMethod::GEMM_HYBRID, |
| 373 | "a64_ffhybrid_fp32_mla_6x16", |
| 374 | KernelWeightFormat::VL128_BL32, |
| 375 | nullptr, |
| 376 | [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32_mla_6x16, float, float>::estimate_cycles<float>(args); }, |
| 377 | [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat<cls_a64_ffhybrid_fp32_mla_6x16, float, float>(args); } |
| 378 | ), |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 379 | #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS |
David Mansell | e39334c | 2018-07-06 17:53:35 +0100 | [diff] [blame] | 380 | #endif // __aarch64__ |
| 381 | |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 382 | #ifdef __arm__ |
Georgios Pinitas | cfa2bba | 2019-06-27 17:00:52 +0100 | [diff] [blame] | 383 | { |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 384 | GemmMethod::GEMM_INTERLEAVED, |
| 385 | "sgemm_8x6", |
| 386 | nullptr, |
| 387 | nullptr, |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 388 | [](const GemmArgs &args) { return new GemmInterleaved<sgemm_8x6, float, float>(args); } |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 389 | }, |
| 390 | #endif // __arm__ |
| 391 | { |
| 392 | GemmMethod::DEFAULT, |
| 393 | "", |
| 394 | nullptr, |
| 395 | nullptr, |
| 396 | nullptr |
| 397 | } |
David Mansell | e39334c | 2018-07-06 17:53:35 +0100 | [diff] [blame] | 398 | }; |
| 399 | |
| 400 | /* Templated function to return this list. */ |
| 401 | template<> |
Georgios Pinitas | 7cd26d4 | 2019-01-09 18:35:17 +0000 | [diff] [blame] | 402 | const GemmImplementation<float, float> *gemm_implementation_list<float, float>() { |
| 403 | return gemm_fp32_methods; |
Pablo Tello | eb82fd2 | 2018-02-23 13:43:50 +0000 | [diff] [blame] | 404 | } |
| 405 | |
David Mansell | e39334c | 2018-07-06 17:53:35 +0100 | [diff] [blame] | 406 | /* Explicitly instantiate the external functions for these types. */ |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 407 | template UniqueGemmCommon<float, float> gemm<float, float, Nothing>(const GemmArgs &args, const Nothing &); |
Francesco Petrogalli | 553f695 | 2022-06-30 10:22:01 +0000 | [diff] [blame] | 408 | template bool has_opt_gemm<float, float, Nothing>(WeightFormat &weight_format, const GemmArgs &args, const Nothing &); |
Francesco.Petrogalli@arm.com | 5fcf22d | 2022-04-05 10:31:08 +0000 | [diff] [blame] | 409 | template KernelDescription get_gemm_method<float, float, Nothing>(const GemmArgs &args, const Nothing &); |
Georgios Pinitas | 48b3ef8 | 2019-10-14 19:03:09 +0100 | [diff] [blame] | 410 | template std::vector<KernelDescription> get_compatible_kernels<float, float, Nothing> (const GemmArgs &args, const Nothing &); |
Pablo Tello | eb82fd2 | 2018-02-23 13:43:50 +0000 | [diff] [blame] | 411 | |
Georgios Pinitas | 1461383 | 2019-03-01 19:07:11 +0000 | [diff] [blame] | 412 | } // namespace arm_gemm |