/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_gemm.hpp"
25#include "gemm_common.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000026#include "gemm_hybrid.hpp"
David Manselle39334c2018-07-06 17:53:35 +010027#include "gemm_implementation.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000028#include "gemm_interleaved.hpp"
Joseph Dobson6f8b17d2020-02-11 19:32:11 +000029#include "gemm_interleaved_2d.hpp"
30#include "gemm_interleaved_pretransposed_2d.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000031#include "gemm_native.hpp"
David Mansellce8f6052018-05-17 18:51:26 +010032#include "gemv_batched.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000033#include "gemv_native_transposed.hpp"
34#include "gemv_pretransposed.hpp"
35
Anthony Barbier5f707732018-07-03 16:22:02 +010036#include "kernels/a32_sgemm_8x6.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000037#include "kernels/a64_hybrid_fp32_mla_16x4.hpp"
Michalis Spyrou71ac9032019-11-14 14:31:44 +000038#include "kernels/a64_hybrid_fp32_mla_4x8.hpp"
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010039#include "kernels/a64_native_fp32_mla_16x4.hpp"
40#include "kernels/a64_smallK_hybrid_fp32_mla_4x6.hpp"
41#include "kernels/a64_smallK_hybrid_fp32_mla_4x8.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000042#include "kernels/a64_sgemm_12x8.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000043#include "kernels/a64_sgemv_pretransposed.hpp"
44#include "kernels/a64_sgemv_trans.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000045
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000046#include "kernels/sve_hybrid_fp32_mla_4VLx4.hpp"
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +010047#include "kernels/sve_hybrid_fp32_mmla_4VLx4.hpp"
Georgios Pinitas421405b2018-10-26 19:05:32 +010048#include "kernels/sve_interleaved_fp32_mla_3VLx8.hpp"
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +010049#include "kernels/sve_interleaved_fp32_mmla_3VLx8.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000050#include "kernels/sve_native_fp32_mla_4VLx4.hpp"
Georgios Pinitasc7b183a2020-03-06 18:12:09 +000051#include "kernels/sve_smallK_hybrid_fp32_mla_1VLx8.hpp"
Georgios Pinitas421405b2018-10-26 19:05:32 +010052
Anthony Barbier5f707732018-07-03 16:22:02 +010053namespace arm_gemm {
54
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000055static const GemmImplementation<float, float> gemm_fp32_methods[] =
56{
57{
58 GemmMethod::GEMV_BATCHED,
59 "gemv_batched",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010060 [](const GemmArgs &args) { return (args._Msize==1) && (args._nbatches>1); },
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000061 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010062 [](const GemmArgs &args) { return new GemvBatched<float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000063},
64#ifdef __aarch64__
65{
66 GemmMethod::GEMV_PRETRANSPOSED,
67 "sgemv_pretransposed",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010068 [](const GemmArgs &args) { return (args._Msize==1 && args._pretransposed_hint && args._nbatches==1); },
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000069 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010070 [](const GemmArgs &args) { return new GemvPretransposed<sgemv_pretransposed, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000071},
72{
73 GemmMethod::GEMV_NATIVE_TRANSPOSED,
74 "sgemv_trans",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010075 [](const GemmArgs &args) { return (args._Msize==1 && !args._trA && !args._trB && args._nbatches==1); },
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000076 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +010077 [](const GemmArgs &args) { return new GemvNativeTransposed<sgemv_trans, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000078},
David Manselle39334c2018-07-06 17:53:35 +010079
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +010080#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32)
81{
82 GemmMethod::GEMM_HYBRID,
83 "hybrid_fp32_mmla_4VLx4",
84 [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; },
85 [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
86 [](const GemmArgs &args) { return new GemmHybrid<hybrid_fp32_mmla_4VLx4, float, float>(args); }
87},
88{
89 GemmMethod::GEMM_INTERLEAVED,
90 "interleaved_fp32_mmla_3VLx8",
91 [](const GemmArgs &args) { return (args._Ksize>4); },
92 nullptr,
93 [](const GemmArgs &args) { return new GemmInterleaved<interleaved_fp32_mmla_3VLx8, float, float>(args); }
94},
95#endif // __ARM_FEATURE_SVE && MMLA_FP32
96
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000097#ifdef __ARM_FEATURE_SVE
Georgios Pinitasc7b183a2020-03-06 18:12:09 +000098// SVE smallk / native / hybrid methods
99{
100 GemmMethod::GEMM_HYBRID,
101 "smallK_hybrid_fp32_mla_1VLx8",
102 [](const GemmArgs &args) { return (args._Ksize <= 24) && !args._trA && args._pretransposed_hint; },
103 nullptr,
104 [](const GemmArgs &args) { return new GemmHybrid<smallK_hybrid_fp32_mla_1VLx8, float, float>(args); }
105},
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000106{
107 GemmMethod::GEMM_HYBRID,
108 "hybrid_fp32_mla_4VLx4",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100109 [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; },
110 [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
111 [](const GemmArgs &args) { return new GemmHybrid<hybrid_fp32_mla_4VLx4, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000112},
113{
114 GemmMethod::GEMM_NATIVE,
115 "native_fp32_mla_4VLx4",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100116 [](const GemmArgs &args) { return (args._Ksize>4 && !args._trA && !args._trB); },
117 [](const GemmArgs &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
118 [](const GemmArgs &args) { return new GemmNative<native_fp32_mla_4VLx4, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000119},
120#endif // __ARM_FEATURE_SVE
Pablo Telloeb82fd22018-02-23 13:43:50 +0000121
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000122// NEON native / hybrid methods
123{
124 GemmMethod::GEMM_HYBRID,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100125 "smallK_hybrid_fp32_mla_4x8",
126 [](const GemmArgs &args) { return (args._Ksize <= 8) && (args._Nsize % 4)==0 && !args._trA && args._pretransposed_hint; },
127 nullptr,
128 [](const GemmArgs &args) { return new GemmHybrid<smallK_hybrid_fp32_mla_4x8, float, float>(args); }
129},
130{
131 GemmMethod::GEMM_HYBRID,
132 "smallK_hybrid_fp32_mla_4x6",
133 [](const GemmArgs &args) { return (args._Ksize > 8) && (args._Ksize <= 16) && (args._Nsize % 4)==0 && !args._trA && args._pretransposed_hint; },
134 nullptr,
135 [](const GemmArgs &args) { return new GemmHybrid<smallK_hybrid_fp32_mla_4x6, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000136},
137{
Georgios Pinitas14613832019-03-01 19:07:11 +0000138 GemmMethod::GEMM_HYBRID,
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000139 "hybrid_fp32_mla_4x8_normal",
140 [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; },
141 [](const GemmArgs &args) { return (args._Nsize < 12); },
142 [](const GemmArgs &args) { return new GemmHybrid<hybrid_fp32_mla_4x8, float, float>(args); }
143},
144{
145 GemmMethod::GEMM_HYBRID,
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +0100146 "hybrid_fp32_mla_16x4",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100147 [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; },
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000148 [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || (args._Msize < 16) || (args._nmulti > 1); },
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100149 [](const GemmArgs &args) { return new GemmHybrid<hybrid_fp32_mla_16x4, float, float>(args); }
Georgios Pinitas14613832019-03-01 19:07:11 +0000150},
151{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000152 GemmMethod::GEMM_NATIVE,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100153 "native_fp32_mla_16x4",
154 [](const GemmArgs &args) { return (args._Ksize>4 && (args._Nsize % 16)==0 && !args._trA && !args._trB); },
155 [](const GemmArgs &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
156 [](const GemmArgs &args) { return new GemmNative<native_fp32_mla_16x4, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000157},
Pablo Telloeb82fd22018-02-23 13:43:50 +0000158
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000159#ifdef __ARM_FEATURE_SVE
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100160{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000161 GemmMethod::GEMM_INTERLEAVED,
162 "interleaved_fp32_mla_3VLx8",
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100163 [](const GemmArgs &args) { return (args._Ksize>4); },
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000164 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100165 [](const GemmArgs &args) { return new GemmInterleaved<interleaved_fp32_mla_3VLx8, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000166},
167#endif // __ARM_FEATURE_SVE
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +0100168// Pretranposed, 2D split
Joseph Dobson6f8b17d2020-02-11 19:32:11 +0000169{
170 GemmMethod::GEMM_INTERLEAVED_2D,
Gian Marco Iodice463f9762020-05-19 14:12:27 +0100171 "sgemm_12x8_pretranspose_2d",
Joseph Dobson6f8b17d2020-02-11 19:32:11 +0000172 [](const GemmArgs &args) { return args._pretransposed_hint; },
Gian Marco Iodice463f9762020-05-19 14:12:27 +0100173 [](const GemmArgs &args) { return args._maxthreads >= 8; },
Joseph Dobson6f8b17d2020-02-11 19:32:11 +0000174 [](const GemmArgs &args) { return new GemmInterleavedPretransposed2d<sgemm_12x8, float, float>(args); }
175},
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +0100176// Non-pretransposed, 2D split (no buffer manager)
Joseph Dobson6f8b17d2020-02-11 19:32:11 +0000177{
178 GemmMethod::GEMM_INTERLEAVED_2D,
Gian Marco Iodice463f9762020-05-19 14:12:27 +0100179 "sgemm_12x8_2d",
180 nullptr,
181 [](const GemmArgs &args) { return (!args._pretransposed_hint) && (args._maxthreads >= 8); },
Joseph Dobson6f8b17d2020-02-11 19:32:11 +0000182 [](const GemmArgs &args) { return new GemmInterleaved2d<sgemm_12x8, float, float>(args); }
183},
Georgios Pinitas5aa1a0b2020-07-02 20:02:20 +0100184// 1D split (with pretransposed or not)
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000185{
186 GemmMethod::GEMM_INTERLEAVED,
Gian Marco Iodice463f9762020-05-19 14:12:27 +0100187 "sgemm_12x8_1d",
188 nullptr,
189 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100190 [](const GemmArgs &args) { return new GemmInterleaved<sgemm_12x8, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000191},
David Manselle39334c2018-07-06 17:53:35 +0100192#endif // __aarch64__
193
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000194#ifdef __arm__
Georgios Pinitascfa2bba2019-06-27 17:00:52 +0100195{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000196 GemmMethod::GEMM_INTERLEAVED,
197 "sgemm_8x6",
198 nullptr,
199 nullptr,
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100200 [](const GemmArgs &args) { return new GemmInterleaved<sgemm_8x6, float, float>(args); }
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000201},
202#endif // __arm__
203{
204 GemmMethod::DEFAULT,
205 "",
206 nullptr,
207 nullptr,
208 nullptr
209}
David Manselle39334c2018-07-06 17:53:35 +0100210};
211
212/* Templated function to return this list. */
213template<>
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000214const GemmImplementation<float, float> *gemm_implementation_list<float, float>() {
215 return gemm_fp32_methods;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000216}
217
David Manselle39334c2018-07-06 17:53:35 +0100218/* Explicitly instantiate the external functions for these types. */
Georgios Pinitas48b3ef82019-10-14 19:03:09 +0100219template UniqueGemmCommon<float, float> gemm<float, float, Nothing>(const GemmArgs &args, const Nothing &);
220template KernelDescription get_gemm_method<float, float, Nothing>(const GemmArgs &args, const Nothing &);
221template std::vector<KernelDescription> get_compatible_kernels<float, float, Nothing> (const GemmArgs &args, const Nothing &);
Pablo Telloeb82fd22018-02-23 13:43:50 +0000222
Georgios Pinitas14613832019-03-01 19:07:11 +0000223} // namespace arm_gemm