blob: 9321bfccfd51d26abc72009762664669a7331f81 [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifdef __aarch64__
25
26#include "arm_gemm.hpp"
27#include "gemm_common.hpp"
David Manselle39334c2018-07-06 17:53:35 +010028#include "gemm_implementation.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000029#include "gemm_interleaved.hpp"
Georgios Pinitas1d480652019-01-23 11:24:50 +000030#include "gemm_hybrid.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000031#include "gemm_native.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000032
David Manselle39334c2018-07-06 17:53:35 +010033#include "kernels/a64_gemm_u16_12x8.hpp"
Anthony Barbier5f707732018-07-03 16:22:02 +010034#include "kernels/a64_gemm_u8_12x8.hpp"
David Manselle39334c2018-07-06 17:53:35 +010035#include "kernels/a64_gemm_u8_4x4.hpp"
Georgios Pinitas1d480652019-01-23 11:24:50 +000036#include "kernels/a64_hybrid_u8u32_dot_16x4.hpp"
Georgios Pinitas14613832019-03-01 19:07:11 +000037#include "kernels/sve_hybrid_u8u32_dot_4VLx4.hpp"
Georgios Pinitas421405b2018-10-26 19:05:32 +010038#include "kernels/sve_interleaved_u8u32_dot_3VLx8.hpp"
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000039#include "kernels/sve_native_u8u32_dot_4VLx4.hpp"
Pablo Telloeb82fd22018-02-23 13:43:50 +000040
Anthony Barbier5f707732018-07-03 16:22:02 +010041namespace arm_gemm {
42
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000043static const GemmImplementation<uint8_t, uint32_t> gemm_u8_methods[] = {
Georgios Pinitas421405b2018-10-26 19:05:32 +010044#ifdef __ARM_FEATURE_SVE
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000045{
Georgios Pinitas14613832019-03-01 19:07:11 +000046 GemmMethod::GEMM_HYBRID,
47 "hybrid_u8u32_dot_4VLx4",
48 [](const GemmArgs<uint32_t> &args) { return args._Ksize>=16 && args._alpha==1 && !args._trA && !args._trB && args._pretransposed_hint; },
49 [](const GemmArgs<uint32_t> &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); },
50 [](const GemmArgs<uint32_t> &args) { return new GemmHybrid<hybrid_u8u32_dot_4VLx4, uint8_t, uint32_t>(args); }
51},
52{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000053 GemmMethod::GEMM_NATIVE,
54 "native_u8u32_dot_4VLx4",
55 [](const GemmArgs<uint32_t> &args) { return (args._Ksize>=16 && args._alpha==1 && !args._trA && !args._trB); },
56 [](const GemmArgs<uint32_t> &args) { return ((args._Ksize <= 128) && (args._Nsize <= 128)); },
57 [](const GemmArgs<uint32_t> &args) { return new GemmNative<native_u8u32_dot_4VLx4, uint8_t, uint32_t>(args); }
58},
59{
60 GemmMethod::GEMM_INTERLEAVED,
61 "interleaved_u8u32_dot_3VLx8",
62 [](const GemmArgs<uint32_t> &args) { return (args._Ksize>4); },
63 nullptr,
64 [](const GemmArgs<uint32_t> &args) { return new GemmInterleaved<interleaved_u8u32_dot_3VLx8, uint8_t, uint32_t>(args); }
65},
Georgios Pinitas421405b2018-10-26 19:05:32 +010066#endif
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000067{
Georgios Pinitas1d480652019-01-23 11:24:50 +000068 GemmMethod::GEMM_HYBRID,
69 "hybrid_u8u32_dot_16x4",
Georgios Pinitas14613832019-03-01 19:07:11 +000070 [](const GemmArgs<uint32_t> &args) { return args._ci->has_dotprod() && args._Ksize>=16 && !args._trA && !args._trB && args._pretransposed_hint; },
Georgios Pinitas1d480652019-01-23 11:24:50 +000071 [](const GemmArgs<uint32_t> &args) { return args._Nsize<=256 && args._Ksize>128; },
72 [](const GemmArgs<uint32_t> &args) { return new GemmHybrid<hybrid_u8u32_dot_16x4, uint8_t, uint32_t>(args); }
73},
74{
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000075 GemmMethod::GEMM_INTERLEAVED,
76 "gemm_u8_12x8",
77 [](const GemmArgs<uint32_t> &args) { return args._ci->has_dotprod(); },
78 nullptr,
79 [](const GemmArgs<uint32_t> &args) { return new GemmInterleaved<gemm_u8_12x8, uint8_t, uint32_t>(args); }
80},
81{
82 GemmMethod::GEMM_INTERLEAVED,
83 "gemm_u8_4x4",
84 nullptr,
85 nullptr,
86 [](const GemmArgs<uint32_t> &args) { return new GemmInterleaved<gemm_u8_4x4, uint8_t, uint32_t>(args); }
87},
88{
89 GemmMethod::DEFAULT,
90 "",
91 nullptr,
92 nullptr,
93 nullptr
94}
David Manselle39334c2018-07-06 17:53:35 +010095};
96
97template<>
Georgios Pinitas7cd26d42019-01-09 18:35:17 +000098const GemmImplementation<uint8_t, uint32_t> *gemm_implementation_list<uint8_t, uint32_t>() {
David Manselle39334c2018-07-06 17:53:35 +010099 return gemm_u8_methods;
Pablo Telloeb82fd22018-02-23 13:43:50 +0000100}
101
David Manselle39334c2018-07-06 17:53:35 +0100102/* Explicitly instantiate the external functions for these types. */
Georgios Pinitas7cd26d42019-01-09 18:35:17 +0000103template UniqueGemmCommon<uint8_t, uint32_t> gemm<uint8_t, uint32_t>(const GemmArgs<uint32_t> &args);
104template KernelDescription get_gemm_method<uint8_t, uint32_t>(const GemmArgs<uint32_t> &args);
105template bool method_is_compatible<uint8_t, uint32_t>(GemmMethod method, const GemmArgs<uint32_t> &args);
Georgios Pinitas14613832019-03-01 19:07:11 +0000106template std::vector<KernelDescription> get_compatible_kernels<uint8_t, uint32_t> (const GemmArgs<uint32_t> &args);
David Manselle39334c2018-07-06 17:53:35 +0100107
Pablo Telloeb82fd22018-02-23 13:43:50 +0000108} // namespace arm_gemm
109
David Manselle39334c2018-07-06 17:53:35 +0100110#endif // __aarch64__