/*
 * Copyright (c) 2019-2020, 2022-2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifdef __aarch64__

#include "arm_gemm.hpp"

#include "kernels/a64_gemm_s16_8x12.hpp"
#include "kernels/a64_gemm_s8_4x4.hpp"
#include "kernels/a64_gemm_s8_8x12.hpp"
#include "kernels/a64_hybrid_s8qa_dot_4x16.hpp"
#include "kernels/a64_hybrid_s8qa_mmla_4x16.hpp"
#include "kernels/a64_hybrid_s8qs_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8qs_mmla_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_dot_6x16.hpp"
#include "kernels/a64_hybrid_s8s32_mmla_6x16.hpp"
#include "kernels/a64_interleaved_s8s32_mmla_8x12.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_6x4.hpp"
#include "kernels/a64_smallK_hybrid_s8s32_dot_8x4.hpp"

#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
#include "kernels/sme2_gemv_s8qa_dot_16VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp"
#include "kernels/sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SME2

#include "kernels/sve_hybrid_s8qa_dot_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qa_mmla_4x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8qs_mmla_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_dot_6x4VL.hpp"
#include "kernels/sve_hybrid_s8s32_mmla_6x4VL.hpp"
#include "kernels/sve_interleaved_s8s32_dot_8x3VL.hpp"
#include "kernels/sve_interleaved_s8s32_mmla_8x3VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SVE

#include "gemm_hybrid_indirect.hpp"
#include "gemm_hybrid_quantized.hpp"
#include "gemm_hybrid_quantized_inline.hpp"
#include "gemm_interleaved.hpp"
#include "gemv_pretransposed.hpp"
#include "quantize_wrapper.hpp"
#include "utils.hpp"

67namespace arm_gemm {
68
69static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods[] =
70{
Michalis Spyrou20fca522021-06-07 14:23:57 +010071#ifdef ARM_COMPUTE_ENABLE_SVE
Viet-Hoa Do03b29712022-06-01 11:47:14 +010072#ifdef ARM_COMPUTE_ENABLE_SME2
73{
74 GemmMethod::GEMM_HYBRID,
75 "sme2_gemv_s8qa_dot_16VL",
76 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && quant_hybrid_asymmetric(qp) && args._Msize == 1 && !args._indirect_input && args._nbatches == 1; },
77 nullptr,
78 [](const GemmArgs &args, const Requantize32 &qp) { return new GemvPretransposed<cls_sme2_gemv_s8qa_dot_16VL, int8_t, int8_t, Requantize32>(args, qp); }
79},
80{
81 GemmMethod::GEMM_INTERLEAVED,
82 "sme2_interleaved_nomerge_s8q_mopa_1VLx4VL",
David Manselle6955792024-01-12 11:08:56 +000083 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0)));},
Viet-Hoa Do03b29712022-06-01 11:47:14 +010084 [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
85 return args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); },
86 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_1VLx4VL, int8_t, int8_t>(args, qp); }
87},
88{
89 GemmMethod::GEMM_INTERLEAVED,
90 "sme2_interleaved_nomerge_s8q_mopa_4VLx1VL",
David Manselle6955792024-01-12 11:08:56 +000091 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0)));},
Viet-Hoa Do03b29712022-06-01 11:47:14 +010092 [](const GemmArgs &args, const Requantize32 &) { const auto VL = sme::get_vector_length<int32_t>();
93 return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); },
94 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_4VLx1VL, int8_t, int8_t>(args, qp); }
95},
96{
97 GemmMethod::GEMM_INTERLEAVED,
98 "sme2_interleaved_nomerge_s8q_mopa_2VLx2VL",
David Manselle6955792024-01-12 11:08:56 +000099 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && ((qp.per_channel_requant && (qp.per_channel_left_shifts == nullptr)) || (!qp.per_channel_requant && (qp.per_layer_left_shift == 0)));},
Viet-Hoa Do03b29712022-06-01 11:47:14 +0100100 nullptr,
101 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedPretransposedNoMergeQuantizedInline<cls_sme2_interleaved_nomerge_s8q_mopa_2VLx2VL, int8_t, int8_t>(args, qp); }
102},
103#endif // ARM_COMPUTE_ENABLE_SME2
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100104GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
105 GemmMethod::GEMM_HYBRID,
106 "sve_hybrid_s8qa_mmla_4x4VL",
107 [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_asymmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
108 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
109 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_mmla_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
110),
111GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
112 GemmMethod::GEMM_HYBRID,
113 "sve_hybrid_s8qs_mmla_6x4VL",
114 [](const GemmArgs &args, const Requantize32 &qp) { return quant_hybrid_symmetric(qp) && args._ci->has_sve2() && args._ci->has_svei8mm(); },
115 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
116 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_mmla_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
117),
118GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000119 GemmMethod::GEMM_INTERLEAVED,
120 "sve_interleaved_s8s32_mmla_8x3VL",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100121 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm() && (args._Ksize>8); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100122 [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000123 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_mmla_8x3VL, int8_t, int8_t>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100124),
125GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
126 GemmMethod::GEMM_INTERLEAVED,
127 "sve_hybrid_s8s32_mmla_6x4VL",
128 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_svei8mm(); },
129 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
130 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_mmla_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
131),
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100132GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000133 GemmMethod::GEMM_HYBRID,
134 "sve_hybrid_s8qs_dot_6x4VL",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100135 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_symmetric(qp); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100136 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000137 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qs_dot_6x4VL, int8_t, int8_t, Requantize32>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100138),
139GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000140 GemmMethod::GEMM_HYBRID,
141 "sve_hybrid_s8qa_dot_4x4VL",
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100142 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sve2() && quant_hybrid_asymmetric(qp); },
143 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000144 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8qa_dot_4x4VL, int8_t, int8_t, Requantize32>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100145),
146GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000147 GemmMethod::GEMM_HYBRID,
148 "sve_hybrid_s8s32_dot_6x4VL",
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100149 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve(); },
150 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000151 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_sve_hybrid_s8s32_dot_6x4VL, int8_t, int8_t, Requantize32, true>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100152),
153GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000154 GemmMethod::GEMM_INTERLEAVED,
155 "sve_interleaved_s8s32_dot_8x3VL",
Pablo Marquez Telloa50f1932021-03-08 17:27:05 +0000156 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_sve() && (args._Ksize>4); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100157 [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000158 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_sve_interleaved_s8s32_dot_8x3VL, int8_t, int8_t>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100159),
160#endif // ARM_COMPUTE_ENABLE_SVE
161GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
162 GemmMethod::GEMM_HYBRID,
163 "a64_hybrid_s8qa_mmla_4x16",
164 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_asymmetric(qp); },
165 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
166 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_mmla_4x16, int8_t, int8_t, Requantize32>(args, qp); }
167),
168GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
169 GemmMethod::GEMM_HYBRID,
170 "a64_hybrid_s8qs_mmla_6x16",
171 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_i8mm() && quant_hybrid_symmetric(qp); },
172 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
173 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_mmla_6x16, int8_t, int8_t, Requantize32>(args, qp); }
174),
175GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000176 GemmMethod::GEMM_INTERLEAVED,
177 "a64_interleaved_s8s32_mmla_8x12",
Michalis Spyrou20fca522021-06-07 14:23:57 +0100178 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm() && (args._Ksize>8); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100179 [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000180 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_interleaved_s8s32_mmla_8x12, int8_t, int8_t>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100181),
182GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
183 GemmMethod::GEMM_INTERLEAVED,
184 "a64_hybrid_s8s32_mmla_6x16",
185 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_i8mm(); },
186 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
187 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_mmla_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
188),
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000189{
190 GemmMethod::GEMM_HYBRID_QUANTIZED,
191 "a64_smallK_hybrid_s8s32_dot_8x4",
192 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize<=32) && !args._indirect_input; },
Gunes Bayire42a87f2021-09-13 13:24:38 +0100193 [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000194 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_8x4, int8_t, int8_t>(args, qp); }
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000195},
196{
197 GemmMethod::GEMM_HYBRID_QUANTIZED,
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000198 "a64_smallK_hybrid_s8s32_dot_6x4",
199 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && (args._Nsize % 4 == 0) && (args._Ksize>32) && (args._Ksize<=64) && !args._indirect_input; },
Gunes Bayire42a87f2021-09-13 13:24:38 +0100200 [](const GemmArgs &args, const Requantize32 &) { return !(args._ci->has_svei8mm() || args._ci->has_i8mm()); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000201 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<cls_a64_smallK_hybrid_s8s32_dot_6x4, int8_t, int8_t>(args, qp); }
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000202},
203{
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000204 GemmMethod::GEMM_INTERLEAVED,
205 "a64_gemm_s16_8x12",
Aleksandr Nikolaeva084b462020-06-25 12:25:52 +0100206 nullptr,
Georgios Pinitascd22cbf2020-12-02 16:06:01 +0000207 [](const GemmArgs &args, const Requantize32 &) { return args._ci->get_cpu_model() == CPUModel::A53 && ((args._Msize > 28) || ((args._Msize % 8) > 4)); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000208 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s16_8x12, int8_t, int8_t>(args, qp); }
209},
Georgios Pinitas33e03072021-01-14 13:43:40 +0000210GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000211 GemmMethod::GEMM_HYBRID,
212 "a64_hybrid_s8qs_dot_6x16",
213 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_symmetric(qp); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100214 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000215 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qs_dot_6x16, int8_t, int8_t, Requantize32>(args, qp); }
Georgios Pinitas33e03072021-01-14 13:43:40 +0000216),
217GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000218 GemmMethod::GEMM_HYBRID,
219 "a64_hybrid_s8qa_dot_4x16",
220 [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_dotprod() && quant_hybrid_asymmetric(qp); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100221 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000222 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8qa_dot_4x16, int8_t, int8_t, Requantize32>(args, qp); }
Georgios Pinitas33e03072021-01-14 13:43:40 +0000223),
224GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000225 GemmMethod::GEMM_HYBRID,
226 "a64_hybrid_s8s32_dot_6x16",
227 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100228 [](const GemmArgs &args, const Requantize32 &) { return GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000229 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridIndirect<cls_a64_hybrid_s8s32_dot_6x16, int8_t, int8_t, Requantize32, true>(args, qp); }
Georgios Pinitas33e03072021-01-14 13:43:40 +0000230),
231GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000232 GemmMethod::GEMM_INTERLEAVED,
233 "a64_gemm_s8_8x12",
234 [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod(); },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100235 [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000236 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_8x12, int8_t, int8_t>(args, qp); }
Georgios Pinitas33e03072021-01-14 13:43:40 +0000237),
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100238GemmImplementation<int8_t, int8_t, Requantize32>::with_estimate(
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000239 GemmMethod::GEMM_INTERLEAVED,
240 "a64_gemm_s8_4x4",
241 nullptr,
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100242 [](const GemmArgs &args, const Requantize32 &) { return GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>::estimate_cycles<int8_t>(args); },
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000243 [](const GemmArgs &args, const Requantize32 &qp) { return new GemmInterleavedQuantized<cls_a64_gemm_s8_4x4, int8_t, int8_t>(args, qp); }
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100244),
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000245{
246 GemmMethod::QUANTIZE_WRAPPER,
247 "quantized_wrapper",
Georgios Pinitasc0b6f762020-11-02 01:37:17 +0000248 [](const GemmArgs &args, const Requantize32 &) { return !args._indirect_input; },
Georgios Pinitas4ee8b152021-07-16 16:16:43 +0100249 [](const GemmArgs &, const Requantize32 &) { return false; },
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000250 [](const GemmArgs &args, const Requantize32 &qp) { return new QuantizeWrapper<int8_t, int8_t, int32_t>(args, qp); }
251},
252{
253 GemmMethod::DEFAULT,
254 "",
255 nullptr,
256 nullptr,
257 nullptr
258}
259};
260
261template<>
262const GemmImplementation<int8_t, int8_t, Requantize32> *gemm_implementation_list<int8_t, int8_t, Requantize32>() {
263 return gemm_qint8_methods;
264}
265
266template UniqueGemmCommon<int8_t, int8_t> gemm<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
Francesco Petrogalli553f6952022-06-30 10:22:01 +0000267template bool has_opt_gemm<int8_t, int8_t, Requantize32>(WeightFormat &weight_format, const GemmArgs &args, const Requantize32 &os);
Michael Tyler74921ee2023-04-12 17:43:17 +0100268template KernelDescription get_gemm_method<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
Michalis Spyrou71ac9032019-11-14 14:31:44 +0000269template std::vector<KernelDescription> get_compatible_kernels<int8_t, int8_t, Requantize32>(const GemmArgs &args, const Requantize32 &os);
270
271} // namespace arm_gemm

#endif // __aarch64__