blob: 2d03183c5986385011972f1376a7f0994f61daa5 [file] [log] [blame]
/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
ramelg018a164882022-04-07 02:42:52 +010028#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Viet-Hoa Do03b29712022-06-01 11:47:14 +010031#include "depthwise_planar.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000032
33#include "depthwise_implementation_constraints.hpp"
34
35#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010036#if defined(ARM_COMPUTE_ENABLE_SVE)
Viet-Hoa Do03b29712022-06-01 11:47:14 +010037#if defined(ARM_COMPUTE_ENABLE_SME2)
38#include "kernels/sme2_s8q_planar_3x3_s1_4rows_dot_za.hpp"
39#include "kernels/sme2_s8q_planar_3x3_s2_4rows_dot_za.hpp"
40#include "kernels/sme2_s8q_planar_5x5_s1_4rows_dot_za.hpp"
41#include "kernels/sme2_s8q_planar_5x5_s2_4rows_dot_za.hpp"
42#endif // defined(ARM_COMPUTE_ENABLE_SME2)
43
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000044#include "kernels/sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
45#include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
46#include "kernels/sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
47#include "kernels/sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
48#include "kernels/sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
49#include "kernels/sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
50#include "kernels/sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010051#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000052#include "kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
53#include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
54#include "kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
55#include "kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
56#include "kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
57#include "kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst.hpp"
58#include "kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
59#include "kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
60#include "kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
61#endif // defined(__aarch64__)
62
63#include <cstdint>
64
65using arm_gemm::Requantize32;
66
67namespace arm_conv {
68namespace depthwise {
69
namespace
{
// Only compiled for AArch64: within this file the predicate is referenced solely
// by table entries that sit inside the `#if defined(__aarch64__)` section, so the
// guard presumably avoids an unused-function warning on other targets.
#if defined(__aarch64__)
// Constraint predicate: returns true when the `b_offset` field of the supplied
// quantisation parameters is zero.
//
// The first argument (the depthwise arguments) is unused.
// `_qp` is passed type-erased as `const void *` and is reinterpreted here as
// `const arm_gemm::Requantize32 *`; it is dereferenced unconditionally, so it
// must not be null.
bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp)
{
  const auto *const qp = static_cast<const arm_gemm::Requantize32 *>(_qp);
  return qp->b_offset == 0;
}
#endif // defined(__aarch64__)
}
80
81static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
82#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010083#if defined(ARM_COMPUTE_ENABLE_SVE)
Viet-Hoa Do03b29712022-06-01 11:47:14 +010084#if defined(ARM_COMPUTE_ENABLE_SME2)
85 {
86 DepthwiseMethod::PLANAR,
87 "sme2_s8q_planar_3x3_s1_4rows_dot_za",
88 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
89 is_supported<sme2_s8q_planar_3x3_s1_4rows_dot_za>,
90 has_no_channel_multiplier,
91 qp_has_no_left_shift, no_prime_right_pad),
92 nullptr,
93 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
94 auto strat = new sme2_s8q_planar_3x3_s1_4rows_dot_za(args.cpu_info);
95 return new DepthwisePlanar<int8_t>(strat, args, qp);
96 },
97 },
98 {
99 DepthwiseMethod::PLANAR,
100 "sme2_s8q_planar_3x3_s2_4rows_dot_za",
101 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
102 is_supported<sme2_s8q_planar_3x3_s2_4rows_dot_za>,
103 has_no_channel_multiplier,
104 qp_has_no_left_shift, no_prime_right_pad),
105 nullptr,
106 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
107 auto strat = new sme2_s8q_planar_3x3_s2_4rows_dot_za(args.cpu_info);
108 return new DepthwisePlanar<int8_t>(strat, args, qp);
109 },
110 },
111 {
112 DepthwiseMethod::PLANAR,
113 "sme2_s8q_planar_5x5_s1_4rows_dot_za",
114 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
115 is_supported<sme2_s8q_planar_5x5_s1_4rows_dot_za>,
116 has_no_channel_multiplier,
117 qp_has_no_left_shift, no_prime_right_pad),
118 nullptr,
119 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
120 auto strat = new sme2_s8q_planar_5x5_s1_4rows_dot_za(args.cpu_info);
121 return new DepthwisePlanar<int8_t>(strat, args, qp);
122 },
123 },
124 {
125 DepthwiseMethod::PLANAR,
126 "sme2_s8q_planar_5x5_s2_4rows_dot_za",
127 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
128 is_supported<sme2_s8q_planar_5x5_s2_4rows_dot_za>,
129 has_no_channel_multiplier,
130 qp_has_no_left_shift, no_prime_right_pad),
131 nullptr,
132 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
133 auto strat = new sme2_s8q_planar_5x5_s2_4rows_dot_za(args.cpu_info);
134 return new DepthwisePlanar<int8_t>(strat, args, qp);
135 },
136 },
137#endif // defined(ARM_COMPUTE_ENABLE_SME2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000138 {
139 DepthwiseMethod::DEPTHFIRST,
140 "sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
141 constraint<Requantize32>(is_supported<sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
142 has_no_channel_multiplier,
143 qp_has_no_left_shift,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100144 qp_weights_are_symmetric,
145 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000146 nullptr,
147 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100148 auto strat = new sve_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
149 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000150 },
151 },
152 {
153 DepthwiseMethod::DEPTHFIRST,
154 "sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
155 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
156 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100157 qp_has_no_left_shift,
158 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000159 nullptr,
160 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100161 auto strat = new sve_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
162 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000163 },
164 },
165 {
166 DepthwiseMethod::DEPTHFIRST,
167 "sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
168 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
169 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100170 qp_has_no_left_shift,
171 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000172 nullptr,
173 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100174 auto strat = new sve_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
175 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000176 },
177 },
178 {
179 DepthwiseMethod::DEPTHFIRST,
180 "sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
181 constraint<Requantize32>(is_supported<sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
182 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100183 qp_has_no_left_shift,
184 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000185 nullptr,
186 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100187 auto strat = new sve_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
188 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000189 },
190 },
191 {
192 DepthwiseMethod::DEPTHFIRST,
193 "sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
194 constraint<Requantize32>(is_supported<sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
195 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100196 qp_has_no_left_shift,
197 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000198 nullptr,
199 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100200 auto strat = new sve_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
201 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000202 },
203 },
204 {
205 DepthwiseMethod::DEPTHFIRST,
206 "sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
207 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100208 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100209 has_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100210 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000211 nullptr,
212 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100213 auto strat = new sve_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(args.cpu_info);
214 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000215 },
216 },
217 {
218 DepthwiseMethod::DEPTHFIRST,
219 "sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
220 constraint<Requantize32>(is_supported<sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100221 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100222 has_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100223 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000224 nullptr,
225 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100226 auto strat = new sve_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(args.cpu_info);
227 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000228 },
229 },
ramelg018a164882022-04-07 02:42:52 +0100230#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000231 {
232 DepthwiseMethod::DEPTHFIRST,
233 "a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst",
234 constraint<Requantize32>(is_supported<a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst>,
235 has_no_channel_multiplier,
236 qp_weights_are_symmetric,
237 qp_has_no_left_shift,
238 cpu_has_dot_product),
239 nullptr,
240 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100241 auto strat = new a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
242 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000243 },
244 },
245 {
246 DepthwiseMethod::DEPTHFIRST,
247 "a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
248 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
249 has_no_channel_multiplier,
250 qp_has_no_left_shift,
251 cpu_has_dot_product),
252 nullptr,
253 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100254 auto strat = new a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
255 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000256 },
257 },
258 {
259 DepthwiseMethod::DEPTHFIRST,
260 "a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
261 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
262 has_no_channel_multiplier,
263 qp_has_no_left_shift),
264 nullptr,
265 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100266 auto strat = new a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
267 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000268 },
269 },
270 {
271 DepthwiseMethod::DEPTHFIRST,
272 "a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
273 constraint<Requantize32>(is_supported<a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
274 has_no_channel_multiplier,
275 qp_has_no_left_shift),
276 nullptr,
277 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100278 auto strat = new a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
279 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000280 },
281 },
282 {
283 DepthwiseMethod::DEPTHFIRST,
284 "a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
285 constraint<Requantize32>(is_supported<a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
286 has_no_channel_multiplier,
287 qp_has_no_left_shift),
288 nullptr,
289 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100290 auto strat = new a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
291 return new DepthwiseDepthfirst<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000292 },
293 },
294 {
295 DepthwiseMethod::DEPTHFIRST,
296 "a64_s8q_nhwc_generic_output3x3_mla_depthfirst",
297 constraint<Requantize32>(has_no_channel_multiplier),
298 nullptr,
299 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100300 auto kernel = new a64_s8q_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
301 auto strat = new GenericDepthfirstStrategy<int8_t>(kernel, 3, 3, args);
302 return new DepthwiseDepthfirstGeneric<int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000303 },
304 },
305 {
306 DepthwiseMethod::DEPTHFIRST,
307 "a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
308 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
309 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100310 has_channel_multiplier,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000311 cpu_has_dot_product),
312 nullptr,
313 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100314 auto strat = new a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(args.cpu_info);
315 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000316 },
317 },
318 {
319 DepthwiseMethod::DEPTHFIRST,
320 "a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
321 constraint<Requantize32>(is_supported<a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
322 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100323 has_channel_multiplier,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000324 cpu_has_dot_product),
325 nullptr,
326 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100327 auto strat = new a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(args.cpu_info);
328 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000329 },
330 },
331 {
332 DepthwiseMethod::DEPTHFIRST,
333 "a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100334 constraint<Requantize32>(has_channel_multiplier),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000335 nullptr,
336 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<int8_t, int8_t, int8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100337 auto kern = new a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
338 auto strat = new GenericDepthfirstMultiplierStrategy<int8_t>(kern, args);
339 return new DepthwiseDepthfirstMultiplier<int8_t, int8_t, int8_t, int32_t, true>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000340 },
341 },
342#endif // defined(__aarch64__)
343 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
344};
345
346template <>
347const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> *depthwise_implementation_list()
348{
349 return depthwise_s8q_methods;
350}
351
352template UniqueDepthwiseCommon<int8_t, int8_t, int8_t> depthwise(const DepthwiseArgs &, const Requantize32 &);
353template std::vector<KernelDescription> get_compatible_kernels<int8_t, int8_t, int8_t, Requantize32>(const DepthwiseArgs &, const Requantize32 &);
354
355} // namespace depthwise
356} // namespace arm_conv