blob: 236930ee265649636d8d7dcbfba1ef53511ea3be [file] [log] [blame]
/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
ramelg018a164882022-04-07 02:42:52 +010028#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Viet-Hoa Do03b29712022-06-01 11:47:14 +010031#include "depthwise_planar.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000032
33#include "depthwise_implementation_constraints.hpp"
34
35#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010036#if defined(ARM_COMPUTE_ENABLE_SVE)
Viet-Hoa Do03b29712022-06-01 11:47:14 +010037#if defined(ARM_COMPUTE_ENABLE_SME2)
38#include "kernels/sme2_u8q_planar_3x3_s1_4rows_dot_za.hpp"
39#include "kernels/sme2_u8q_planar_3x3_s2_4rows_dot_za.hpp"
40#include "kernels/sme2_u8q_planar_5x5_s1_4rows_dot_za.hpp"
41#include "kernels/sme2_u8q_planar_5x5_s2_4rows_dot_za.hpp"
42#endif // defined(ARM_COMPUTE_ENABLE_SME2)
43
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000044#include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
45#include "kernels/sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
46#include "kernels/sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
47#include "kernels/sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
48#include "kernels/sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
49#include "kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010050#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000051#include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010052
53#include "kernels/a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
54#include "kernels/a64_u8qa_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
55#include "kernels/a64_u8qa_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
56
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000057#include "kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
58#include "kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
59#include "kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
60#include "kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst.hpp"
61#include "kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst.hpp"
62#include "kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst.hpp"
63#include "kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010064
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000065#endif // defined(__aarch64__)
66
#include <cstdint>
#include <limits>
68
69using arm_gemm::Requantize32;
70
71namespace arm_conv {
72namespace depthwise {
73
namespace
{
#if defined(__aarch64__)
/** Constant "worst possible" score.
 *
 * Ignores both arguments and always yields the largest value representable
 * in a uint64_t. It is placed in the fourth initializer slot of several
 * entries of depthwise_u8q_methods (the channel-multiplier kernels).
 * NOTE(review): that slot is presumably the cost/cycle estimate consulted
 * during kernel selection - confirm against DepthwiseImplementation.
 *
 * Only compiled for AArch64 because every table entry that references it is
 * inside the same preprocessor guard.
 *
 * @return std::numeric_limits<uint64_t>::max() (requires <limits>).
 */
uint64_t not_preferred(const DepthwiseArgs &, const Requantize32 &)
{
  return std::numeric_limits<uint64_t>::max();
}
#endif // defined(__aarch64__)
} // namespace
83
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000084static const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
85#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010086#if defined(ARM_COMPUTE_ENABLE_SVE)
Viet-Hoa Do03b29712022-06-01 11:47:14 +010087#if defined(ARM_COMPUTE_ENABLE_SME2)
88 {
89 DepthwiseMethod::PLANAR,
90 "sme2_u8q_planar_3x3_s1_4rows_dot_za",
91 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
92 is_supported<sme2_u8q_planar_3x3_s1_4rows_dot_za>,
93 has_no_channel_multiplier,
94 qp_has_no_left_shift, no_prime_right_pad),
95 nullptr,
96 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
97 auto strat = new sme2_u8q_planar_3x3_s1_4rows_dot_za(args.cpu_info);
98 return new DepthwisePlanar<uint8_t>(strat, args, qp);
99 },
100 },
101 {
102 DepthwiseMethod::PLANAR,
103 "sme2_u8q_planar_3x3_s2_4rows_dot_za",
104 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
105 is_supported<sme2_u8q_planar_3x3_s2_4rows_dot_za>,
106 has_no_channel_multiplier,
107 qp_has_no_left_shift, no_prime_right_pad),
108 nullptr,
109 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
110 auto strat = new sme2_u8q_planar_3x3_s2_4rows_dot_za(args.cpu_info);
111 return new DepthwisePlanar<uint8_t>(strat, args, qp);
112 },
113 },
114 {
115 DepthwiseMethod::PLANAR,
116 "sme2_u8q_planar_5x5_s1_4rows_dot_za",
117 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
118 is_supported<sme2_u8q_planar_5x5_s1_4rows_dot_za>,
119 has_no_channel_multiplier,
120 qp_has_no_left_shift, no_prime_right_pad),
121 nullptr,
122 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
123 auto strat = new sme2_u8q_planar_5x5_s1_4rows_dot_za(args.cpu_info);
124 return new DepthwisePlanar<uint8_t>(strat, args, qp);
125 },
126 },
127 {
128 DepthwiseMethod::PLANAR,
129 "sme2_u8q_planar_5x5_s2_4rows_dot_za",
130 constraint<Requantize32>(cpu_has_sme, cpu_has_sme2,
131 is_supported<sme2_u8q_planar_5x5_s2_4rows_dot_za>,
132 has_no_channel_multiplier,
133 qp_has_no_left_shift, no_prime_right_pad),
134 nullptr,
135 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
136 auto strat = new sme2_u8q_planar_5x5_s2_4rows_dot_za(args.cpu_info);
137 return new DepthwisePlanar<uint8_t>(strat, args, qp);
138 },
139 },
140#endif // defined(ARM_COMPUTE_ENABLE_SME2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000141 {
142 DepthwiseMethod::DEPTHFIRST,
143 "sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
144 constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100145 qp_has_no_left_shift,
146 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000147 nullptr,
148 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100149 auto strat = new sve_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
150 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000151 },
152 },
153 {
154 DepthwiseMethod::DEPTHFIRST,
155 "sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
156 constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100157 qp_has_no_left_shift,
158 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000159 nullptr,
160 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100161 auto strat = new sve_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
162 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000163 },
164 },
165 {
166 DepthwiseMethod::DEPTHFIRST,
167 "sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
168 constraint<Requantize32>(is_supported<sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100169 qp_has_no_left_shift,
170 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000171 nullptr,
172 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100173 auto strat = new sve_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
174 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000175 },
176 },
177 {
178 DepthwiseMethod::DEPTHFIRST,
179 "sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
180 constraint<Requantize32>(is_supported<sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100181 qp_has_no_left_shift,
182 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000183 nullptr,
184 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100185 auto strat = new sve_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
186 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000187 },
188 },
189 {
190 DepthwiseMethod::DEPTHFIRST,
191 "sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
192 constraint<Requantize32>(is_supported<sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100193 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100194 has_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100195 cpu_has_sve2),
Michael Tyler4c30de02023-07-07 12:01:32 +0100196 not_preferred,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000197 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100198 auto strat = new sve_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(args.cpu_info);
199 return new DepthwiseDepthfirstMultiplier<uint8_t, uint8_t, uint8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000200 },
201 },
202 {
203 DepthwiseMethod::DEPTHFIRST,
204 "sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
205 constraint<Requantize32>(is_supported<sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100206 qp_has_no_left_shift,
ramelg018a164882022-04-07 02:42:52 +0100207 has_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100208 cpu_has_sve2),
Michael Tyler4c30de02023-07-07 12:01:32 +0100209 not_preferred,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000210 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100211 auto strat = new sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(args.cpu_info);
212 return new DepthwiseDepthfirstMultiplier<uint8_t, uint8_t, uint8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000213 },
214 },
ramelg018a164882022-04-07 02:42:52 +0100215#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000216 {
217 DepthwiseMethod::DEPTHFIRST,
218 "a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst",
219 constraint<Requantize32>(is_supported<a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst>,
220 cpu_has_dot_product,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000221 qp_has_no_left_shift),
222 nullptr,
223 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100224 auto strat = new a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst(args.cpu_info);
225 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000226 },
227 },
ramelg018a164882022-04-07 02:42:52 +0100228
229 {
230 DepthwiseMethod::DEPTHFIRST,
231 "a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst",
232 constraint<Requantize32>(is_supported<a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst>,
ramelg018a164882022-04-07 02:42:52 +0100233 qp_zero_a_offset,
234 qp_has_no_left_shift),
235 nullptr,
236 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
237 auto strat = new a64_u8qa_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
238 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
239 },
240 },
241 {
242 DepthwiseMethod::DEPTHFIRST,
243 "a64_u8qa_nhwc_3x3_s2_output2x2_mla_depthfirst",
244 constraint<Requantize32>(is_supported<a64_u8qa_nhwc_3x3_s2_output2x2_mla_depthfirst>,
ramelg018a164882022-04-07 02:42:52 +0100245 qp_zero_a_offset,
246 qp_has_no_left_shift),
247 nullptr,
248 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
249 auto strat = new a64_u8qa_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
250 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
251 },
252 },
253 {
254 DepthwiseMethod::DEPTHFIRST,
255 "a64_u8qa_nhwc_5x5_s1_output2x2_mla_depthfirst",
256 constraint<Requantize32>(is_supported<a64_u8qa_nhwc_5x5_s1_output2x2_mla_depthfirst>,
ramelg018a164882022-04-07 02:42:52 +0100257 qp_zero_a_offset,
258 qp_has_no_left_shift),
259 nullptr,
260 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
261 auto strat = new a64_u8qa_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
262 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
263 },
264 },
265
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000266 {
267 DepthwiseMethod::DEPTHFIRST,
268 "a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
269 constraint<Requantize32>(is_supported<a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000270 qp_has_no_left_shift),
271 nullptr,
272 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100273 auto strat = new a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
274 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000275 },
276 },
277 {
278 DepthwiseMethod::DEPTHFIRST,
279 "a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
280 constraint<Requantize32>(is_supported<a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000281 qp_has_no_left_shift),
282 nullptr,
283 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100284 auto strat = new a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
285 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000286 },
287 },
288 {
289 DepthwiseMethod::DEPTHFIRST,
290 "a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
291 constraint<Requantize32>(is_supported<a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000292 qp_has_no_left_shift),
293 nullptr,
294 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100295 auto strat = new a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
296 return new DepthwiseDepthfirst<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000297 },
298 },
299 {
300 DepthwiseMethod::DEPTHFIRST,
301 "a64_u8q_nhwc_generic_output3x3_mla_depthfirst",
Michael Tyler4c30de02023-07-07 12:01:32 +0100302 nullptr,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000303 nullptr,
304 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100305 auto kernel = new a64_u8q_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
306 auto strat = new GenericDepthfirstStrategy<uint8_t>(kernel, 3, 3, args);
307 return new DepthwiseDepthfirstGeneric<uint8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000308 },
309 },
310 {
311 DepthwiseMethod::DEPTHFIRST,
312 "a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst",
313 constraint<Requantize32>(is_supported<a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst>,
314 cpu_has_dot_product,
ramelg018a164882022-04-07 02:42:52 +0100315 has_channel_multiplier,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000316 qp_has_no_left_shift),
Michael Tyler4c30de02023-07-07 12:01:32 +0100317 not_preferred,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000318 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100319 auto strat = new a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst(args.cpu_info);
320 return new DepthwiseDepthfirstMultiplier<uint8_t, uint8_t, uint8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000321 },
322 },
323 {
324 DepthwiseMethod::DEPTHFIRST,
325 "a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst",
326 constraint<Requantize32>(is_supported<a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst>,
327 cpu_has_dot_product,
ramelg018a164882022-04-07 02:42:52 +0100328 has_channel_multiplier,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000329 qp_has_no_left_shift),
Michael Tyler4c30de02023-07-07 12:01:32 +0100330 not_preferred,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000331 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100332 auto strat = new a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst(args.cpu_info);
333 return new DepthwiseDepthfirstMultiplier<uint8_t, uint8_t, uint8_t, int32_t, false>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000334 },
335 },
336 {
337 DepthwiseMethod::DEPTHFIRST,
338 "a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100339 constraint<Requantize32>(has_channel_multiplier),
Michael Tyler4c30de02023-07-07 12:01:32 +0100340 not_preferred,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000341 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, uint8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100342 auto kern = new a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
343 auto strat = new GenericDepthfirstMultiplierStrategy<uint8_t>(kern, args);
344 return new DepthwiseDepthfirstMultiplier<uint8_t, uint8_t, uint8_t, int32_t, true>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000345 },
346 },
ramelg018a164882022-04-07 02:42:52 +0100347
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000348#endif // defined(__aarch64__)
349 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
350};
351
352template <>
353const DepthwiseImplementation<uint8_t, uint8_t, uint8_t, Requantize32> *depthwise_implementation_list()
354{
355 return depthwise_u8q_methods;
356}
357
358template UniqueDepthwiseCommon<uint8_t, uint8_t, uint8_t> depthwise(const DepthwiseArgs &, const Requantize32 &);
359template std::vector<KernelDescription> get_compatible_kernels<uint8_t, uint8_t, uint8_t, Requantize32>(const DepthwiseArgs &, const Requantize32 &);
360
361} // namespace depthwise
362} // namespace arm_conv