blob: 8fef6f8ae090dafa3872636c96a9e6acdf10a062 [file] [log] [blame]
/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
28#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Michael Tyler8deee9b2023-06-30 11:26:05 +010031#include "depthwise_planar.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000032
33#include "depthwise_implementation_constraints.hpp"
34
// This can only be built if the target/compiler supports FP16 arguments.
36#if defined(__ARM_FP16_ARGS)
37
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000038#if defined(__aarch64__)
David Mansellb2758f32023-03-30 19:10:52 +010039#if defined(ARM_COMPUTE_ENABLE_SME2)
40#include "kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
41#include "kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
42#include "kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
43#include "kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
44#include "kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michael Tyler8deee9b2023-06-30 11:26:05 +010045#endif // defined(ARM_COMPUTE_ENABLE_SME2)
46#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000047#include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
48#include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
49#include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
50#include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
51#include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michalis Spyrou20fca522021-06-07 14:23:57 +010052#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ramelg018a164882022-04-07 02:42:52 +010053#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000054#include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
55#include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
56#include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
57#include "kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
58#include "kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
59#include "kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst.hpp"
60#include "kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010061#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000062#endif // defined(__aarch64__)
63
64namespace arm_conv {
65namespace depthwise {
66
67namespace
68{
Michael Tyler4c30de02023-07-07 12:01:32 +010069#if defined(__aarch64__)
70#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
71 bool prefer_premultiply(const DepthwiseArgs &args) {
72 if ((args.stride_rows != args.stride_cols) || (args.kernel_rows != args.kernel_cols))
73 {
74 return false;
75 }
76
77 unsigned int threshold;
78
79 if (args.stride_rows == 1 && args.kernel_rows == 3)
80 {
81 threshold = 30;
82 }
83 else if (args.stride_rows == 1 && args.kernel_rows == 5)
84 {
85 threshold = 31;
86 }
87 else if (args.stride_rows == 2 && args.kernel_rows == 3)
88 {
89 threshold = 11;
90 }
91 else if (args.stride_rows == 2 && args.kernel_rows == 5)
92 {
93 threshold = 19;
94 } else
95 {
96 return false;
97 }
98
99 return args.channel_multiplier <= threshold;
100 }
101
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000102 template <class Strategy>
103 unsigned int cycle_estimate(const DepthwiseArgs &args, const Nothing &)
104 {
Michael Tyler4c30de02023-07-07 12:01:32 +0100105 if (args.channel_multiplier > 1 && !prefer_premultiply(args))
106 {
107 return std::numeric_limits<unsigned int>::max();
108 }
109
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000110 // First-pass: compute the number of output pixels which will be computed.
111 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
112 arm_gemm::roundup(args.output_cols, Strategy::output_cols) *
113 arm_gemm::iceildiv(
114 (long unsigned) args.input_channels * args.channel_multiplier,
115 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
116 );
117 }
118
Michael Tyler74921ee2023-04-12 17:43:17 +0100119 template <class Strategy>
120 unsigned int planar_cycle_estimate(const DepthwiseArgs &args, const Nothing &)
121 {
122 // First-pass: compute the number of output pixels which will be computed.
123 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
124 args.output_cols *
125 arm_gemm::iceildiv(
126 (long unsigned) args.input_channels * args.channel_multiplier,
127 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
128 );
129 }
130
Michael Tyler4c30de02023-07-07 12:01:32 +0100131 unsigned int multiplier_cycle_estimate(const DepthwiseArgs &args, const Nothing &)
132 {
133 return prefer_premultiply(args)? std::numeric_limits<unsigned int>::max() : 0;
134 }
135
ramelg018a164882022-04-07 02:42:52 +0100136 unsigned int not_preferred(const DepthwiseArgs &, const Nothing &) __attribute__ ((unused));
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000137 unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
138 {
139 return std::numeric_limits<unsigned int>::max();
140 }
Michael Tyler4c30de02023-07-07 12:01:32 +0100141#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
142#endif // defined(__aarch64__)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000143}
144
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000145static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = {
146#if defined(__aarch64__)
Michael Tyler7a31a822023-06-06 17:31:44 +0100147#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michalis Spyrou20fca522021-06-07 14:23:57 +0100148#if defined(ARM_COMPUTE_ENABLE_SVE)
David Mansellb2758f32023-03-30 19:10:52 +0100149#if defined(ARM_COMPUTE_ENABLE_SME2)
150 {
151 DepthwiseMethod::DEPTHFIRST,
152 "sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
153 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
David Mansellb2758f32023-03-30 19:10:52 +0100154 cpu_has_sme2),
155 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
156 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
157 auto strat = new sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
158 return new DepthwiseDepthfirst<__fp16>(strat, args);
159 },
160 },
161 {
162 DepthwiseMethod::DEPTHFIRST,
163 "sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
164 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
David Mansellb2758f32023-03-30 19:10:52 +0100165 cpu_has_sme2),
166 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
167 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
168 auto strat = new sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
169 return new DepthwiseDepthfirst<__fp16>(strat, args);
170 },
171 },
172 {
173 DepthwiseMethod::DEPTHFIRST,
174 "sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
175 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
David Mansellb2758f32023-03-30 19:10:52 +0100176 cpu_has_sme2),
177 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
178 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
179 auto strat = new sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
180 return new DepthwiseDepthfirst<__fp16>(strat, args);
181 },
182 },
183 {
184 DepthwiseMethod::DEPTHFIRST,
185 "sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
186 constraint(is_supported<sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
David Mansellb2758f32023-03-30 19:10:52 +0100187 cpu_has_sme2),
188 cycle_estimate<sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
189 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
190 auto strat = new sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
191 return new DepthwiseDepthfirst<__fp16>(strat, args);
192 },
193 },
194 {
195 DepthwiseMethod::DEPTHFIRST,
196 "sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
197 constraint(is_supported<sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
David Mansellb2758f32023-03-30 19:10:52 +0100198 cpu_has_sme2),
199 cycle_estimate<sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
200 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
201 auto strat = new sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
202 return new DepthwiseDepthfirst<__fp16>(strat, args);
203 },
204 },
Michael Tyler8deee9b2023-06-30 11:26:05 +0100205#endif // defined(ARM_COMPUTE_ENABLE_SME2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000206 {
207 DepthwiseMethod::DEPTHFIRST,
208 "sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
209 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100210 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000211 cycle_estimate<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
212 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100213 auto strat = new sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100214 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000215 },
216 },
217 {
218 DepthwiseMethod::DEPTHFIRST,
219 "sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
220 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100221 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000222 cycle_estimate<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
223 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100224 auto strat = new sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100225 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000226 },
227 },
228 {
229 DepthwiseMethod::DEPTHFIRST,
230 "sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
231 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100232 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000233 cycle_estimate<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
234 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100235 auto strat = new sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100236 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000237 },
238 },
239 {
240 DepthwiseMethod::DEPTHFIRST,
241 "sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
242 constraint(is_supported<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100243 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000244 cycle_estimate<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
245 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100246 auto strat = new sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100247 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000248 },
249 },
250 {
251 DepthwiseMethod::DEPTHFIRST,
252 "sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
253 constraint(is_supported<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100254 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000255 cycle_estimate<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
256 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100257 auto strat = new sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100258 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000259 },
260 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100261#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000262 {
263 DepthwiseMethod::DEPTHFIRST,
264 "a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
265 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100266 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000267 cycle_estimate<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
268 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100269 auto strat = new a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100270 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000271 },
272 },
273 {
274 DepthwiseMethod::DEPTHFIRST,
275 "a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
276 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100277 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000278 cycle_estimate<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
279 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100280 auto strat = new a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100281 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000282 },
283 },
284 {
285 DepthwiseMethod::DEPTHFIRST,
286 "a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
287 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100288 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000289 cycle_estimate<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
290 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100291 auto strat = new a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100292 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000293 },
294 },
295 {
296 DepthwiseMethod::DEPTHFIRST,
297 "a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
298 constraint(is_supported<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100299 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000300 cycle_estimate<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
301 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100302 auto strat = new a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100303 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000304 },
305 },
306 {
307 DepthwiseMethod::DEPTHFIRST,
308 "a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
309 constraint(is_supported<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100310 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000311 cycle_estimate<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
312 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100313 auto strat = new a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100314 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000315 },
316 },
317 {
318 DepthwiseMethod::DEPTHFIRST,
319 "a64_fp16_nhwc_generic_output3x3_mla_depthfirst",
Michael Tyler8deee9b2023-06-30 11:26:05 +0100320 constraint(cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000321 not_preferred,
322 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100323 auto kern = new a64_fp16_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
324 auto strat = new GenericDepthfirstStrategy<__fp16>(kern, 3, 3, args);
325 return new DepthwiseDepthfirstGeneric<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000326 },
327 },
328 {
329 DepthwiseMethod::DEPTHFIRST,
330 "a64_fp16_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100331 constraint(cpu_has_fp16, has_channel_multiplier),
Michael Tyler4c30de02023-07-07 12:01:32 +0100332 multiplier_cycle_estimate,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000333 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100334 auto kern = new a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
335 auto strat = new GenericDepthfirstMultiplierStrategy<__fp16>(kern, args);
336 return new DepthwiseDepthfirstMultiplier<__fp16, __fp16, __fp16, __fp16, true>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000337 },
338 },
ramelg018a164882022-04-07 02:42:52 +0100339#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000340#endif // defined(__aarch64__)
341 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
342};
343
344template <>
345const DepthwiseImplementation<__fp16> *depthwise_implementation_list()
346{
347 return depthwise_fp16_methods;
348}
349
350template UniqueDepthwiseCommon<__fp16> depthwise(const DepthwiseArgs &, const Nothing &);
351template std::vector<KernelDescription> get_compatible_kernels<__fp16>(const DepthwiseArgs &, const Nothing &);
352
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000353} // namespace depthwise
354} // namespace arm_conv
Freddie Liardetd216f572021-08-03 15:57:32 +0100355
356#endif // defined(__ARM_FP16_ARGS)