blob: 134dbd1b4c4f33a1471166dc210976055bca8180 [file] [log] [blame]
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +00001/*
Michael Tyler74921ee2023-04-12 17:43:17 +01002 * Copyright (c) 2021-2023 Arm Limited.
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
28#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000031
32#include "depthwise_implementation_constraints.hpp"
33
Freddie Liardetd216f572021-08-03 15:57:32 +010034// This can only be built if the target/compiler supports FP16 arguments.
35#if defined(__ARM_FP16_ARGS)
36
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000037#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010038#if defined(ARM_COMPUTE_ENABLE_SVE)
David Mansellb2758f32023-03-30 19:10:52 +010039#if defined(ARM_COMPUTE_ENABLE_SME2)
40#include "kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
41#include "kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
42#include "kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
43#include "kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
44#include "kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
45#endif // defined(ARM_COMPUTE_ENABLE_SME2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000046#include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
47#include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
48#include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
49#include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
50#include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michalis Spyrou20fca522021-06-07 14:23:57 +010051#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ramelg018a164882022-04-07 02:42:52 +010052#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000053#include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
54#include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
55#include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
56#include "kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
57#include "kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
58#include "kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst.hpp"
59#include "kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010060#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000061#endif // defined(__aarch64__)
62
63namespace arm_conv {
64namespace depthwise {
65
66namespace
67{
68 template <class Strategy>
69 unsigned int cycle_estimate(const DepthwiseArgs &args, const Nothing &)
70 {
71 // First-pass: compute the number of output pixels which will be computed.
72 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
73 arm_gemm::roundup(args.output_cols, Strategy::output_cols) *
74 arm_gemm::iceildiv(
75 (long unsigned) args.input_channels * args.channel_multiplier,
76 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
77 );
78 }
79
Michael Tyler74921ee2023-04-12 17:43:17 +010080 template <class Strategy>
81 unsigned int planar_cycle_estimate(const DepthwiseArgs &args, const Nothing &)
82 {
83 // First-pass: compute the number of output pixels which will be computed.
84 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
85 args.output_cols *
86 arm_gemm::iceildiv(
87 (long unsigned) args.input_channels * args.channel_multiplier,
88 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
89 );
90 }
91
#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
// Cycle estimator that always reports the largest representable cost,
// regardless of the problem description. Both parameters are ignored.
//
// Marked `unused` because the only reference to it in this file sits behind
// additional preprocessor guards, so some build configurations never use it.
__attribute__((unused)) unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
{
    constexpr unsigned int worst_possible_cost = std::numeric_limits<unsigned int>::max();
    return worst_possible_cost;
}
#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000099}
100
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000101static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = {
102#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +0100103#if defined(ARM_COMPUTE_ENABLE_SVE)
David Mansellb2758f32023-03-30 19:10:52 +0100104#if defined(ARM_COMPUTE_ENABLE_SME2)
105 {
106 DepthwiseMethod::DEPTHFIRST,
107 "sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
108 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
109 has_no_channel_multiplier,
110 cpu_has_sme2),
111 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
112 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
113 auto strat = new sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
114 return new DepthwiseDepthfirst<__fp16>(strat, args);
115 },
116 },
117 {
118 DepthwiseMethod::DEPTHFIRST,
119 "sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
120 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
121 has_no_channel_multiplier,
122 cpu_has_sme2),
123 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
124 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
125 auto strat = new sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
126 return new DepthwiseDepthfirst<__fp16>(strat, args);
127 },
128 },
129 {
130 DepthwiseMethod::DEPTHFIRST,
131 "sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
132 constraint(is_supported<sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
133 has_no_channel_multiplier,
134 cpu_has_sme2),
135 cycle_estimate<sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
136 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
137 auto strat = new sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
138 return new DepthwiseDepthfirst<__fp16>(strat, args);
139 },
140 },
141 {
142 DepthwiseMethod::DEPTHFIRST,
143 "sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
144 constraint(is_supported<sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
145 has_no_channel_multiplier,
146 cpu_has_sme2),
147 cycle_estimate<sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
148 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
149 auto strat = new sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
150 return new DepthwiseDepthfirst<__fp16>(strat, args);
151 },
152 },
153 {
154 DepthwiseMethod::DEPTHFIRST,
155 "sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
156 constraint(is_supported<sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
157 has_no_channel_multiplier,
158 cpu_has_sme2),
159 cycle_estimate<sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
160 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
161 auto strat = new sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
162 return new DepthwiseDepthfirst<__fp16>(strat, args);
163 },
164 },
165#endif // defined(ARM_COMPUTE_ENABLE_SME2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000166 {
167 DepthwiseMethod::DEPTHFIRST,
168 "sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
169 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100170 has_no_channel_multiplier,
171 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000172 cycle_estimate<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
173 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100174 auto strat = new sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100175 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000176 },
177 },
178 {
179 DepthwiseMethod::DEPTHFIRST,
180 "sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
181 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100182 has_no_channel_multiplier,
183 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000184 cycle_estimate<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
185 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100186 auto strat = new sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100187 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000188 },
189 },
190 {
191 DepthwiseMethod::DEPTHFIRST,
192 "sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
193 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100194 has_no_channel_multiplier,
195 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000196 cycle_estimate<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
197 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100198 auto strat = new sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100199 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000200 },
201 },
202 {
203 DepthwiseMethod::DEPTHFIRST,
204 "sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
205 constraint(is_supported<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100206 has_no_channel_multiplier,
207 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000208 cycle_estimate<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
209 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100210 auto strat = new sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100211 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000212 },
213 },
214 {
215 DepthwiseMethod::DEPTHFIRST,
216 "sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
217 constraint(is_supported<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100218 has_no_channel_multiplier,
219 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000220 cycle_estimate<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
221 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100222 auto strat = new sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100223 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000224 },
225 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100226#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ramelg018a164882022-04-07 02:42:52 +0100227#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000228 {
229 DepthwiseMethod::DEPTHFIRST,
230 "a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
231 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100232 has_no_channel_multiplier,
233 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000234 cycle_estimate<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
235 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100236 auto strat = new a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100237 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000238 },
239 },
240 {
241 DepthwiseMethod::DEPTHFIRST,
242 "a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
243 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100244 has_no_channel_multiplier,
245 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000246 cycle_estimate<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
247 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100248 auto strat = new a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100249 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000250 },
251 },
252 {
253 DepthwiseMethod::DEPTHFIRST,
254 "a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
255 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100256 has_no_channel_multiplier,
257 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000258 cycle_estimate<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
259 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100260 auto strat = new a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100261 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000262 },
263 },
264 {
265 DepthwiseMethod::DEPTHFIRST,
266 "a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
267 constraint(is_supported<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100268 has_no_channel_multiplier,
269 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000270 cycle_estimate<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
271 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100272 auto strat = new a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100273 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000274 },
275 },
276 {
277 DepthwiseMethod::DEPTHFIRST,
278 "a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
279 constraint(is_supported<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100280 has_no_channel_multiplier,
281 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000282 cycle_estimate<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
283 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100284 auto strat = new a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100285 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000286 },
287 },
288 {
289 DepthwiseMethod::DEPTHFIRST,
290 "a64_fp16_nhwc_generic_output3x3_mla_depthfirst",
Freddie Liardetd216f572021-08-03 15:57:32 +0100291 constraint(has_no_channel_multiplier, cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000292 not_preferred,
293 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100294 auto kern = new a64_fp16_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
295 auto strat = new GenericDepthfirstStrategy<__fp16>(kern, 3, 3, args);
296 return new DepthwiseDepthfirstGeneric<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000297 },
298 },
299 {
300 DepthwiseMethod::DEPTHFIRST,
301 "a64_fp16_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100302 constraint(cpu_has_fp16, has_channel_multiplier),
303 nullptr,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000304 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100305 auto kern = new a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
306 auto strat = new GenericDepthfirstMultiplierStrategy<__fp16>(kern, args);
307 return new DepthwiseDepthfirstMultiplier<__fp16, __fp16, __fp16, __fp16, true>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000308 },
309 },
ramelg018a164882022-04-07 02:42:52 +0100310#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000311#endif // defined(__aarch64__)
312 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
313};
314
315template <>
316const DepthwiseImplementation<__fp16> *depthwise_implementation_list()
317{
318 return depthwise_fp16_methods;
319}
320
321template UniqueDepthwiseCommon<__fp16> depthwise(const DepthwiseArgs &, const Nothing &);
322template std::vector<KernelDescription> get_compatible_kernels<__fp16>(const DepthwiseArgs &, const Nothing &);
323
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000324} // namespace depthwise
325} // namespace arm_conv
Freddie Liardetd216f572021-08-03 15:57:32 +0100326
327#endif // defined(__ARM_FP16_ARGS)