/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
28#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000031
32#include "depthwise_implementation_constraints.hpp"
33
Freddie Liardetd216f572021-08-03 15:57:32 +010034// This can only be built if the target/compiler supports FP16 arguments.
35#if defined(__ARM_FP16_ARGS)
36
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000037#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010038#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000039#include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
40#include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
41#include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
42#include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
43#include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michalis Spyrou20fca522021-06-07 14:23:57 +010044#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ramelg018a164882022-04-07 02:42:52 +010045#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000046#include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
47#include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
48#include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
49#include "kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
50#include "kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
51#include "kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst.hpp"
52#include "kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010053#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000054#endif // defined(__aarch64__)
55
56namespace arm_conv {
57namespace depthwise {
58
59namespace
60{
61 template <class Strategy>
62 unsigned int cycle_estimate(const DepthwiseArgs &args, const Nothing &)
63 {
64 // First-pass: compute the number of output pixels which will be computed.
65 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
66 arm_gemm::roundup(args.output_cols, Strategy::output_cols) *
67 arm_gemm::iceildiv(
68 (long unsigned) args.input_channels * args.channel_multiplier,
69 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
70 );
71 }
72
73#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
ramelg018a164882022-04-07 02:42:52 +010074 unsigned int not_preferred(const DepthwiseArgs &, const Nothing &) __attribute__ ((unused));
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000075 unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
76 {
77 return std::numeric_limits<unsigned int>::max();
78 }
Freddie Liardetd216f572021-08-03 15:57:32 +010079#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000080}
81
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000082static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = {
83#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010084#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000085 {
86 DepthwiseMethod::DEPTHFIRST,
87 "sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
88 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +010089 has_no_channel_multiplier,
90 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000091 cycle_estimate<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
92 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +010093 auto strat = new sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +010094 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000095 },
96 },
97 {
98 DepthwiseMethod::DEPTHFIRST,
99 "sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
100 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100101 has_no_channel_multiplier,
102 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000103 cycle_estimate<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
104 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100105 auto strat = new sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100106 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000107 },
108 },
109 {
110 DepthwiseMethod::DEPTHFIRST,
111 "sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
112 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100113 has_no_channel_multiplier,
114 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000115 cycle_estimate<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
116 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100117 auto strat = new sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100118 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000119 },
120 },
121 {
122 DepthwiseMethod::DEPTHFIRST,
123 "sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
124 constraint(is_supported<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100125 has_no_channel_multiplier,
126 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000127 cycle_estimate<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
128 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100129 auto strat = new sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100130 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000131 },
132 },
133 {
134 DepthwiseMethod::DEPTHFIRST,
135 "sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
136 constraint(is_supported<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100137 has_no_channel_multiplier,
138 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000139 cycle_estimate<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
140 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100141 auto strat = new sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100142 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000143 },
144 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100145#endif // defined(ARM_COMPUTE_ENABLE_SVE)
ramelg018a164882022-04-07 02:42:52 +0100146#if defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000147 {
148 DepthwiseMethod::DEPTHFIRST,
149 "a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
150 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100151 has_no_channel_multiplier,
152 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000153 cycle_estimate<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
154 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100155 auto strat = new a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100156 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000157 },
158 },
159 {
160 DepthwiseMethod::DEPTHFIRST,
161 "a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
162 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100163 has_no_channel_multiplier,
164 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000165 cycle_estimate<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
166 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100167 auto strat = new a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100168 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000169 },
170 },
171 {
172 DepthwiseMethod::DEPTHFIRST,
173 "a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
174 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100175 has_no_channel_multiplier,
176 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000177 cycle_estimate<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
178 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100179 auto strat = new a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100180 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000181 },
182 },
183 {
184 DepthwiseMethod::DEPTHFIRST,
185 "a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
186 constraint(is_supported<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100187 has_no_channel_multiplier,
188 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000189 cycle_estimate<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
190 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100191 auto strat = new a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100192 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000193 },
194 },
195 {
196 DepthwiseMethod::DEPTHFIRST,
197 "a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
198 constraint(is_supported<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100199 has_no_channel_multiplier,
200 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000201 cycle_estimate<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
202 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100203 auto strat = new a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
ramelg018a164882022-04-07 02:42:52 +0100204 return new DepthwiseDepthfirst<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000205 },
206 },
207 {
208 DepthwiseMethod::DEPTHFIRST,
209 "a64_fp16_nhwc_generic_output3x3_mla_depthfirst",
Freddie Liardetd216f572021-08-03 15:57:32 +0100210 constraint(has_no_channel_multiplier, cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000211 not_preferred,
212 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100213 auto kern = new a64_fp16_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
214 auto strat = new GenericDepthfirstStrategy<__fp16>(kern, 3, 3, args);
215 return new DepthwiseDepthfirstGeneric<__fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000216 },
217 },
218 {
219 DepthwiseMethod::DEPTHFIRST,
220 "a64_fp16_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100221 constraint(cpu_has_fp16, has_channel_multiplier),
222 nullptr,
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000223 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
ramelg018a164882022-04-07 02:42:52 +0100224 auto kern = new a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
225 auto strat = new GenericDepthfirstMultiplierStrategy<__fp16>(kern, args);
226 return new DepthwiseDepthfirstMultiplier<__fp16, __fp16, __fp16, __fp16, true>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000227 },
228 },
ramelg018a164882022-04-07 02:42:52 +0100229#endif // defined(ENABLE_FP16_KERNELS) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000230#endif // defined(__aarch64__)
231 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
232};
233
234template <>
235const DepthwiseImplementation<__fp16> *depthwise_implementation_list()
236{
237 return depthwise_fp16_methods;
238}
239
240template UniqueDepthwiseCommon<__fp16> depthwise(const DepthwiseArgs &, const Nothing &);
241template std::vector<KernelDescription> get_compatible_kernels<__fp16>(const DepthwiseArgs &, const Nothing &);
242
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000243} // namespace depthwise
244} // namespace arm_conv
Freddie Liardetd216f572021-08-03 15:57:32 +0100245
246#endif // defined(__ARM_FP16_ARGS)