blob: 934272a0ac88d2ba5c9516bb2abf1895ccc53019 [file] [log] [blame]
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
28#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
31#include "depthwise_depthfirst_generic_multiplier.hpp"
32
33#include "depthwise_implementation_constraints.hpp"
34
// This file can only be built if the target/compiler supports FP16 (__fp16) function arguments.
36#if defined(__ARM_FP16_ARGS)
37
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000038#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010039#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000040#include "kernels/sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
41#include "kernels/sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
42#include "kernels/sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
43#include "kernels/sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
44#include "kernels/sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michalis Spyrou20fca522021-06-07 14:23:57 +010045#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000046#include "kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst.hpp"
47#include "kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst.hpp"
48#include "kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
49#include "kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
50#include "kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
51#include "kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst.hpp"
52#include "kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
53#endif // defined(__aarch64__)
54
55namespace arm_conv {
56namespace depthwise {
57
58namespace
59{
60 template <class Strategy>
61 unsigned int cycle_estimate(const DepthwiseArgs &args, const Nothing &)
62 {
63 // First-pass: compute the number of output pixels which will be computed.
64 return arm_gemm::roundup(args.output_rows, Strategy::output_rows) *
65 arm_gemm::roundup(args.output_cols, Strategy::output_cols) *
66 arm_gemm::iceildiv(
67 (long unsigned) args.input_channels * args.channel_multiplier,
68 arm_gemm::utils::get_vector_length<typename Strategy::return_type>(Strategy::vl_type)
69 );
70 }
71
72#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
73 unsigned int not_preferred(const DepthwiseArgs &, const Nothing &)
74 {
75 return std::numeric_limits<unsigned int>::max();
76 }
77
78 unsigned int not_preferred_if_no_multiplier(const DepthwiseArgs &args, const Nothing &)
79 {
80 return args.channel_multiplier > 1 ? 0 : std::numeric_limits<unsigned int>::max();
81 }
Freddie Liardetd216f572021-08-03 15:57:32 +010082#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000083}
84
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000085static const DepthwiseImplementation<__fp16, __fp16> depthwise_fp16_methods[] = {
86#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010087#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000088 {
89 DepthwiseMethod::DEPTHFIRST,
90 "sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
91 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +010092 has_no_channel_multiplier,
93 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000094 cycle_estimate<sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
95 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +010096 auto strat = new sve_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
97 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000098 },
99 },
100 {
101 DepthwiseMethod::DEPTHFIRST,
102 "sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
103 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100104 has_no_channel_multiplier,
105 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000106 cycle_estimate<sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
107 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100108 auto strat = new sve_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
109 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000110 },
111 },
112 {
113 DepthwiseMethod::DEPTHFIRST,
114 "sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
115 constraint(is_supported<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100116 has_no_channel_multiplier,
117 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000118 cycle_estimate<sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
119 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100120 auto strat = new sve_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
121 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000122 },
123 },
124 {
125 DepthwiseMethod::DEPTHFIRST,
126 "sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
127 constraint(is_supported<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100128 has_no_channel_multiplier,
129 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000130 cycle_estimate<sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
131 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100132 auto strat = new sve_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
133 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000134 },
135 },
136 {
137 DepthwiseMethod::DEPTHFIRST,
138 "sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
139 constraint(is_supported<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Michalis Spyrou20fca522021-06-07 14:23:57 +0100140 has_no_channel_multiplier,
141 cpu_has_sve),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000142 cycle_estimate<sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
143 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100144 auto strat = new sve_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
145 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000146 },
147 },
Michalis Spyrou20fca522021-06-07 14:23:57 +0100148#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000149#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
150 {
151 DepthwiseMethod::DEPTHFIRST,
152 "a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst",
153 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100154 has_no_channel_multiplier,
155 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000156 cycle_estimate<a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst>,
157 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100158 auto strat = new a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst(args.cpu_info);
159 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000160 },
161 },
162 {
163 DepthwiseMethod::DEPTHFIRST,
164 "a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst",
165 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100166 has_no_channel_multiplier,
167 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000168 cycle_estimate<a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst>,
169 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100170 auto strat = new a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst(args.cpu_info);
171 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000172 },
173 },
174 {
175 DepthwiseMethod::DEPTHFIRST,
176 "a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst",
177 constraint(is_supported<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100178 has_no_channel_multiplier,
179 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000180 cycle_estimate<a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst>,
181 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100182 auto strat = new a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
183 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000184 },
185 },
186 {
187 DepthwiseMethod::DEPTHFIRST,
188 "a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst",
189 constraint(is_supported<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100190 has_no_channel_multiplier,
191 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000192 cycle_estimate<a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst>,
193 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100194 auto strat = new a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
195 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000196 },
197 },
198 {
199 DepthwiseMethod::DEPTHFIRST,
200 "a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst",
201 constraint(is_supported<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
Freddie Liardetd216f572021-08-03 15:57:32 +0100202 has_no_channel_multiplier,
203 cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000204 cycle_estimate<a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst>,
205 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
Freddie Liardetd216f572021-08-03 15:57:32 +0100206 auto strat = new a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
207 return new DepthwiseDepthfirst<__fp16, __fp16, __fp16, __fp16>(strat, args);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000208 },
209 },
210 {
211 DepthwiseMethod::DEPTHFIRST,
212 "a64_fp16_nhwc_generic_output3x3_mla_depthfirst",
Freddie Liardetd216f572021-08-03 15:57:32 +0100213 constraint(has_no_channel_multiplier, cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000214 not_preferred,
215 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
216 return new DepthwiseDepthfirstGeneric<a64_fp16_nhwc_generic_output9_mla_depthfirst, 3, 3>(args);
217 },
218 },
219 {
220 DepthwiseMethod::DEPTHFIRST,
221 "a64_fp16_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
Freddie Liardetd216f572021-08-03 15:57:32 +0100222 constraint(cpu_has_fp16),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000223 not_preferred_if_no_multiplier,
224 [] (const DepthwiseArgs &args, const Nothing &) -> DepthwiseCommon<__fp16, __fp16, __fp16> * {
225 return new DepthwiseDepthfirstGenericWithMultiplier<a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst>(args);
226 },
227 },
228#endif // defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
229#endif // defined(__aarch64__)
230 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
231};
232
233template <>
234const DepthwiseImplementation<__fp16> *depthwise_implementation_list()
235{
236 return depthwise_fp16_methods;
237}
238
239template UniqueDepthwiseCommon<__fp16> depthwise(const DepthwiseArgs &, const Nothing &);
240template std::vector<KernelDescription> get_compatible_kernels<__fp16>(const DepthwiseArgs &, const Nothing &);
241
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000242} // namespace depthwise
243} // namespace arm_conv
Freddie Liardetd216f572021-08-03 15:57:32 +0100244
245#endif // defined(__ARM_FP16_ARGS)