/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
ramelg018a164882022-04-07 02:42:52 +010028#include "depthwise_depthfirst.hpp"
29#include "depthwise_depthfirst_generic.hpp"
30#include "depthwise_depthfirst_multiplier.hpp"
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000031
32#include "depthwise_implementation_constraints.hpp"
33
34#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010035#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000036#include "kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
37#include "kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
38#include "kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
ramelg018a164882022-04-07 02:42:52 +010039#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000040#include "kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
41#include "kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
42#include "kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
43#include "kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst.hpp"
44#include "kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
45#endif // defined(__aarch64__)
46
47#include <cstdint>
48
49using arm_gemm::Requantize32;
50
51namespace arm_conv {
52namespace depthwise {
53
54static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
55#if defined(__aarch64__)
ramelg018a164882022-04-07 02:42:52 +010056#if defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000057 {
58 DepthwiseMethod::DEPTHFIRST,
59 "sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
60 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
61 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010062 qp_has_no_left_shift,
63 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000064 nullptr,
65 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +010066 auto strat = new sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
67 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000068 },
69 },
70 {
71 DepthwiseMethod::DEPTHFIRST,
72 "sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
73 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
74 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010075 qp_has_no_left_shift,
76 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000077 nullptr,
78 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +010079 auto strat = new sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
80 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000081 },
82 },
83 {
84 DepthwiseMethod::DEPTHFIRST,
85 "sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
86 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
87 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010088 qp_has_no_left_shift,
89 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000090 nullptr,
91 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +010092 auto strat = new sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
93 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000094 },
95 },
ramelg018a164882022-04-07 02:42:52 +010096#endif // defined(ARM_COMPUTE_ENABLE_SVE)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000097 {
98 DepthwiseMethod::DEPTHFIRST,
99 "a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
100 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
101 has_no_channel_multiplier,
102 qp_has_no_left_shift),
103 nullptr,
104 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100105 auto strat = new a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst(args.cpu_info);
106 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000107 },
108 },
109 {
110 DepthwiseMethod::DEPTHFIRST,
111 "a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
112 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
113 has_no_channel_multiplier,
114 qp_has_no_left_shift),
115 nullptr,
116 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100117 auto strat = new a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst(args.cpu_info);
118 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000119 },
120 },
121 {
122 DepthwiseMethod::DEPTHFIRST,
123 "a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
124 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
125 has_no_channel_multiplier,
126 qp_has_no_left_shift),
127 nullptr,
128 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100129 auto strat = new a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst(args.cpu_info);
130 return new DepthwiseDepthfirst<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000131 },
132 },
133 {
134 DepthwiseMethod::DEPTHFIRST,
135 "a64_u8s8u8q_nhwc_generic_output3x3_mla_depthfirst",
136 constraint<Requantize32>(has_no_channel_multiplier),
137 nullptr,
138 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100139 auto kernel = new a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst(args.cpu_info);
140 auto strat = new GenericDepthfirstStrategy<uint8_t, int8_t>(kernel, 3, 3, args);
141 return new DepthwiseDepthfirstGeneric<uint8_t, int8_t>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000142 },
143 },
144 {
145 DepthwiseMethod::DEPTHFIRST,
146 "a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
ramelg018a164882022-04-07 02:42:52 +0100147 constraint<Requantize32>(has_channel_multiplier),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000148 nullptr,
149 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
ramelg018a164882022-04-07 02:42:52 +0100150 auto kern = new a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst(args.cpu_info);
151 auto strat = new GenericDepthfirstMultiplierStrategy<uint8_t, int8_t>(kern, args);
152 return new DepthwiseDepthfirstMultiplier<uint8_t, int8_t, uint8_t, int32_t, true>(strat, args, qp);
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +0000153 },
154 },
155#endif // defined(__aarch64__)
156 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
157};
158
159template <>
160const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> *depthwise_implementation_list()
161{
162 return depthwise_u8q_methods;
163}
164
165template UniqueDepthwiseCommon<uint8_t, int8_t, uint8_t> depthwise(const DepthwiseArgs &, const Requantize32 &);
166template std::vector<KernelDescription> get_compatible_kernels<uint8_t, int8_t, uint8_t, Requantize32>(const DepthwiseArgs &, const Requantize32 &);
167
168} // namespace depthwise
169} // namespace arm_conv