/*
 * Copyright (c) 2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#include "arm_gemm_local.hpp"
26
27#include "depthwise_implementation.hpp"
28#include "depthwise_depthfirst_quantized.hpp"
29#include "depthwise_depthfirst_generic_quantized.hpp"
30#include "depthwise_depthfirst_multiplier_quantized.hpp"
31#include "depthwise_depthfirst_generic_multiplier_quantized.hpp"
32
33#include "depthwise_implementation_constraints.hpp"
34
35#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010036#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000037#include "kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
38#include "kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
39#include "kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
Michalis Spyrou20fca522021-06-07 14:23:57 +010040#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000041#include "kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst.hpp"
42#include "kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst.hpp"
43#include "kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst.hpp"
44#include "kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst.hpp"
45#include "kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst.hpp"
46#endif // defined(__aarch64__)
47
48#include <cstdint>
49
50using arm_gemm::Requantize32;
51
52namespace arm_conv {
53namespace depthwise {
54
55static const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> depthwise_u8q_methods[] = {
56#if defined(__aarch64__)
Michalis Spyrou20fca522021-06-07 14:23:57 +010057#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000058 {
59 DepthwiseMethod::DEPTHFIRST,
60 "sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
61 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
62 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010063 qp_has_no_left_shift,
64 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000065 nullptr,
66 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
67 return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(args, qp);
68 },
69 },
70 {
71 DepthwiseMethod::DEPTHFIRST,
72 "sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
73 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
74 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010075 qp_has_no_left_shift,
76 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000077 nullptr,
78 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
79 return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(args, qp);
80 },
81 },
82 {
83 DepthwiseMethod::DEPTHFIRST,
84 "sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
85 constraint<Requantize32>(is_supported<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
86 has_no_channel_multiplier,
Michalis Spyrou20fca522021-06-07 14:23:57 +010087 qp_has_no_left_shift,
88 cpu_has_sve2),
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000089 nullptr,
90 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
91 return new DepthwiseDepthfirstQuantized<sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(args, qp);
92 },
93 },
Michalis Spyrou20fca522021-06-07 14:23:57 +010094#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SVE2)
Michele Di Giorgiod02d5ed2021-01-22 09:47:04 +000095 {
96 DepthwiseMethod::DEPTHFIRST,
97 "a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst",
98 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>,
99 has_no_channel_multiplier,
100 qp_has_no_left_shift),
101 nullptr,
102 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
103 return new DepthwiseDepthfirstQuantized<a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst>(args, qp);
104 },
105 },
106 {
107 DepthwiseMethod::DEPTHFIRST,
108 "a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst",
109 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>,
110 has_no_channel_multiplier,
111 qp_has_no_left_shift),
112 nullptr,
113 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
114 return new DepthwiseDepthfirstQuantized<a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst>(args, qp);
115 },
116 },
117 {
118 DepthwiseMethod::DEPTHFIRST,
119 "a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst",
120 constraint<Requantize32>(is_supported<a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>,
121 has_no_channel_multiplier,
122 qp_has_no_left_shift),
123 nullptr,
124 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
125 return new DepthwiseDepthfirstQuantized<a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst>(args, qp);
126 },
127 },
128 {
129 DepthwiseMethod::DEPTHFIRST,
130 "a64_u8s8u8q_nhwc_generic_output3x3_mla_depthfirst",
131 constraint<Requantize32>(has_no_channel_multiplier),
132 nullptr,
133 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
134 return new DepthwiseDepthfirstGenericQuantized<a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst, 3, 3>(args, qp);
135 },
136 },
137 {
138 DepthwiseMethod::DEPTHFIRST,
139 "a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst",
140 nullptr,
141 nullptr,
142 [] (const DepthwiseArgs &args, const Requantize32 &qp) -> DepthwiseCommon<uint8_t, int8_t, uint8_t> * {
143 return new DepthwiseDepthfirstGenericWithMultiplierQuantized<a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst>(args, qp);
144 },
145 },
146#endif // defined(__aarch64__)
147 { DepthwiseMethod::DEFAULT, "", nullptr, nullptr, nullptr }, // End of list
148};
149
150template <>
151const DepthwiseImplementation<uint8_t, int8_t, uint8_t, Requantize32> *depthwise_implementation_list()
152{
153 return depthwise_u8q_methods;
154}
155
156template UniqueDepthwiseCommon<uint8_t, int8_t, uint8_t> depthwise(const DepthwiseArgs &, const Requantize32 &);
157template std::vector<KernelDescription> get_compatible_kernels<uint8_t, int8_t, uint8_t, Requantize32>(const DepthwiseArgs &, const Requantize32 &);
158
159} // namespace depthwise
160} // namespace arm_conv