/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cpu/kernels/CpuAddMulAddKernel.h"
25
26#include "arm_compute/core/ITensor.h"
27#include "arm_compute/core/TensorInfo.h"
28#include "arm_compute/core/Validate.h"
29
30#include "src/core/CPP/Validate.h"
31#include "src/core/common/Registrars.h"
32#include "src/core/helpers/AutoConfiguration.h"
33#include "src/core/helpers/WindowHelpers.h"
34#include "src/cpu/kernels/addmuladd/list.h"
35
36namespace arm_compute
37{
38namespace cpu
39{
40namespace kernels
41{
42namespace
43{
// Registry of available micro-kernel implementations for the fused
// add + batchnorm-multiply + batchnorm-add operation, one entry per data type.
// Each entry pairs a human-readable name with a selector predicate (matched at
// runtime against the input data type) and the function pointer to dispatch to.
// NOTE(review): the validate path checks uk->ukernel == nullptr, which suggests
// a REGISTER_* macro can yield a null pointer when that data type's support is
// compiled out — confirm against src/core/common/Registrars.h.
// Only AArch64 builds register any implementation.
static const std::vector<CpuAddMulAddKernel::AddMulAddKernel> available_kernels =
{
#ifdef __aarch64__
    {
        "neon_fp32_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
        REGISTER_FP32_NEON(arm_compute::cpu::add_mul_add_fp32_neon)
    },
    {
        "neon_fp16_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16); },
        REGISTER_FP16_NEON(arm_compute::cpu::add_mul_add_fp16_neon)
    },
    {
        "neon_qasymm8_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
        REGISTER_QASYMM8_NEON(arm_compute::cpu::add_mul_add_u8_neon)
    },
    {
        "neon_qasymm8_signed_add_mul_add",
        [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
        REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_mul_add_s8_neon)
    }
#endif // __aarch64__
};
69
// Validates tensor metadata for the fused add + batchnorm(mul, add) + activation kernel.
//
// Checks performed, in order (the first failure is reported):
//  - mandatory tensors are non-null (add_output is optional and may be nullptr);
//  - only ConvertPolicy::SATURATE is accepted;
//  - activation must be IDENTITY or one of the RELU family;
//  - input1 is QASYMM8 / QASYMM8_SIGNED / F16 / F32, and input2 matches it;
//  - batchnorm coefficients are F32 for quantized inputs, otherwise they match
//    the input data type;
//  - input1 and input2 have identical shapes (no broadcasting), bn_mul and
//    bn_add match each other, and bn_mul is a 1D array whose length equals the
//    inputs' first dimension;
//  - outputs are only cross-checked against input1 when already initialized
//    (total_size() > 0), so uninitialized outputs pass through for auto-init;
//  - a micro-kernel implementation exists for this data type / ISA.
//
// Returns an empty Status on success, an error Status describing the first
// violated constraint otherwise.
Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
                          const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
                          const ITensorInfo *add_output, const ITensorInfo *final_output,
                          ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);

    ARM_COMPUTE_RETURN_ERROR_ON_MSG(policy != ConvertPolicy::SATURATE, "Only Saturate Policy is supported");

    // Restrict activations to the set the micro-kernels implement.
    using ActFunction = ActivationLayerInfo::ActivationFunction;
    const ActFunction act_func = act_info.activation();
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
        (act_func != ActFunction::BOUNDED_RELU && act_func != ActFunction::RELU && act_func != ActFunction::LU_BOUNDED_RELU && act_func != ActFunction::IDENTITY),
        "Only RELU Family activations, or no activation, is supported");

    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
                                                         DataType::F16, DataType::F32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);

    // Quantized inputs use F32 batchnorm coefficients; float inputs require
    // coefficients of the same type as the inputs.
    if(is_data_type_quantized(input1->data_type()))
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_mul, 1, DataType::F32);
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_add, 1, DataType::F32);
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_mul);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_add);
    }

    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2); // No broadcasting
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mul, bn_add);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->num_dimensions() != 1, "BatchNorm coefficients should be 1D array");
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->tensor_shape()[0] != input1->tensor_shape()[0], "First dimensions of inputs and batchNorm coefs should match");

    // Validate in case we have add layer's output (intermediate) initialized
    if(add_output != nullptr && add_output->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, add_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, add_output);
    }

    // Validate in case final output has been initialized
    if(final_output->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, final_output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, final_output);
    }

    // Ensure a micro-kernel exists for this data type / ISA combination.
    const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
    ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);

    return Status{};
}
125} // namespace
126
// Configures the kernel: validates the argument metadata, selects the
// micro-kernel matching input1's data type / ISA, auto-initializes any
// uninitialized outputs from input1's metadata, and sets the execution window
// over the final output. add_output (the intermediate result of the first
// addition) is optional and may be nullptr.
void CpuAddMulAddKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2,
                                   const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
                                   ITensorInfo *add_output, ITensorInfo *final_output,
                                   ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
    // These parameters are consumed only through validate_arguments below —
    // presumably ERROR_THROW_ON compiles out in release builds, leaving them
    // otherwise unused there; TODO(review) confirm against Error.h.
    ARM_COMPUTE_UNUSED(bn_mul, bn_add, input2);
    ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, bn_add, bn_mul, final_output);
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));

    // Pick the micro-kernel for this data type / ISA; validate_arguments has
    // already confirmed one exists, these guards are defense-in-depth.
    const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
    ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
    ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);

    _policy     = policy;
    _act_info   = act_info;
    _run_method = uk->ukernel;
    _name       = std::string("CpuAddMulAddKernel/").append(uk->name);

    // Auto initialize outputs if not initialized
    set_shape_if_empty(*final_output, input1->tensor_shape());
    set_data_type_if_unknown(*final_output, input1->data_type());

    if(add_output != nullptr)
    {
        set_shape_if_empty(*add_output, input1->tensor_shape());
        set_data_type_if_unknown(*add_output, input1->data_type());
    }

    // Configure kernel window
    Window win;
    win = calculate_max_window(*final_output, Steps());
    ICpuKernel::configure(win);
}
160
161Status CpuAddMulAddKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
162 const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
163 const ITensorInfo *add_output, const ITensorInfo *final_output,
164 ConvertPolicy policy, const ActivationLayerInfo &act_info)
165{
166 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);
167
168 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));
169
170 return Status{};
171}
172
173void CpuAddMulAddKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
174{
175 ARM_COMPUTE_UNUSED(info);
176
177 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
178 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
179
180 ARM_COMPUTE_ERROR_ON(tensors.empty());
181 ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
182
183 const ITensor *input1 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
184 const ITensor *input2 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
185 const ITensor *bn_mul = tensors.get_const_tensor(TensorType::ACL_SRC_2);
186 const ITensor *bn_add = tensors.get_const_tensor(TensorType::ACL_SRC_3);
187 ITensor *add_output = tensors.get_tensor(TensorType::ACL_DST_0);
188 ITensor *final_output = tensors.get_tensor(TensorType::ACL_DST_1);
189
190 _run_method(input1, input2, bn_mul, bn_add, add_output, final_output, _policy, _act_info, window);
191}
192
// Returns the kernel's descriptive name ("CpuAddMulAddKernel/<ukernel name>",
// assembled in configure()).
const char *CpuAddMulAddKernel::name() const
{
    return _name.c_str();
}
197
// Exposes the static registry of micro-kernel implementations (used e.g. for
// introspection of the supported data types on this build).
const std::vector<CpuAddMulAddKernel::AddMulAddKernel> &CpuAddMulAddKernel::get_available_kernels()
{
    return available_kernels;
}
202} // namespace kernels
203} // namespace cpu
204} // namespace arm_compute