blob: 8ab0288ab1f22102ac13b60cede83a309b8ef346 [file] [log] [blame]
/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
Georgios Pinitasd9769582017-08-03 10:19:40 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Georgios Pinitasd9769582017-08-03 10:19:40 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Utils.h"
31#include "arm_compute/core/Validate.h"
32#include "arm_compute/core/Window.h"
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020033#include "src/common/cpuinfo/CpuIsaInfo.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010034#include "src/core/NEON/NEMath.h"
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020035#include "src/core/common/Registrars.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010036#include "src/core/helpers/AutoConfiguration.h"
37#include "src/core/helpers/WindowHelpers.h"
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020038#include "src/cpu/kernels/l2normlayer/list.h"
Georgios Pinitasd9769582017-08-03 10:19:40 +010039
40#include <arm_neon.h>
41#include <cmath>
42
Michalis Spyrou2897e612018-11-20 18:38:29 +000043namespace arm_compute
44{
Georgios Pinitasd9769582017-08-03 10:19:40 +010045namespace
46{
/** Modulus handed to wrap_around() when the user-supplied normalization axis is converted to the actual axis. */
constexpr int max_input_tensor_dim = 3;
48
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020049struct L2NormalizeLayerSelectorData
Georgios Pinitasd9769582017-08-03 10:19:40 +010050{
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020051 DataType dt;
52 unsigned int actual_axis;
53 cpuinfo::CpuIsaInfo isa;
54};
Michalis Spyrou2897e612018-11-20 18:38:29 +000055
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020056using L2NormalizeLayerKernelSelctorPtr = std::add_pointer<bool(const L2NormalizeLayerSelectorData &data)>::type;
Georgios Pinitasd9769582017-08-03 10:19:40 +010057
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020058using L2NormalizeLayerPtr = std::add_pointer<void(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window, size_t axis)>::type;
Georgios Pinitasd9769582017-08-03 10:19:40 +010059
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020060struct L2NormalizeLayerKernel
Michalis Spyrou2897e612018-11-20 18:38:29 +000061{
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020062 const char *name;
63 const L2NormalizeLayerKernelSelctorPtr is_selected;
64 L2NormalizeLayerPtr ukernel;
65};
Michalis Spyrou2897e612018-11-20 18:38:29 +000066
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020067static const L2NormalizeLayerKernel available_kernels[] =
68{
Michalis Spyrou2897e612018-11-20 18:38:29 +000069 {
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020070 "fp32_neon_l2normalize_x",
71 [](const L2NormalizeLayerSelectorData & data) { return data.dt == DataType::F32 && data.actual_axis == Window::DimX; },
72 REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_l2_normalize_x)
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +010073 },
Yair Schwarzbaum5e993182022-01-10 15:11:07 +020074 {
75 "fp32_neon_l2normalize_yz",
76 [](const L2NormalizeLayerSelectorData & data) { return data.dt == DataType::F32 && data.actual_axis != Window::DimX; },
77 REGISTER_FP32_NEON(arm_compute::cpu::neon_fp32_l2_normalize_yz)
78 },
79 {
80 "fp16_neon_l2normalize_x",
81 [](const L2NormalizeLayerSelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16 && data.actual_axis == Window::DimX; },
82 REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_l2_normalize_x),
83 },
84 {
85 "fp16_neon_l2normalize_yz",
86 [](const L2NormalizeLayerSelectorData & data) { return data.dt == DataType::F16 && data.isa.fp16 && data.actual_axis != Window::DimX; },
87 REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_l2_normalize_yz),
88 },
89};
90
91/** Micro-kernel selector
92 *
93 * @param[in] data Selection data passed to help pick the appropriate micro-kernel
94 *
95 * @return A matching micro-kernel else nullptr
96 */
97const L2NormalizeLayerKernel *get_implementation(const L2NormalizeLayerSelectorData &data)
98{
99 for(const auto &uk : available_kernels)
100 {
101 if(uk.is_selected(data))
102 {
103 return &uk;
104 }
105 }
106 return nullptr;
Michalis Spyrou2897e612018-11-20 18:38:29 +0000107}
108
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100109Status validate_arguments(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
John Richardson73d4aef2018-05-08 14:34:33 +0100110{
111 ARM_COMPUTE_UNUSED(epsilon);
112
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100113 const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
John Richardson73d4aef2018-05-08 14:34:33 +0100114 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output);
115 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum);
Michalis Spyrou2897e612018-11-20 18:38:29 +0000116 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100117 ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis > 2, "Actual axis greater than 2 is not supported");
118 ARM_COMPUTE_RETURN_ERROR_ON_MSG(actual_axis >= TensorShape::num_max_dimensions, "Actual normalization axis greater than max number of dimensions");
John Richardson73d4aef2018-05-08 14:34:33 +0100119
120 // Reduce shape on axis
121 TensorShape sum_shape = input->tensor_shape();
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100122 sum_shape.set(actual_axis, 1);
John Richardson73d4aef2018-05-08 14:34:33 +0100123 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(sum->tensor_shape(), sum_shape);
124
125 if(output->total_size() != 0)
126 {
127 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
128 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
129 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(input->tensor_shape(), output->tensor_shape());
Michalis Spyrou2897e612018-11-20 18:38:29 +0000130 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
John Richardson73d4aef2018-05-08 14:34:33 +0100131 }
132
133 return Status{};
134}
135
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100136std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
John Richardson73d4aef2018-05-08 14:34:33 +0100137{
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100138 Window win = calculate_max_window(*input, Steps());
John Richardson73d4aef2018-05-08 14:34:33 +0100139
140 // Output auto initialization if not yet initialized
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100141 auto_init_if_empty(*output, input->tensor_shape(), 1, input->data_type());
John Richardson73d4aef2018-05-08 14:34:33 +0100142
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100143 // NEL2NormalizeLayerKernel doesn't need padding so update_window_and_padding() can be skipped
John Richardson73d4aef2018-05-08 14:34:33 +0100144
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100145 return std::make_tuple(Status{}, win);
John Richardson73d4aef2018-05-08 14:34:33 +0100146}
Georgios Pinitasd9769582017-08-03 10:19:40 +0100147} // namespace
148
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000149NEL2NormalizeLayerKernel::NEL2NormalizeLayerKernel()
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100150 : _input(nullptr), _sum(nullptr), _output(nullptr), _actual_axis(0), _epsilon(1e-12)
Georgios Pinitasd9769582017-08-03 10:19:40 +0100151{
152}
153
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100154void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon)
Georgios Pinitasd9769582017-08-03 10:19:40 +0100155{
156 ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
John Richardson73d4aef2018-05-08 14:34:33 +0100157 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon));
Georgios Pinitasd9769582017-08-03 10:19:40 +0100158
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100159 _input = input;
160 _sum = sum;
161 _output = output;
162 _actual_axis = wrap_around(axis, max_input_tensor_dim);
163 _epsilon = epsilon;
Georgios Pinitasd9769582017-08-03 10:19:40 +0100164
165 // Configure kernel window
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100166 auto win_config = validate_and_configure_window(_input->info(), _output->info());
John Richardson73d4aef2018-05-08 14:34:33 +0100167 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Georgios Pinitasd9769582017-08-03 10:19:40 +0100168
John Richardson73d4aef2018-05-08 14:34:33 +0100169 INEKernel::configure(std::get<1>(win_config));
170}
Georgios Pinitasd9769582017-08-03 10:19:40 +0100171
Manuel Bottini4b5c5882019-05-14 10:38:30 +0100172Status NEL2NormalizeLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon)
John Richardson73d4aef2018-05-08 14:34:33 +0100173{
174 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, sum, output, axis, epsilon));
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100175 ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
Georgios Pinitasd9769582017-08-03 10:19:40 +0100176
John Richardson73d4aef2018-05-08 14:34:33 +0100177 return Status{};
Georgios Pinitasd9769582017-08-03 10:19:40 +0100178}
179
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000180void NEL2NormalizeLayerKernel::run(const Window &window, const ThreadInfo &info)
Georgios Pinitasd9769582017-08-03 10:19:40 +0100181{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100182 ARM_COMPUTE_UNUSED(info);
Georgios Pinitasd9769582017-08-03 10:19:40 +0100183 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
184 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
185
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100186 if(_actual_axis > 2)
Georgios Pinitasd9769582017-08-03 10:19:40 +0100187 {
Georgios Pinitas6cb26ce2020-06-24 17:20:23 +0100188 ARM_COMPUTE_ERROR("Unsupported normalization axis");
189 }
190
Yair Schwarzbaum5e993182022-01-10 15:11:07 +0200191 const auto *uk = get_implementation(L2NormalizeLayerSelectorData{ _output->info()->data_type(), _actual_axis, CPUInfo::get().get_isa() });
192 ARM_COMPUTE_ERROR_ON(uk == nullptr);
193 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
194
195 uk->ukernel(_input, _sum, _output, _epsilon, window, _actual_axis);
Georgios Pinitasd9769582017-08-03 10:19:40 +0100196}
Michalis Spyrou2897e612018-11-20 18:38:29 +0000197} // namespace arm_compute