blob: 33bcc20d39b1ffeee7dce6a7be4db234e24c457c [file] [log] [blame]
/*
 * Copyright (c) 2018-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEYOLOLayerKernel.h"
Michalis Spyroua6825a42018-09-13 12:24:03 +010025
Michalis Spyroua6825a42018-09-13 12:24:03 +010026#include "arm_compute/core/Helpers.h"
27#include "arm_compute/core/ITensor.h"
Michalis Spyroua6825a42018-09-13 12:24:03 +010028#include "arm_compute/core/TensorInfo.h"
29#include "arm_compute/core/Utils.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010032#include "src/core/CPP/Validate.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010033#include "src/core/NEON/NEAsymm.h"
34#include "src/core/NEON/NEFixedPoint.h"
35#include "src/core/NEON/NEMath.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010036#include "src/core/helpers/AutoConfiguration.h"
37#include "src/core/helpers/WindowHelpers.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010038
39#include "src/core/NEON/kernels/detail/NEActivationFunctionDetail.h"
Michalis Spyroua6825a42018-09-13 12:24:03 +010040
41#include <arm_neon.h>
42
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000043namespace arm_compute
44{
Michalis Spyroua6825a42018-09-13 12:24:03 +010045namespace
46{
47Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes)
48{
49 ARM_COMPUTE_UNUSED(act_info);
Georgios Pinitas8f5802f2019-02-22 11:08:32 +000050 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Michalis Spyroua6825a42018-09-13 12:24:03 +010051 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
52 ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
53 ARM_COMPUTE_RETURN_ERROR_ON(act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC);
54
55 const unsigned int channel_idx = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL);
56 ARM_COMPUTE_RETURN_ERROR_ON(num_classes <= 0);
57 ARM_COMPUTE_RETURN_ERROR_ON((input->dimension(channel_idx) % (num_classes + 5)) != 0);
58
59 // Checks performed when output is configured
60 if((output != nullptr) && (output->total_size() != 0))
61 {
62 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
63 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
64 }
65
66 return Status{};
67}
Michalis Spyroua6825a42018-09-13 12:24:03 +010068} // namespace
69
70NEYOLOLayerKernel::NEYOLOLayerKernel()
71 : _func(nullptr), _input(nullptr), _output(nullptr), _act_info(), _num_classes()
72{
73}
74
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000075template <typename T, int S>
76void NEYOLOLayerKernel::yolo_layer_nchw(const Window &window)
Michalis Spyroua6825a42018-09-13 12:24:03 +010077{
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000078 const auto window_start_x = static_cast<int>(window.x().start());
79 const auto window_end_x = static_cast<int>(window.x().end());
80 const int window_step_x = S;
Michalis Spyroua6825a42018-09-13 12:24:03 +010081
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000082 Window win{ window };
83 win.set(Window::DimX, Window::Dimension(0, 1, 1));
84 Iterator input(_input, win);
85 Iterator output(_output, win);
86
87 execute_window_loop(win, [&](const Coordinates & id)
Michalis Spyroua6825a42018-09-13 12:24:03 +010088 {
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000089 const auto input_ptr = reinterpret_cast<const T *>(input.ptr());
90 const auto output_ptr = reinterpret_cast<T *>(output.ptr());
91 int x = window_start_x;
92 const int box_ch_id = id.z() % (_num_classes + 5);
93 const bool activate = box_ch_id != 2 && box_ch_id != 3;
Michalis Spyroua6825a42018-09-13 12:24:03 +010094
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000095 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Michalis Spyroua6825a42018-09-13 12:24:03 +010096 {
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +000097 auto res = wrapper::vloadq(input_ptr + x);
98
99 // Perform activation
100 if(activate)
101 {
102 auto activation = detail::logistic<T, S>(_act_info);
103 activation(res);
104 }
105
106 // Store results
107 wrapper::vstore(output_ptr + x, res);
Michalis Spyroua6825a42018-09-13 12:24:03 +0100108 }
109
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000110 // Compute left-over elements
111 for(; x < window_end_x; ++x)
112 {
113 auto res = *(input_ptr + x);
114
115 // Perform activation
116 if(activate)
117 {
118 res = 1.f / (1.f + std::exp(-res));
119 }
120
121 *(output_ptr + x) = res;
122 }
Michalis Spyroua6825a42018-09-13 12:24:03 +0100123 },
124 input, output);
125}
126
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000127template <typename T>
128void NEYOLOLayerKernel::yolo_layer_nhwc(const Window &window)
Michalis Spyroua6825a42018-09-13 12:24:03 +0100129{
130 Iterator input(_input, window);
131 Iterator output(_output, window);
132
133 execute_window_loop(window, [&](const Coordinates & id)
134 {
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000135 auto res = *(reinterpret_cast<T *>(input.ptr()));
Michalis Spyroua6825a42018-09-13 12:24:03 +0100136
137 const int box_ch_id = id.x() % (_num_classes + 5);
138 const bool activate = box_ch_id != 2 && box_ch_id != 3;
139
140 // Perform activation
141 if(activate)
142 {
143 res = 1.f / (1.f + std::exp(-res));
144 }
145
146 // Store result
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000147 *(reinterpret_cast<T *>(output.ptr())) = res;
Michalis Spyroua6825a42018-09-13 12:24:03 +0100148 },
149 input, output);
150}
151
Michalis Spyroua6825a42018-09-13 12:24:03 +0100152void NEYOLOLayerKernel::configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes)
153{
154 ARM_COMPUTE_ERROR_ON_NULLPTR(input);
155 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, act_info, num_classes));
156
157 _input = input;
158 _output = output;
159 _act_info = act_info;
160 _num_classes = num_classes;
161
162 switch(_input->info()->data_type())
163 {
164#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
165 case DataType::F16:
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000166 _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_nhwc<float16_t> : &NEYOLOLayerKernel::yolo_layer_nchw<float16_t, 8>;
Michalis Spyroua6825a42018-09-13 12:24:03 +0100167 break;
168#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
169 case DataType::F32:
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000170 _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_nhwc<float> : &NEYOLOLayerKernel::yolo_layer_nchw<float, 4>;
Michalis Spyroua6825a42018-09-13 12:24:03 +0100171 break;
172 default:
173 ARM_COMPUTE_ERROR("Element size not supported");
174 break;
175 }
176
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000177 Window win = calculate_max_window(*input->info(), Steps());
178
Michalis Spyroua6825a42018-09-13 12:24:03 +0100179 // Configure kernel window
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000180 if(output != nullptr)
181 {
182 // Output auto inizialitation if not yet initialized
183 auto_init_if_empty(*output->info(), *input->info());
184
185 Coordinates coord;
186 coord.set_num_dimensions(output->info()->num_dimensions());
187
188 output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
189 }
190
191 ICPPKernel::configure(win);
Michalis Spyroua6825a42018-09-13 12:24:03 +0100192}
193
194Status NEYOLOLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes)
195{
196 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info, num_classes));
Michalis Spyroua6825a42018-09-13 12:24:03 +0100197
198 return Status{};
199}
200
201void NEYOLOLayerKernel::run(const Window &window, const ThreadInfo &info)
202{
203 ARM_COMPUTE_UNUSED(info);
204 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
205 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
206 ARM_COMPUTE_ERROR_ON(_func == nullptr);
207
208 (this->*_func)(window);
209}
Michalis Spyroubb1ad0c2020-02-28 17:31:27 +0000210} // namespace arm_compute