blob: fe8882f59fa27d9a42397617d99216fb7ec3b55e [file] [log] [blame]
/*
 * Copyright (c) 2023-2024 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
David Svantessoncd8b40d2023-05-02 13:05:36 +000024#if defined(__aarch64__)
David Svantesson3b162e52023-03-28 14:13:32 +000025
26#include "src/core/NEON/kernels/NEReorderKernel.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010027
David Svantesson3b162e52023-03-28 14:13:32 +000028#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/Validate.h"
Radu Salavat0c5ba9e2024-04-15 15:02:09 +000030#include "arm_compute/runtime/Scheduler.h"
David Svantesson3b162e52023-03-28 14:13:32 +000031
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032#include "src/common/utils/Log.h"
33#include "src/core/NEON/kernels/arm_gemm/transform.hpp"
34
David Svantesson3b162e52023-03-28 14:13:32 +000035namespace arm_compute
36{
37
38void NEReorderKernel::run(const Window &window, const ThreadInfo &info)
39{
40 ARM_COMPUTE_UNUSED(info);
41 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
42 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010043 switch (_input->info()->data_type())
David Svantesson3b162e52023-03-28 14:13:32 +000044 {
45 case DataType::F32:
46 {
47 const int ksize_rows_elements = _xmax * _ksize;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010048 const int jump_rows = ksize_rows_elements * window.x().start();
49 const int k_start = window.x().start() * _ksize;
50 const int k_end = std::min(window.x().end() * _ksize, _kmax);
51 const int stride = _kmax;
52 if (k_start < k_end)
David Svantesson3b162e52023-03-28 14:13:32 +000053 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010054 switch (_output_wf)
David Svantesson3b162e52023-03-28 14:13:32 +000055 {
56 case WeightFormat::OHWIo4:
57 {
Renato Arantes0eb9cfb2023-11-23 11:12:51 +000058 switch (_output->info()->data_type())
59 {
60 case DataType::F32:
61 arm_gemm::Transform<4, 1, true, arm_gemm::VLType::None>(
62 reinterpret_cast<float *>(_output->buffer()) + jump_rows,
63 reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
64 break;
65 case DataType::BFLOAT16:
66 arm_gemm::Transform<4, 4, true, arm_gemm::VLType::None>(
67 reinterpret_cast<bfloat16 *>(_output->buffer()) + jump_rows,
68 reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
69 break;
70 default:
71 ARM_COMPUTE_ERROR("Unsupported data type!");
72 }
David Svantesson3b162e52023-03-28 14:13:32 +000073 break;
74 }
75#if defined(ARM_COMPUTE_ENABLE_SVE)
76 case WeightFormat::OHWIo8:
77 {
Renato Arantes0eb9cfb2023-11-23 11:12:51 +000078 switch (_output->info()->data_type())
79 {
80 case DataType::F32:
81 arm_gemm::Transform<1, 1, true, arm_gemm::VLType::SVE>(
82 reinterpret_cast<float *>(_output->buffer()) + jump_rows,
83 reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
84 break;
85 case DataType::BFLOAT16:
86 arm_gemm::Transform<2, 4, true, arm_gemm::VLType::SVE>(
87 reinterpret_cast<bfloat16 *>(_output->buffer()) + jump_rows,
88 reinterpret_cast<float *>(_input->buffer()), stride, k_start, k_end, 0, _xmax);
89 break;
90 default:
91 ARM_COMPUTE_ERROR("Unsupported data type!");
92 }
David Svantesson3b162e52023-03-28 14:13:32 +000093 break;
94 }
95#endif /* ARM_COMPUTE_ENABLE_SVE */
96 default:
97 {
98 ARM_COMPUTE_ERROR("Unsupported data type!");
99 break;
100 }
101 }
102 }
103 break;
104 }
105 default:
106 ARM_COMPUTE_ERROR("Unsupported data type!");
107 }
108}
109
110NEReorderKernel::NEReorderKernel()
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100111 : _input(nullptr),
112 _output(nullptr),
113 _ksize(0),
114 _kmax(0),
115 _xmax(0),
116 _input_wf(WeightFormat::ANY),
117 _output_wf(WeightFormat::ANY)
David Svantesson3b162e52023-03-28 14:13:32 +0000118{
119}
120
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100121void NEReorderKernel::configure(const ITensor *input,
122 ITensor *output,
123 arm_compute::WeightFormat input_wf,
124 arm_compute::WeightFormat output_wf)
David Svantesson3b162e52023-03-28 14:13:32 +0000125{
126 ARM_COMPUTE_LOG_PARAMS(input, output, input_wf, output_wf);
127 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
128 ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), input_wf, output_wf));
129
130 // Set variables
131 _input = input;
132 _output = output;
133 _input_wf = input_wf;
134 _output_wf = output_wf;
135
136 // Setting parameters for transform
137 auto dims = input->info()->num_dimensions();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 switch (dims)
David Svantesson3b162e52023-03-28 14:13:32 +0000139 {
140 case 2:
141 {
142 _xmax = input->info()->dimension(0); // Number of columns in input matrix
143 _kmax = input->info()->dimension(1); // Number of rows in input matrix
144 break;
145 }
146 case 4:
147 {
148 _xmax = input->info()->dimension(2); // Number of columns in input matrix
149 _kmax = input->info()->dimension(3); // Number of rows in input matrix
150 break;
151 }
152 default:
153 {
154 ARM_COMPUTE_ERROR("Only 2 or 4 dimensions supported.");
155 }
156 }
157
158 // Configure kernel window
159 // Window size is set by rows / _ksize
160 Window win;
161 int window_size = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100162 switch (_output_wf)
David Svantesson3b162e52023-03-28 14:13:32 +0000163 {
164#if defined(ARM_COMPUTE_ENABLE_SVE)
165 case WeightFormat::OHWIo8:
166 {
167 _ksize = 8;
168 window_size = _kmax / _ksize;
169 break;
170 }
171#endif /* ARM_COMPUTE_ENABLE_SVE */
172 case WeightFormat::OHWIo4:
173 {
174 _ksize = 4;
175 window_size = _kmax / _ksize;
176 break;
177 }
178 default:
179 {
180 ARM_COMPUTE_ERROR("Unsupported weight format.");
181 break;
182 }
183 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100184 if (_kmax % _ksize != 0)
David Svantesson3b162e52023-03-28 14:13:32 +0000185 {
186 window_size += 1;
187 }
188
189 win.set(Window::DimX, Window::Dimension(0, window_size, 1));
190
191 INEKernel::configure(win);
192}
193
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100194Status NEReorderKernel::validate(const ITensorInfo *input,
195 const ITensorInfo *output,
196 arm_compute::WeightFormat input_wf,
197 arm_compute::WeightFormat output_wf)
David Svantesson3b162e52023-03-28 14:13:32 +0000198{
199 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
200 ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100201 if (output->tensor_shape().total_size() != 0)
David Svantesson3b162e52023-03-28 14:13:32 +0000202 {
Renato Arantes0eb9cfb2023-11-23 11:12:51 +0000203 ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() != DataType::F32);
204 ARM_COMPUTE_RETURN_ERROR_ON(output->data_type() != DataType::F32 && output->data_type() != DataType::BFLOAT16);
David Svantesson3b162e52023-03-28 14:13:32 +0000205 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_QUANTIZATION_INFO(input, output);
206 // Only input WeightFormat OHWI supported
207 ARM_COMPUTE_RETURN_ERROR_ON(input_wf != arm_compute::WeightFormat::OHWI);
208 int input_x_dim;
209 int input_k_dim;
210 int output_x_dim;
211 int output_k_dim;
212 auto dims = output->num_dimensions();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100213 switch (dims)
David Svantesson3b162e52023-03-28 14:13:32 +0000214 {
215 case 2:
216 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100217 input_x_dim = input->dimension(0); // Number of columns in input matrix
218 input_k_dim = input->dimension(1); // Number of rows in input matrix
David Svantesson3b162e52023-03-28 14:13:32 +0000219 output_x_dim = output->dimension(0); // Number of columns in output matrix
220 output_k_dim = output->dimension(1); // Number of rows in output matrix
221 break;
222 }
223 case 4:
224 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100225 input_x_dim = input->dimension(2); // Number of columns in input matrix
226 input_k_dim = input->dimension(3); // Number of rows in input matrix
David Svantesson3b162e52023-03-28 14:13:32 +0000227 output_x_dim = output->dimension(2); // Number of columns in output matrix
228 output_k_dim = output->dimension(3); // Number of rows in output matrix
229 break;
230 }
231 default:
232 {
233 ARM_COMPUTE_RETURN_ERROR_MSG("Only 2 or 4 dimensions supported.");
234 }
235 }
236
Radu Salavat0c5ba9e2024-04-15 15:02:09 +0000237 int ksize = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100238 switch (output_wf)
David Svantesson3b162e52023-03-28 14:13:32 +0000239 {
David Svantessondfd56a62023-10-10 11:52:51 +0000240#if defined(ARM_COMPUTE_ENABLE_SVE)
David Svantesson3b162e52023-03-28 14:13:32 +0000241 case WeightFormat::OHWIo8:
242 {
Radu Salavat0c5ba9e2024-04-15 15:02:09 +0000243 if (Scheduler::get().cpu_info().has_sve() && arm_gemm::utils::get_vector_length<float>() == 8)
244 {
245 ksize = 8;
246 }
247 else
248 {
249 ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported weight format.");
250 }
David Svantesson3b162e52023-03-28 14:13:32 +0000251 break;
252 }
David Svantessondfd56a62023-10-10 11:52:51 +0000253#endif /* ARM_COMPUTE_ENABLE_SVE */
David Svantesson3b162e52023-03-28 14:13:32 +0000254 case WeightFormat::OHWIo4:
255 {
256 ksize = 4;
257 break;
258 }
259 default:
260 {
261 ARM_COMPUTE_RETURN_ERROR_MSG("Unsupported weight format.");
262 break;
263 }
264 }
265
266 // output k_dim needs to be same as input but multiple of ksize
267 int32_t rnd_up_input_kdim = arm_compute::ceil_to_multiple<int32_t, int32_t>(input_k_dim, ksize);
268 ARM_COMPUTE_RETURN_ERROR_ON(rnd_up_input_kdim != output_k_dim);
269 // output x_dim needs to be same as input
270 ARM_COMPUTE_RETURN_ERROR_ON(input_x_dim != output_x_dim);
David Svantesson3b162e52023-03-28 14:13:32 +0000271 }
272 return Status{};
273}
274
275} // namespace arm_compute
David Svantessoncd8b40d2023-05-02 13:05:36 +0000276
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100277#endif // defined(__aarch64__)