/*
 * Copyright (c) 2019-2020, 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
giuros01fc1da132019-02-18 16:48:35 +000025
26#include "arm_compute/core/Helpers.h"
27#include "arm_compute/core/ITensor.h"
SiCong Li8893e452023-03-23 12:06:45 +000028#include "arm_compute/core/TensorInfo.h"
giuros01fc1da132019-02-18 16:48:35 +000029#include "arm_compute/core/Types.h"
giuros01fc1da132019-02-18 16:48:35 +000030#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010031#include "arm_compute/core/Validate.h"
32
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010033#include "src/core/helpers/AutoConfiguration.h"
34#include "src/core/helpers/WindowHelpers.h"
giuros01fc1da132019-02-18 16:48:35 +000035
36using namespace arm_compute::misc::shape_calculator;
37
38namespace arm_compute
39{
40namespace
41{
42Status validate_arguments(const ITensorInfo *input, const ITensorInfo *block_info, const ITensorInfo *output)
43{
44 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, output);
45 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
46 ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
Georgios Pinitas33843562019-12-10 13:33:18 +000047 ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
giuros01fc1da132019-02-18 16:48:35 +000048
49 // Validate output if initialized
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010050 if (output->total_size() != 0)
giuros01fc1da132019-02-18 16:48:35 +000051 {
52 ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
53 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
54 }
55
56 return Status{};
57}
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010058Status validate_arguments_static(const ITensorInfo *input,
59 int block_shape_x,
60 int block_shape_y,
61 const ITensorInfo *output,
62 const CropInfo &crop_info)
giuros01fc1da132019-02-18 16:48:35 +000063{
64 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
65 ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
66 ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x <= 0);
67 ARM_COMPUTE_RETURN_ERROR_ON(block_shape_y <= 0);
68
69 const DataLayout data_layout = input->data_layout();
70 const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
71 ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0);
giuros01fc1da132019-02-18 16:48:35 +000072 // Validate output if initialized
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010073 if (output->total_size() != 0)
giuros01fc1da132019-02-18 16:48:35 +000074 {
giuros01fc1da132019-02-18 16:48:35 +000075 ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
76 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
SiCong Li8893e452023-03-23 12:06:45 +000077
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010078 const TensorShape expected_output_shape = compute_batch_to_space_shape(
79 input->data_layout(), input->tensor_shape(), block_shape_x, block_shape_y, crop_info);
80 const TensorInfo expected_output = output->clone()->set_tensor_shape(expected_output_shape);
SiCong Li8893e452023-03-23 12:06:45 +000081 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output);
giuros01fc1da132019-02-18 16:48:35 +000082 }
83
84 return Status{};
85}
86} // namespace
87
88NEBatchToSpaceLayerKernel::NEBatchToSpaceLayerKernel()
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010089 : _input(nullptr),
90 _block_shape(nullptr),
91 _output(nullptr),
92 _data_layout(DataLayout::UNKNOWN),
93 _block_shape_x(),
94 _block_shape_y(),
95 _crop_info()
giuros01fc1da132019-02-18 16:48:35 +000096{
97}
98
99void NEBatchToSpaceLayerKernel::configure(const ITensor *input, const ITensor *block_shape, ITensor *output)
100{
101 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
102 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info()));
103
104 _input = input;
105 _block_shape = block_shape;
106 _output = output;
Sadik Armagan29658042020-05-11 10:35:08 +0100107 _data_layout = input->info()->data_layout();
giuros01fc1da132019-02-18 16:48:35 +0000108
109 // Configure kernel window
SiCong Li8893e452023-03-23 12:06:45 +0000110 Window win = calculate_max_window(*output->info(), Steps());
giuros01fc1da132019-02-18 16:48:35 +0000111 ICPPKernel::configure(win);
112}
113
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100114void NEBatchToSpaceLayerKernel::configure(
115 const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output, const CropInfo &crop_info)
giuros01fc1da132019-02-18 16:48:35 +0000116{
117 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 const TensorShape output_shape = compute_batch_to_space_shape(
119 input->info()->data_layout(), input->info()->tensor_shape(), block_shape_x, block_shape_y);
SiCong Li8893e452023-03-23 12:06:45 +0000120 // Output auto initialization if not yet initialized
giuros01fc1da132019-02-18 16:48:35 +0000121 auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
122
123 // Perform validation step
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100124 ARM_COMPUTE_ERROR_THROW_ON(
125 validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info(), crop_info));
giuros01fc1da132019-02-18 16:48:35 +0000126
127 _input = input;
128 _output = output;
129 _block_shape_x = block_shape_x;
130 _block_shape_y = block_shape_y;
Sadik Armagan29658042020-05-11 10:35:08 +0100131 _data_layout = input->info()->data_layout();
SiCong Li8893e452023-03-23 12:06:45 +0000132 _crop_info = crop_info;
giuros01fc1da132019-02-18 16:48:35 +0000133
134 // Configure kernel window
SiCong Li8893e452023-03-23 12:06:45 +0000135 Window win = calculate_max_window(*output->info(), Steps());
giuros01fc1da132019-02-18 16:48:35 +0000136 ICPPKernel::configure(win);
137}
138
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100139Status
140NEBatchToSpaceLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
giuros01fc1da132019-02-18 16:48:35 +0000141{
142 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_shape, output);
143 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, output));
144 return Status{};
145}
146
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100147Status NEBatchToSpaceLayerKernel::validate(const ITensorInfo *input,
148 int32_t block_shape_x,
149 int32_t block_shape_y,
150 const ITensorInfo *output,
151 const CropInfo &crop_info)
giuros01fc1da132019-02-18 16:48:35 +0000152{
153 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
SiCong Li8893e452023-03-23 12:06:45 +0000154 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, output, crop_info));
giuros01fc1da132019-02-18 16:48:35 +0000155 return Status{};
156}
157
158void NEBatchToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info)
159{
160 ARM_COMPUTE_UNUSED(info);
161 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
162 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
163
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100164 if (_block_shape != nullptr)
giuros01fc1da132019-02-18 16:48:35 +0000165 {
166 // Retrieve the block shapes dynamically
167 _block_shape_x = *(reinterpret_cast<const int *>(_block_shape->ptr_to_element(0)));
168 _block_shape_y = *(reinterpret_cast<const int *>(_block_shape->ptr_to_element(1)));
169 }
170
SiCong Li8893e452023-03-23 12:06:45 +0000171 const int batch_size = _output->info()->dimension(3);
172 const int element_size = _output->info()->element_size();
giuros01fc1da132019-02-18 16:48:35 +0000173
SiCong Li8893e452023-03-23 12:06:45 +0000174 Window slice_out = window.first_slice_window_3D();
giuros01fc1da132019-02-18 16:48:35 +0000175
176 int batch_id = 0;
177 // Main loop for NCHW and NHWC
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100178 if (_data_layout == DataLayout::NCHW)
giuros01fc1da132019-02-18 16:48:35 +0000179 {
180 do
181 {
SiCong Li8893e452023-03-23 12:06:45 +0000182 Iterator out(_output, slice_out);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100183 execute_window_loop(
184 slice_out,
185 [&](const Coordinates &id)
186 {
187 const int x = id.x();
188 const int y = id.y();
189 const int z = id.z();
190 // Translate x, y to uncropped version
191 const int x_c = x + _crop_info.left;
192 const int y_c = y + _crop_info.top;
giuros01fc1da132019-02-18 16:48:35 +0000193
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100194 const int in_batch =
195 batch_id + ((x_c % _block_shape_x) + (y_c % _block_shape_y) * _block_shape_x) * batch_size;
196 const int in_x = x_c / _block_shape_x;
197 const int in_y = y_c / _block_shape_y;
198 Coordinates input_coords{in_x, in_y, z, in_batch};
199 memcpy(out.ptr(), _input->ptr_to_element(input_coords), element_size);
200 },
201 out);
giuros01fc1da132019-02-18 16:48:35 +0000202 ++batch_id;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100203 } while (window.slide_window_slice_3D(slice_out));
giuros01fc1da132019-02-18 16:48:35 +0000204 }
205 else
206 {
SiCong Li8893e452023-03-23 12:06:45 +0000207 // For NHWC we can perform a block copy on the Channel (first) dimension. Thus we do not need to iterate over this dimension
208 slice_out.set(0U, Window::Dimension(0U, 1U, 1U));
giuros01fc1da132019-02-18 16:48:35 +0000209 do
210 {
SiCong Li8893e452023-03-23 12:06:45 +0000211 Iterator out(_output, slice_out);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100212 execute_window_loop(
213 slice_out,
214 [&](const Coordinates &id)
215 {
216 const int x = id.y();
217 const int y = id.z();
giuros01fc1da132019-02-18 16:48:35 +0000218
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100219 // Translate x, y to uncropped version
220 const int x_c = x + _crop_info.left;
221 const int y_c = y + _crop_info.top;
giuros01fc1da132019-02-18 16:48:35 +0000222
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100223 const int in_batch =
224 batch_id + ((x_c % _block_shape_x) + (y_c % _block_shape_y) * _block_shape_x) * batch_size;
225 const int in_x = x_c / _block_shape_x;
226 const int in_y = y_c / _block_shape_y;
227 Coordinates input_coords{0, in_x, in_y, in_batch};
228 memcpy(out.ptr(), _input->ptr_to_element(input_coords),
229 element_size * _input->info()->dimension(0));
230 },
231 out);
giuros01fc1da132019-02-18 16:48:35 +0000232 ++batch_id;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100233 } while (window.slide_window_slice_3D(slice_out));
giuros01fc1da132019-02-18 16:48:35 +0000234 }
235}
236} // namespace arm_compute