blob: 3fccc0447da019d86f1af62103df95b8009ba775 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +01002 * Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000024#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/CL/CLHelpers.h"
27#include "arm_compute/core/CL/CLKernelLibrary.h"
Vidhya Sudhan Loganathanf1f49062018-05-25 13:21:26 +010028#include "arm_compute/core/CL/CLValidate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010029#include "arm_compute/core/CL/ICLTensor.h"
30#include "arm_compute/core/CL/OpenCL.h"
31#include "arm_compute/core/Error.h"
32#include "arm_compute/core/Helpers.h"
33#include "arm_compute/core/IAccessWindow.h"
34#include "arm_compute/core/TensorInfo.h"
35#include "arm_compute/core/Utils.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010036#include "arm_compute/core/Window.h"
37
Georgios Pinitasac4e8732017-07-05 17:02:25 +010038#include "support/ToolchainSupport.h"
39
40#include <map>
41
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042using namespace arm_compute;
43
Georgios Pinitase29acf12018-07-16 14:40:09 +010044namespace
45{
46std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
47{
48 ARM_COMPUTE_UNUSED(depth_offset);
49
50 // Configure kernel window
51 const int left_right = (output->dimension(0) - input->dimension(0)) / 2;
52 const int top_bottom = (output->dimension(1) - input->dimension(1)) / 2;
53
54 const unsigned int num_elems_processed_per_iteration = 16 / input->element_size();
55 const unsigned int num_elems_read_per_iteration = 16 / input->element_size();
56 const unsigned int num_rows_read_per_iteration = 1;
57
58 // The window needs to be based on input as we copy all the depths of input
59 Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
60 win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1));
61
62 AccessWindowRectangle input_access(input, -left_right, -top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration);
63 AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
64 bool window_changed = update_window_and_padding(win, input_access, output_access);
65 output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
66
67 Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
68 return std::make_pair(err, win);
69}
70Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
71{
72 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
73 ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
74 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
75 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
76
77 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2));
78 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) > output->dimension(0));
79 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(1) > output->dimension(1));
80 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
81
82 // The gaps between the two lowest dimensions of input and output need to be divisible by 2
83 // Otherwise it is not clear how the padding should be added onto the input tensor
84 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) - input->dimension(0)) % 2);
85 ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(1) - input->dimension(1)) % 2);
86
87 return Status{};
88}
89} // namespace
90
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000091CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel()
Gian Marco Iodice906443f2017-09-06 15:09:54 +010092 : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010093{
94}
95
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000096BorderSize CLDepthConcatenateLayerKernel::border_size() const
Anthony Barbier6ff3b192017-09-04 18:44:23 +010097{
98 return BorderSize(_top_bottom, _left_right);
99}
100
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000101void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100102{
Georgios Pinitase29acf12018-07-16 14:40:09 +0100103 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
104 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info()));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100105
Gian Marco Iodice906443f2017-09-06 15:09:54 +0100106 _input = input;
107 _output = output;
108 _depth_offset = depth_offset;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109
Georgios Pinitase29acf12018-07-16 14:40:09 +0100110 const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
111
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100112 // Add build options
Georgios Pinitase29acf12018-07-16 14:40:09 +0100113 CLBuildOptions build_opts;
114 build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input->info()->data_type()));
115 build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
Pablo Telloeb6c88a2019-02-07 15:53:19 +0000116 if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info())
117 {
118 build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset));
119 build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset));
120 build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale));
121 build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale));
122 }
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100123
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124 // Create kernel
Georgios Pinitase29acf12018-07-16 14:40:09 +0100125 _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts.options()));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100126
127 // Configure kernel window
128 _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2;
129 _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2;
130
Georgios Pinitase29acf12018-07-16 14:40:09 +0100131 // Configure kernel window
132 auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info());
133 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100134
Anthony Barbierb6eb3532018-08-08 13:20:04 +0100135 ICLKernel::configure_internal(std::get<1>(win_config));
Georgios Pinitase29acf12018-07-16 14:40:09 +0100136}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100137
Georgios Pinitase29acf12018-07-16 14:40:09 +0100138Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
139 unsigned int depth_offset,
140 const arm_compute::ITensorInfo *output)
141{
142 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output));
143 ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), depth_offset, output->clone().get()).first);
144 return Status{};
Gian Marco Iodice906443f2017-09-06 15:09:54 +0100145}
146
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000147void CLDepthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue)
Gian Marco Iodice906443f2017-09-06 15:09:54 +0100148{
149 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
150 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
151
152 Window slice = window.first_slice_window_3D();
153
154 const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2];
155
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100156 unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
157 const cl_int3 offsets =
158 {
159 {
160 static_cast<cl_int>(_left_right),
161 static_cast<cl_int>(_top_bottom),
162 static_cast<cl_int>(offset_to_first_elements_in_bytes),
163 }
164 };
165 _kernel.setArg<cl_int3>(idx, offsets);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100166
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100167 do
168 {
169 unsigned int idx = 0;
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100170 add_3D_tensor_argument(idx, _input, slice);
171 add_3D_tensor_argument(idx, _output, slice);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 enqueue(queue, *this, slice);
173 }
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100174 while(window.slide_window_slice_3D(slice));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100175}