Blame - src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp - ml/ComputeLibrary

blob: 91db5b17c2e1f88b4ec458c0c91139a8f8655530 [file] [log] [blame]

Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame^]	2	* Copyright (c) 2018-2020 Arm Limited.
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
				25
				26	#include "arm_compute/core/AccessWindowStatic.h"
				27	#include "arm_compute/core/CL/CLHelpers.h"
				28	#include "arm_compute/core/CL/CLKernelLibrary.h"
				29	#include "arm_compute/core/CL/CLValidate.h"
				30	#include "arm_compute/core/CL/ICLTensor.h"
				31	#include "arm_compute/core/CL/OpenCL.h"
				32	#include "arm_compute/core/Error.h"
				33	#include "arm_compute/core/Helpers.h"
				34	#include "arm_compute/core/IAccessWindow.h"
				35	#include "arm_compute/core/TensorInfo.h"
				36	#include "arm_compute/core/Utils.h"
				37	#include "arm_compute/core/Window.h"
Georgios Pinitas	6631ac2	2019-04-17 12:12:56 +0100	[diff] [blame]	38	#include "arm_compute/core/utils/helpers/tensor_info.h"
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	39	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				40
Matthew Bentham	758b5ba	2020-03-05 23:37:48 +0000	[diff] [blame]	41	#include "support/StringSupport.h"
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	42
				43	namespace arm_compute
				44	{
				45	namespace
				46	{
				47	constexpr unsigned int num_elems_processed_per_iteration = 8;
				48
				49	std::pair<Status, Window> validate_and_configure_window(ITensorInfo input1, ITensorInfo input2, ITensorInfo *output)
				50	{
				51	// The window needs to be based on the output
				52	Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
Michele Di Giorgio	8e150a1	2018-12-21 15:20:56 +0000	[diff] [blame]	53	AccessWindowStatic input1_access(input1, 0, 0, ceil_to_multiple(input1->dimension(0), num_elems_processed_per_iteration), input1->dimension(1));
Georgios Pinitas	0b5af9f	2020-06-19 23:22:08 +0100	[diff] [blame]	54	const unsigned int input2_right_padding = ((output->dimension(0) / num_elems_processed_per_iteration) * num_elems_processed_per_iteration - input1->dimension(0) - input2->dimension(
				55	0)) % num_elems_processed_per_iteration;
Michele Di Giorgio	8e150a1	2018-12-21 15:20:56 +0000	[diff] [blame]	56	AccessWindowStatic input2_access(input2, -(input1->dimension(0) % num_elems_processed_per_iteration),
				57	0, input2->dimension(0) + input2_right_padding, input2->dimension(1));
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	58	AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
				59	bool window_changed = update_window_and_padding(win, input1_access, input2_access, output_access);
				60
				61	Window win_collapsed = win.collapse(win, Window::DimZ);
				62
				63	Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
				64	return std::make_pair(err, win_collapsed);
				65	}
				66	Status validate_arguments(const ITensorInfo input1, const ITensorInfo input2, const ITensorInfo *output)
				67	{
				68	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
				69	ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
Manuel Bottini	8481d83	2019-12-10 15:28:40 +0000	[diff] [blame]	70	ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN);
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	71	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
				72	ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0));
				73
				74	for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
				75	{
				76	ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i));
				77	ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i));
				78	}
				79	ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4);
				80
				81	return Status{};
				82	}
				83	} // namespace
				84
				85	CLWidthConcatenate2TensorsKernel::CLWidthConcatenate2TensorsKernel()
				86	: _input1(nullptr), _input2(nullptr), _output(nullptr)
				87	{
				88	}
				89
				90	Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo input1, const ITensorInfo input2, const ITensorInfo *output)
				91	{
				92	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output));
				93	ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), input2->clone().get(), output->clone().get()).first);
				94	return Status{};
				95	}
				96
				97	void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor input1, const ICLTensor input2, ICLTensor *output)
				98	{
Manuel Bottini	4c6bd51	2020-04-08 10:15:51 +0100	[diff] [blame]	99	configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
				100	}
				101
Manuel Bottini	2803f70	2020-04-21 16:20:03 +0100	[diff] [blame]	102	void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, const ICLTensor input1, const ICLTensor input2, ICLTensor *output)
Manuel Bottini	4c6bd51	2020-04-08 10:15:51 +0100	[diff] [blame]	103	{
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	104	ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
				105	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info()));
				106
				107	_input1 = input1;
				108	_input2 = input2;
				109	_output = output;
				110
				111	// Add build options
				112	CLBuildOptions build_opts;
				113	build_opts.add_option("-DDATA_TYPE=" + get_underlying_cl_type_from_data_type(input1->info()->data_type()));
				114	build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
				115	build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->info()->dimension(2)));
				116	build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->info()->dimension(0)));
				117	build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->info()->element_size()));
				118
Georgios Pinitas	6631ac2	2019-04-17 12:12:56 +0100	[diff] [blame]	119	// If input have different quantization info set quantization parameters needed for the re-quantization process
				120	const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info());
				121	if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo)
Pablo Tello	eb6c88a	2019-02-07 15:53:19 +0000	[diff] [blame]	122	{
Georgios Pinitas	4c5469b	2019-05-21 13:32:43 +0100	[diff] [blame]	123	const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform();
				124	const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform();
				125	const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
				126
				127	build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
				128	build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
				129	build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
				130	build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
				131	build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
				132	build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
Pablo Tello	eb6c88a	2019-02-07 15:53:19 +0000	[diff] [blame]	133	}
				134
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	135	// Create kernel
Manuel Bottini	4c6bd51	2020-04-08 10:15:51 +0100	[diff] [blame]	136	_kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options());
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	137
				138	// Configure kernel window
				139	auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
				140	ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
				141
				142	ICLKernel::configure_internal(std::get<1>(win_config));
				143
Isabella Gottardi	f59b16f	2019-07-25 12:03:39 +0100	[diff] [blame]	144	// Set output valid region
				145	output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape()));
				146
Michele Di Giorgio	8e150a1	2018-12-21 15:20:56 +0000	[diff] [blame]	147	// Pass paddings as arguments to the kernel
				148	const unsigned int input1_width = input1->info()->dimension(0);
				149	const unsigned int input1_right_padding = ceil_to_multiple(input1_width, num_elems_processed_per_iteration) - input1_width;
				150	const unsigned int input2_left_padding = input1_width % num_elems_processed_per_iteration;
				151	unsigned int idx0 = 3 * num_arguments_per_4D_tensor();
				152	_kernel.setArg<cl_uint>(idx0++, input1_right_padding);
				153	_kernel.setArg<cl_uint>(idx0++, input2_left_padding);
				154
Michele Di Giorgio	27400b9	2018-11-01 13:44:05 +0000	[diff] [blame]	155	// Set config_id for enabling LWS tuning
				156	_config_id = "concatenate_width_x2_";
				157	_config_id += lower_string(string_from_data_type(input1->info()->data_type()));
				158	_config_id += "_";
				159	_config_id += support::cpp11::to_string(input1->info()->dimension(0));
				160	_config_id += "_";
				161	_config_id += support::cpp11::to_string(input1->info()->dimension(1));
				162	_config_id += "_";
				163	_config_id += support::cpp11::to_string(input2->info()->dimension(0));
				164	_config_id += "_";
				165	_config_id += support::cpp11::to_string(input2->info()->dimension(1));
				166	}
				167
				168	void CLWidthConcatenate2TensorsKernel::run(const Window &window, cl::CommandQueue &queue)
				169	{
				170	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				171	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
				172
				173	Window slice = window.first_slice_window_4D();
				174
				175	do
				176	{
				177	unsigned int idx = 0;
				178	add_4D_tensor_argument(idx, _input1, slice);
				179	add_4D_tensor_argument(idx, _input2, slice);
				180	add_4D_tensor_argument(idx, _output, slice);
				181	enqueue(queue, *this, window, lws_hint());
				182	}
				183	while(window.slide_window_slice_4D(slice));
				184	}
				185	} // namespace arm_compute