Blame - src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp - ml/ComputeLibrary

blob: 2f676d30d1a7803f3cd586b4bed9fba067959c14 [file] [log] [blame]

Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2020 Arm Limited.
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
				25	#include "arm_compute/core/CL/ICLTensor.h"
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
				28	#include "support/StringSupport.h"
				29
				30	namespace arm_compute
				31	{
				32	namespace
				33	{
Sheri Zhang	3a35398	2020-04-21 13:10:24 +0100	[diff] [blame]	34	QuantizationInfo compute_output_qinfo()
				35	{
				36	return QuantizationInfo(1.f / 4096);
				37	}
				38
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	39	std::pair<Status, Window> validate_and_configure_window(ITensorInfo input, ITensorInfo output)
				40	{
				41	ARM_COMPUTE_ERROR_ON_NULLPTR(input);
				42	// Output auto inizialitation if not yet initialized
				43	auto_init_if_empty(output, input);
Sheri Zhang	3a35398	2020-04-21 13:10:24 +0100	[diff] [blame]	44	output->set_quantization_info(compute_output_qinfo());
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	45
				46	const uint32_t temp_num_elems_processed_per_iteration = max_cl_vector_width / input->element_size();
				47	/* If width is less then step, then make step same as width to avoid global size being step instead of actual width. */
				48	/* Or we should fix in arm_compute::enqueue() or arm_compute::calculate_max_window(). */
				49	const uint32_t num_elems_processed_per_iteration = (input->dimension(0) < temp_num_elems_processed_per_iteration) ? input->dimension(0) : temp_num_elems_processed_per_iteration;
				50
				51	// This kernel doesn't need padding
				52	Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
				53	output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
				54
				55	return std::make_pair(Status{}, win);
				56	}
Sheri Zhang	3a35398	2020-04-21 13:10:24 +0100	[diff] [blame]	57	Status validate_arguments(const ITensorInfo input, const ITensorInfo output, const ITensorInfo weight, const ITensorInfo bias)
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	58	{
				59	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weight, bias, output);
				60
				61	ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 2, "Input tensor cannot have more than 2 dimensions");
				62	ARM_COMPUTE_RETURN_ERROR_ON_MSG(weight->num_dimensions() > 1, "Weight tensor cannot have more than 1 dimensions");
				63	ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1, "Bias tensor cannot have more than 1 dimensions");
				64
				65	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QSYMM16);
				66	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weight, 1, DataType::QSYMM16);
				67	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
				68
				69	ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().x() != weight->tensor_shape().x());
				70	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(weight, bias);
				71
				72	// Checks performed when output is configured
				73	if(output->total_size() != 0)
				74	{
				75	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
				76	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				77	}
				78	return Status{};
				79	}
				80	} // namespace
				81
				82	CLQLSTMLayerNormalizationKernel::CLQLSTMLayerNormalizationKernel()
				83	: _input(nullptr), _weight(nullptr), _bias(nullptr), _output(nullptr)
				84	{
				85	}
				86
Manuel Bottini	679fc96	2020-04-21 16:08:53 +0100	[diff] [blame]	87	void CLQLSTMLayerNormalizationKernel::configure(const CLCompileContext &compile_context, const ICLTensor input, ICLTensor output, const ICLTensor weight, const ICLTensor bias)
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	88	{
				89	ARM_COMPUTE_ERROR_ON_NULLPTR(input, weight, bias, output);
				90
				91	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), weight->info(), bias->info()));
				92
				93	_input = input;
				94	_weight = weight;
				95	_bias = bias;
				96	_output = output;
				97
				98	const uint32_t num_elems_processed_per_iteration = max_cl_vector_width / input->info()->element_size();
				99
				100	int32_t output_multiplier{};
				101	int32_t output_shift{};
				102	const UniformQuantizationInfo quan_info = _weight->info()->quantization_info().uniform();
				103	const Status status = quantization::calculate_quantized_multiplier(quan_info.scale, &output_multiplier, &output_shift);
				104	output_shift *= -1;
				105
				106	// Set build options
				107	CLBuildOptions build_opts;
				108	build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
				109	build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
				110	build_opts.add_option("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
				111	build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier));
				112	build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift));
				113	build_opts.add_option("-DMIN_BOUND=" + support::cpp11::to_string(std::get<0>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
				114	build_opts.add_option("-DMAX_BOUND=" + support::cpp11::to_string(std::get<1>(quantization::get_min_max_values_from_quantized_data_type(input->info()->data_type()))));
				115
				116	// Create kernel
				117	_kernel = create_kernel(compile_context, "qlstm_layer_normalization", build_opts.options());
				118
				119	// Configure kernel window
				120	auto win_config = validate_and_configure_window(input->info(), output->info());
				121	ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
				122	ICLKernel::configure_internal(win_config.second);
				123
				124	// Set config_id for enabling LWS tuning
				125	_config_id = "qlstm_layer_normalization_";
				126	_config_id += lower_string(string_from_data_type(input->info()->data_type()));
				127	_config_id += "_";
				128	_config_id += support::cpp11::to_string(input->info()->dimension(0));
				129	_config_id += "_";
				130	_config_id += support::cpp11::to_string(input->info()->dimension(1));
				131	}
				132
				133	void CLQLSTMLayerNormalizationKernel::configure(const ICLTensor input, ICLTensor output, const ICLTensor weight, const ICLTensor bias)
				134	{
				135	configure(CLKernelLibrary::get().get_compile_context(), input, output, weight, bias);
				136	}
				137
Sheri Zhang	3a35398	2020-04-21 13:10:24 +0100	[diff] [blame]	138	Status CLQLSTMLayerNormalizationKernel::validate(const ITensorInfo input, const ITensorInfo output, const ITensorInfo weight, const ITensorInfo bias)
Sheri Zhang	b18252d	2020-04-07 11:04:57 +0100	[diff] [blame]	139	{
				140	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, weight, bias));
				141	ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
				142	return Status{};
				143	}
				144
				145	void CLQLSTMLayerNormalizationKernel::run(const Window &window, cl::CommandQueue &queue)
				146	{
				147	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				148	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
				149
				150	Window slice = window.first_slice_window_2D();
				151	// Set slice step equal to width to force gws[0] to 1, as each thread normalizes across all rows
				152	slice.set_dimension_step(Window::DimX, _input->info()->dimension(0));
				153
				154	Window weight_window;
				155	Window weight_slice;
				156
				157	weight_window.use_tensor_dimensions(_weight->info()->tensor_shape());
				158	weight_slice = weight_window.first_slice_window_1D();
				159
				160	do
				161	{
				162	unsigned int idx = 0;
				163	add_2D_tensor_argument(idx, _input, slice);
				164	add_1D_tensor_argument(idx, _weight, weight_slice);
				165	add_1D_tensor_argument(idx, _bias, weight_slice);
				166	add_2D_tensor_argument(idx, _output, slice);
				167
				168	enqueue(queue, *this, slice, lws_hint());
				169	}
				170	while(window.slide_window_slice_2D(slice));
				171	}
				172	} // namespace arm_compute