Blame - src/runtime/CL/functions/CLSoftmaxLayer.cpp - ml/ComputeLibrary

blob: 3a7d6c770b1a9085f1b06d0c32834f82b3641c1f [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
Giorgio Arena	4402cb9	2018-02-15 13:37:40 +0000	[diff] [blame]	2	* Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
				25
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame]	26	#include "arm_compute/core/CL/CLHelpers.h"
				27	#include "arm_compute/core/CL/ICLKernel.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	28	#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
				29	#include "arm_compute/core/Helpers.h"
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame]	30	#include "arm_compute/core/Types.h"
				31	#include "arm_compute/core/Utils.h"
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	32	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	33	#include "arm_compute/runtime/CL/CLMemoryGroup.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	34	#include "arm_compute/runtime/CL/CLScheduler.h"
				35
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	36	namespace arm_compute
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37	{
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	38	CLSoftmaxLayer::CLSoftmaxLayer(std::shared_ptr<IMemoryManager> memory_manager)
				39	: _memory_group(std::move(memory_manager)), _max_shift_exp_sum_kernel(), _norm_kernel(), _flatten_kernel(), _reshape_kernel(), _max(), _sum(), _tmp(), _input_flat(), _output_flat(),
				40	_needs_flattening(false)
				41	{
				42	}
				43
				44	void CLSoftmaxLayer::configure_flatten_kernel(const ICLTensor input, const ICLTensor output)
				45	{
				46	// Flatten the input
				47	const TensorShape shape_flatten = misc::shape_calculator::compute_flatten_shape(input->info());
				48
				49	// Initialize the flat input
				50	_input_flat.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_flatten));
				51
				52	// Configure the flatten_kernel
				53	_flatten_kernel.configure(input, &_input_flat);
				54
				55	// We need to init the output tensor here. Indeed, the reshape kernel expects
				56	// both tensors to be already initialized
				57	auto_init_if_empty(output->info(), input->info()->clone());
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	58	}
				59
Pablo Palmier	48a60f9	2017-10-18 11:03:08 +0100	[diff] [blame]	60	void CLSoftmaxLayer::configure(const ICLTensor input, ICLTensor output, float beta)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	61	{
Georgios Pinitas	ee8be2d	2017-11-22 12:53:45 +0000	[diff] [blame]	62	// Perform validation step
				63	ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
				64	ARM_COMPUTE_ERROR_THROW_ON(CLSoftmaxLayer::validate(input->info(), output->info()));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	65
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	66	_needs_flattening = input->info()->num_dimensions() > 2;
				67
				68	// If we are dealing with a 4D tensor, we will:
				69	// - Flatten the input, so that we end up with a [widthheightdepth] * batches 2D tensor
				70	// - Execute all the pipeline (reduction + normalization) on the flattened tensor
				71	// - Reshape the flattened output into the real output
				72	if(_needs_flattening)
				73	{
				74	// Add to the memory manager _input_flat
				75	_memory_group.manage(&_input_flat);
				76
				77	// Cofigure _flatten_kernel and _input_flat
				78	configure_flatten_kernel(input, output);
				79	}
				80
				81	// We want to deal with a 2D input. Either it is the flattened version of the original input (4D case)
				82	// or it is the original input case (2D case)
				83	const ICLTensor *input_2D = (_needs_flattening ? &_input_flat : input);
				84
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	85	// Create intermediate tensors shapes
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	86	TensorInfo input_info = input_2D->info()->clone()->reset_padding().set_is_resizable(true);
				87	DataType tmp_data_type = is_data_type_quantized_asymmetric(input_2D->info()->data_type()) ? DataType::S32 : input_2D->info()->data_type();
				88	TensorInfo tensor_info_tmp(input_info.clone()->set_data_type(tmp_data_type));
Chunosov	f450caa	2017-11-08 16:09:35 +0700	[diff] [blame]	89	_tmp.allocator()->init(tensor_info_tmp);
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	90
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	91	TensorShape max_sum_shape = input_2D->info()->tensor_shape();
Chunosov	f450caa	2017-11-08 16:09:35 +0700	[diff] [blame]	92	max_sum_shape.set(0, 1);
Georgios Pinitas	ee8be2d	2017-11-22 12:53:45 +0000	[diff] [blame]	93	_max.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape));
				94	_sum.allocator()->init(input_info.clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type));
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	95
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame]	96	// Set GPU target to kernels
				97	_max_shift_exp_sum_kernel.set_target(CLScheduler::get().target());
				98
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	99	// Manage intermediate buffers
				100	_memory_group.manage(&_tmp);
				101	_memory_group.manage(&_max);
				102	_memory_group.manage(&_sum);
				103
Chunosov	d6afedc	2017-11-06 22:09:45 +0700	[diff] [blame]	104	// Configure kernels
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	105	_max_shift_exp_sum_kernel.configure(input_2D, &_max, &_tmp, &_sum, beta);
				106
				107	if(_needs_flattening)
				108	{
				109	// Add to the memory manager _output_flat
				110	_memory_group.manage(&_output_flat);
				111
				112	// The normalization kernel stores the result in a flat output tensor
				113	_norm_kernel.configure(&_tmp, &_sum, &_output_flat, beta);
				114
				115	// Reshape the flat output into a the requested (4D) output
				116	_reshape_kernel.configure(&_output_flat, output);
				117
				118	// Allocate the intermediate flat tensors
				119	_input_flat.allocator()->allocate();
				120	_output_flat.allocator()->allocate();
				121	}
				122	else
				123	{
				124	// Softmax 2D case
				125	_norm_kernel.configure(&_tmp, &_sum, output, beta);
				126	}
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	127
				128	// Allocate intermediate buffers
				129	_tmp.allocator()->allocate();
				130	_max.allocator()->allocate();
				131	_sum.allocator()->allocate();
				132	}
				133
Georgios Pinitas	631c41a	2017-12-06 11:53:03 +0000	[diff] [blame]	134	Status CLSoftmaxLayer::validate(const ITensorInfo input, const ITensorInfo output)
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	135	{
Georgios Pinitas	ee8be2d	2017-11-22 12:53:45 +0000	[diff] [blame]	136	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	137	ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() > 4, "Only up to 4 dimensions are supported");
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	138
				139	// Create intermediate tensor info
				140	DataType tmp_data_type = is_data_type_quantized_asymmetric(input->data_type()) ? DataType::S32 : input->data_type();
Michele Di Giorgio	5cb3773	2018-06-08 18:07:08 +0100	[diff] [blame]	141	TensorInfo tensor_info_tmp(input->clone()->set_data_type(tmp_data_type).set_is_resizable(true));
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	142
				143	TensorShape max_sum_shape = input->tensor_shape();
				144	max_sum_shape.set(0, 1);
Michele Di Giorgio	5cb3773	2018-06-08 18:07:08 +0100	[diff] [blame]	145	TensorInfo tensor_info_max(input->clone()->set_tensor_shape(max_sum_shape).set_is_resizable(true));
				146	TensorInfo tensor_info_sum(input->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type).set_quantization_info(QuantizationInfo()).set_is_resizable(true));
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	147
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	148	const TensorShape shape_flatten = misc::shape_calculator::compute_flatten_shape(input);
				149	TensorInfo tensor_info_flat(input->clone()->set_tensor_shape(shape_flatten).set_is_resizable(true));
				150
				151	if(input->num_dimensions() > 2) // needs flattening
				152	{
				153	ARM_COMPUTE_RETURN_ON_ERROR(CLFlattenLayerKernel::validate(input, &tensor_info_flat));
				154	}
				155
Giorgio Arena	4402cb9	2018-02-15 13:37:40 +0000	[diff] [blame]	156	ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DMaxShiftExpSumKernel::validate(input, &tensor_info_max, &tensor_info_tmp, &tensor_info_sum));
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	157	ARM_COMPUTE_RETURN_ON_ERROR(CLLogits1DNormKernel::validate(&tensor_info_tmp, &tensor_info_sum, output));
				158
Georgios Pinitas	631c41a	2017-12-06 11:53:03 +0000	[diff] [blame]	159	return Status{};
Georgios Pinitas	30902ed	2017-11-14 15:32:57 +0000	[diff] [blame]	160	}
				161
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	162	void CLSoftmaxLayer::run()
				163	{
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	164	_memory_group.acquire();
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	165	if(_needs_flattening)
				166	{
				167	CLScheduler::get().enqueue(_flatten_kernel, false);
				168	}
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	169
Giorgio Arena	4402cb9	2018-02-15 13:37:40 +0000	[diff] [blame]	170	CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false);
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	171	CLScheduler::get().enqueue(_norm_kernel, !_needs_flattening);
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	172
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	173	if(_needs_flattening)
				174	{
				175	CLScheduler::get().enqueue(_reshape_kernel, true);
				176	}
				177
				178	// Relase intermediate buffers
Georgios Pinitas	baf174e	2017-09-08 19:47:30 +0100	[diff] [blame]	179	_memory_group.release();
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	180	}
Giuseppe Rossini	87e896a	2018-08-24 10:24:12 +0100	[diff] [blame]	181
				182	} // namespace arm_compute