Blame - src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp - ml/ComputeLibrary

blob: 6211abcad0c1189fa7ca4f76fcae4b369652d84c [file] [log] [blame]

giuros01	fc1da13	2019-02-18 16:48:35 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2019 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h"
				25
				26	#include "arm_compute/core/Helpers.h"
				27	#include "arm_compute/core/ITensor.h"
				28	#include "arm_compute/core/NEON/wrapper/wrapper.h"
				29	#include "arm_compute/core/Types.h"
				30	#include "arm_compute/core/Validate.h"
				31	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
				32	#include <arm_neon.h>
				33	#include <cstdint>
				34
				35	using namespace arm_compute::misc::shape_calculator;
				36
				37	namespace arm_compute
				38	{
				39	namespace
				40	{
				41	Status validate_arguments(const ITensorInfo input, const ITensorInfo block_info, const ITensorInfo *output)
				42	{
				43	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_info, output);
				44	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(block_info, 1, DataType::S32);
				45	ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
				46
				47	// Validate output if initialized
				48	if(output->total_size() != 0)
				49	{
				50	ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
				51	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				52	}
				53
				54	return Status{};
				55	}
				56	Status validate_arguments_static(const ITensorInfo input, const int block_shape_x, const int block_shape_y, const ITensorInfo output)
				57	{
				58	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
				59	ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
				60	ARM_COMPUTE_RETURN_ERROR_ON(block_shape_x <= 0);
				61	ARM_COMPUTE_RETURN_ERROR_ON(block_shape_y <= 0);
				62
				63	const DataLayout data_layout = input->data_layout();
				64	const int idx_batch = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
				65	ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape()[idx_batch] % (block_shape_x * block_shape_y) != 0);
giuros01	fc1da13	2019-02-18 16:48:35 +0000	[diff] [blame]	66	// Validate output if initialized
				67	if(output->total_size() != 0)
				68	{
				69	const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
				70	const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
				71	const int idx_channel = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
				72	ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_width] != (block_shape_x * input->tensor_shape()[idx_width]));
Pablo Tello	0a33a21	2019-04-23 12:06:19 +0100	[diff] [blame]	73	ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_height] != (block_shape_y * input->tensor_shape()[idx_height]));
giuros01	fc1da13	2019-02-18 16:48:35 +0000	[diff] [blame]	74	ARM_COMPUTE_RETURN_ERROR_ON(output->tensor_shape()[idx_channel] != input->tensor_shape()[idx_channel]);
				75	ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 4);
				76	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
				77	}
				78
				79	return Status{};
				80	}
				81	} // namespace
				82
				83	NEBatchToSpaceLayerKernel::NEBatchToSpaceLayerKernel()
				84	: _input(nullptr), _block_shape(nullptr), _output(nullptr), _block_shape_x(), _block_shape_y()
				85	{
				86	}
				87
				88	void NEBatchToSpaceLayerKernel::configure(const ITensor input, const ITensor block_shape, ITensor *output)
				89	{
				90	ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
				91	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info()));
				92
				93	_input = input;
				94	_block_shape = block_shape;
				95	_output = output;
				96
				97	// Configure kernel window
				98	Window win = calculate_max_window(*input->info(), Steps());
				99	ICPPKernel::configure(win);
				100	}
				101
				102	void NEBatchToSpaceLayerKernel::configure(const ITensor input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor output)
				103	{
				104	ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
				105	TensorShape output_shape = compute_batch_to_space_shape(input->info(), block_shape_x, block_shape_y);
				106	// Output auto inizialitation if not yet initialized
				107	auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
				108
				109	// Perform validation step
				110	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_static(input->info(), block_shape_x, block_shape_y, output->info()));
				111
				112	_input = input;
				113	_output = output;
				114	_block_shape_x = block_shape_x;
				115	_block_shape_y = block_shape_y;
				116
				117	// Configure kernel window
				118	Window win = calculate_max_window(*input->info(), Steps());
				119	ICPPKernel::configure(win);
				120	}
				121
				122	Status NEBatchToSpaceLayerKernel::validate(const ITensorInfo input, const ITensorInfo block_shape, const ITensorInfo *output)
				123	{
				124	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, block_shape, output);
				125	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, block_shape, output));
				126	return Status{};
				127	}
				128
				129	Status NEBatchToSpaceLayerKernel::validate(const ITensorInfo input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo output)
				130	{
				131	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
				132	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_static(input, block_shape_x, block_shape_y, output));
				133	return Status{};
				134	}
				135
				136	void NEBatchToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info)
				137	{
				138	ARM_COMPUTE_UNUSED(info);
				139	ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
				140	ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
				141
				142	if(_block_shape != nullptr)
				143	{
				144	// Retrieve the block shapes dynamically
				145	_block_shape_x = (reinterpret_cast<const int >(_block_shape->ptr_to_element(0)));
				146	_block_shape_y = (reinterpret_cast<const int >(_block_shape->ptr_to_element(1)));
				147	}
				148
				149	const int batch_size = _input->info()->dimension(3);
				150	const int r = (batch_size / (_block_shape_x * _block_shape_y));
				151	const int element_size = _input->info()->element_size();
				152
				153	Window slice_in = window.first_slice_window_3D();
				154	Window slice_out = window.first_slice_window_4D();
				155
				156	// The slice_out slice does not move
				157	slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
				158	slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
				159	slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
				160	slice_out.set(3, Window::Dimension(0, 0, 0));
				161
				162	int batch_id = 0;
				163	// Main loop for NCHW and NHWC
				164	if(_input->info()->data_layout() == DataLayout::NCHW)
				165	{
				166	do
				167	{
				168	Iterator in(_input, slice_in);
				169	execute_window_loop(slice_in, [&](const Coordinates & id)
				170	{
				171
				172	const int x = id.x();
				173	const int y = id.y();
				174	const int z = id.z();
				175
				176	const int w = batch_id % r;
				177	const int out_x = x * _block_shape_x + (batch_id / r) % _block_shape_x;
				178	const int out_y = y * _block_shape_y + (batch_id / r) / _block_shape_x;
				179	Coordinates output_coords{ out_x, out_y, z, w };
				180	memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
				181	},
				182	in);
				183	++batch_id;
				184	}
				185	while(window.slide_window_slice_3D(slice_in));
				186	}
				187	else
				188	{
				189	do
				190	{
				191	Iterator in(_input, slice_in);
				192	execute_window_loop(slice_in, [&](const Coordinates & id)
				193	{
				194
				195	const int z = id.x();
				196	const int x = id.y();
				197	const int y = id.z();
				198
				199	const int w = batch_id % r;
				200	const int out_x = x * _block_shape_x + (batch_id / r) % _block_shape_x;
				201	const int out_y = y * _block_shape_y + (batch_id / r) / _block_shape_x;
				202	Coordinates output_coords{ z, out_x, out_y, w };
				203	memcpy(_output->ptr_to_element(output_coords), in.ptr(), element_size);
				204	},
				205	in);
				206	++batch_id;
				207	}
				208	while(window.slide_window_slice_3D(slice_in));
				209	}
				210	}
				211	} // namespace arm_compute