Blame - src/runtime/NEON/functions/NEGEMMLowp.cpp - ml/ComputeLibrary

blob: ab7fa079b13562d55305bf3339f2a369adde6438 [file] [log] [blame]

Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2017 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/runtime/NEON/functions/NEGEMMLowp.h"
				25
				26	#include "arm_compute/core/Error.h"
				27	#include "arm_compute/core/Helpers.h"
				28	#include "arm_compute/core/ITensor.h"
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	29	#include "arm_compute/core/NEON/kernels/arm64/NEGEMMLowpAArch64V8P4Kernel.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	30	#include "arm_compute/core/TensorInfo.h"
				31	#include "arm_compute/core/Types.h"
				32	#include "arm_compute/core/Validate.h"
				33	#include "arm_compute/runtime/NEON/NEScheduler.h"
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	34	#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	35	#include "arm_compute/runtime/TensorAllocator.h"
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	36	#include "support/ToolchainSupport.h"
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	37
				38	using namespace arm_compute;
				39
Georgios Pinitas	658039b	2017-09-15 16:30:50 +0100	[diff] [blame]	40	NEGEMMLowp::NEGEMMLowp(std::shared_ptr<IMemoryManager> memory_manager)
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	41	: _memory_group(std::move(memory_manager)), _mm_func(), _mtx_a_reduction_kernel(), _mtx_b_reduction_kernel(), _finalize_kernel(), _vector_sum_col(), _vector_sum_row(), _mm_output(), _a_offset(0),
				42	_b_offset(0)
Anthony Barbier	6ff3b19	2017-09-04 18:44:23 +0100	[diff] [blame]	43	{
				44	}
				45
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	46	void NEGEMMLowp::configure(const ITensor a, const ITensor b, ITensor *output, int32_t a_offset, int32_t b_offset, int32_t c_offset, int32_t output_mult_int, int32_t shift)
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	47	{
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	48	ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN((a), 1, DataType::U8);
				49	ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output);
				50	ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(0) != (b)->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B");
				51	ARM_COMPUTE_ERROR_ON_MSG((a)->info()->dimension(1) != (output)->info()->dimension(1), "The output matrix must have the same number of rows as the matrix A");
				52	ARM_COMPUTE_ERROR_ON_MSG((b)->info()->dimension(0) != (output)->info()->dimension(0), "The output matrix must have the same number of columns as the matrix B");
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	53
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	54	_a_offset = a_offset;
				55	_b_offset = b_offset;
				56
				57	// Initialize matrix multiply output tensor
				58	const TensorShape &shape_mm_output = output->info()->tensor_shape();
				59	TensorInfo info_mm_output(shape_mm_output, 1, DataType::S32);
				60	_mm_output.allocator()->init(info_mm_output);
				61	_memory_group.manage(&_mm_output);
				62
				63	// Initialize Matrix B reduction kernel only if _a_offset is not equal to 0
				64	if(_a_offset != 0)
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	65	{
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	66	TensorShape shape_vector_sum_col = b->info()->tensor_shape();
				67	shape_vector_sum_col.remove_dimension(1);
				68	TensorInfo info_vector_sum_col(shape_vector_sum_col, 1, DataType::S32);
				69	_vector_sum_col.allocator()->init(info_vector_sum_col);
				70	_memory_group.manage(&_vector_sum_col);
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	71
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	72	// Configure Matrix B reduction kernel
				73	_mtx_b_reduction_kernel.configure(b, &_vector_sum_col, a->info()->dimension(0), false);
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	74	}
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	75
				76	// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
				77	if(_b_offset != 0)
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	78	{
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	79	TensorShape shape_vector_sum_row = a->info()->tensor_shape();
				80	shape_vector_sum_row.set(Window::DimX, a->info()->dimension(1));
				81	shape_vector_sum_row.remove_dimension(1);
				82	TensorInfo info_vector_sum_row(shape_vector_sum_row, 1, DataType::S32);
				83	_vector_sum_row.allocator()->init(info_vector_sum_row);
				84	_memory_group.manage(&_vector_sum_row);
				85
				86	// Configure Matrix A reduction kernel
				87	_mtx_a_reduction_kernel.configure(a, &_vector_sum_row, a->info()->dimension(0), false);
				88	}
				89
				90	// Configure matrix multiply function
				91	_mm_func.configure(a, b, &_mm_output);
				92
				93	// Configure finalize kernel
				94	_finalize_kernel.configure(_a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, &_mm_output, output, a->info()->dimension(0), a_offset, b_offset, c_offset,
				95	output_mult_int, shift);
				96
				97	// Allocate tensors
				98	_mm_output.allocator()->allocate();
				99
				100	if(_a_offset != 0)
				101	{
				102	_vector_sum_col.allocator()->allocate();
				103	}
				104
				105	if(_b_offset != 0)
				106	{
				107	_vector_sum_row.allocator()->allocate();
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	108	}
				109	}
				110
				111	void NEGEMMLowp::run()
				112	{
				113	_memory_group.acquire();
				114
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	115	// Run matrix A reduction kernel only if _b_offset is not equal to 0
				116	if(_b_offset != 0)
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	117	{
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	118	NEScheduler::get().schedule(&_mtx_a_reduction_kernel, Window::DimX);
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	119	}
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	120
				121	// Run matrix B reduction kernel only if _a_offset is not equal to 0
				122	if(_a_offset != 0)
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	123	{
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	124	NEScheduler::get().schedule(&_mtx_b_reduction_kernel, Window::DimX);
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	125	}
				126
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	127	// Run matrix multiply core function
				128	_mm_func.run();
				129
				130	// Run finalise kernel
				131	NEScheduler::get().schedule(&_finalize_kernel, Window::DimY);
				132
Pablo Tello	bf2fb95	2017-09-29 16:43:25 +0100	[diff] [blame]	133	_memory_group.release();
Gian Marco Iodice	ab18212	2017-10-09 15:05:40 +0100	[diff] [blame^]	134	}