Blame - src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp - ml/ComputeLibrary

blob: 1cb2458e134f798726ecde9af56278e23f18b880 [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2017-2020 Arm Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	24	#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	25
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	26	#include "arm_compute/core/Error.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	29	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	30	#include "arm_compute/runtime/NEON/NEScheduler.h"
Anthony Barbier	71d9b57	2018-07-06 17:05:59 +0100	[diff] [blame]	31	#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	32	#include "src/core/CPP/Validate.h"
Michalis Spyrou	ebcebf1	2020-10-21 00:04:14 +0100	[diff] [blame]	33	#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
				34	#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
				35	#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
				36	#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
Michele Di Giorgio	6ad60af	2020-06-09 14:52:15 +0100	[diff] [blame]	37	#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	38	#include "support/MemorySupport.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	39
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	40	#include "src/core/NEON/kernels/convolution/common/utils.hpp"
Michele Di Giorgio	6ad60af	2020-06-09 14:52:15 +0100	[diff] [blame]	41	#include "src/core/NEON/kernels/convolution/winograd/winograd.hpp"
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	42
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	43	namespace arm_compute
				44	{
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	45	namespace
				46	{
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	47	inline Status validate_kernel_3x3(const Size2D input_dims, const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				48	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				49	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	50	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
				51	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
				52
				53	if(input->data_type() == DataType::F32)
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	54	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	55	if(input_dims.width > 4 && input_dims.height > 4)
				56	{
				57	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>::validate(input, input0, winograd_info)));
				58	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>::validate(weights, input1, winograd_info)));
				59	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				60	}
				61	else
				62	{
				63	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>::validate(input, input0, winograd_info)));
				64	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>::validate(weights, input1, winograd_info)));
				65	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				66	}
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	67	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	68	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
SiCong Li	6b6a16f	2020-05-28 08:55:51 +0100	[diff] [blame]	69	else if(input->data_type() == DataType::F16)
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	70	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	71	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<__fp16, 4, 4, 3, 3>::validate(input, input0, winograd_info)));
				72	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<__fp16, 4, 4, 3, 3>::validate(weights, input1, winograd_info)));
				73	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<__fp16, 4, 4, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	74	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	75	#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	76
				77	if(act_info.enabled())
				78	{
				79	NEActivationLayer::validate(output, nullptr, act_info);
				80	}
				81	return Status{};
				82	}
				83
				84	inline Status validate_kernel_5x5(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				85	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				86	{
				87	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>::validate(input, input0, winograd_info)));
				88	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>::validate(weights, input1, winograd_info)));
				89	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>::validate(batched_mm_output, biases, output, winograd_info)));
				90	if(act_info.enabled())
				91	{
				92	NEActivationLayer::validate(output, nullptr, act_info);
				93	}
				94	return Status{};
				95	}
				96
				97	inline Status validate_kernel_3x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				98	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				99	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	100	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	101	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 6, 1, 3>::validate(input, input0, winograd_info)));
				102	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 6, 1, 3>::validate(weights, input1, winograd_info)));
				103	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 6, 1, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				104	if(act_info.enabled())
				105	{
				106	NEActivationLayer::validate(output, nullptr, act_info);
				107	}
				108	return Status{};
				109	}
				110
				111	inline Status validate_kernel_1x3(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				112	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				113	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	114	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	115	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 6, 1, 3, 1>::validate(input, input0, winograd_info)));
				116	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 6, 1, 3, 1>::validate(weights, input1, winograd_info)));
				117	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 6, 1, 3, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				118
				119	if(act_info.enabled())
				120	{
				121	NEActivationLayer::validate(output, nullptr, act_info);
				122	}
				123	return Status{};
				124	}
				125
				126	inline Status validate_kernel_5x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				127	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				128	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	129	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	130	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 4, 1, 5>::validate(input, input0, winograd_info)));
				131	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 4, 1, 5>::validate(weights, input1, winograd_info)));
				132	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 4, 1, 5>::validate(batched_mm_output, biases, output, winograd_info)));
				133	if(act_info.enabled())
				134	{
				135	NEActivationLayer::validate(output, nullptr, act_info);
				136	}
				137	return Status{};
				138	}
				139	inline Status validate_kernel_1x5(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				140	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				141	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	142	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	143	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 4, 1, 5, 1>::validate(input, input0, winograd_info)));
				144	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 4, 1, 5, 1>::validate(weights, input1, winograd_info)));
				145	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 4, 1, 5, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				146	if(act_info.enabled())
				147	{
				148	NEActivationLayer::validate(output, nullptr, act_info);
				149	}
				150	return Status{};
				151	}
				152
				153	inline Status validate_kernel_7x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				154	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				155	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	156	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	157	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 2, 1, 7>::validate(input, input0, winograd_info)));
				158	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 2, 1, 7>::validate(weights, input1, winograd_info)));
				159	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 2, 1, 7>::validate(batched_mm_output, biases, output, winograd_info)));
				160	if(act_info.enabled())
				161	{
				162	NEActivationLayer::validate(output, nullptr, act_info);
				163	}
				164	return Status{};
				165	}
				166
				167	inline Status validate_kernel_1x7(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				168	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				169	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	170	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	171	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 1, 7, 1>::validate(input, input0, winograd_info)));
				172	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 1, 7, 1>::validate(weights, input1, winograd_info)));
				173	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 1, 7, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				174
				175	if(act_info.enabled())
				176	{
				177	NEActivationLayer::validate(output, nullptr, act_info);
				178	}
				179	return Status{};
				180	}
				181
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	182	inline Tensor4DShape internal_get_input_shape(const arm_compute::ITensor *input)
				183	{
				184	const DataLayout data_layout = input->info()->data_layout();
				185	const int in_width = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH));
				186	const int in_height = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT));
				187	const int in_channels = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL));
				188	const int in_batches = input->info()->dimension(3);
				189
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	190	return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	191	}
				192
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	193	Status validate_arguments(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info)
				194	{
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	195	ARM_COMPUTE_UNUSED(output);
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	196	ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
				197
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	198	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 \|\| conv_info.stride().second != 1, "Winograd layer only supports unit strides.");
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	199	if(biases != nullptr)
				200	{
				201	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				202	ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
				203	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	204	return INEWinogradLayerTransformWeightsKernel::validate(input, weights);
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	205	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	206
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	207	Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataType data_type)
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	208	{
				209	Size2D output_tile = Size2D{};
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	210	if(kernel_dims == Size2D(3U, 3U))
				211	{
giuros01	f44fe3d	2019-08-14 16:49:27 +0100	[diff] [blame]	212	output_tile = (input_dims.width <= 4 \|\| input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	213	if(data_type == DataType::F16)
				214	{
				215	output_tile = Size2D(4U, 4U);
				216	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	217	}
				218	else if(kernel_dims == Size2D(5U, 5U))
				219	{
				220	output_tile = Size2D(2U, 2U);
				221	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	222	else if(kernel_dims == Size2D(1U, 3U))
				223	{
				224	output_tile = Size2D(1U, 6U);
				225	}
				226	else if(kernel_dims == Size2D(3U, 1U))
				227	{
				228	output_tile = Size2D(6U, 1U);
				229	}
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	230	else if(kernel_dims == Size2D(1U, 5U))
				231	{
				232	output_tile = Size2D(1U, 4U);
				233	}
				234	else if(kernel_dims == Size2D(5U, 1U))
				235	{
				236	output_tile = Size2D(4U, 1U);
				237	}
				238	else if(kernel_dims == Size2D(7U, 1U))
				239	{
				240	output_tile = Size2D(2U, 1U);
				241	}
				242	else if(kernel_dims == Size2D(1U, 7U))
				243	{
				244	output_tile = Size2D(1U, 2U);
				245	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	246	return output_tile;
				247	}
				248
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	249	bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size, DataType data_type)
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	250	{
				251	// Check if we want to configure a Winograd configuration which requires fast math
				252	using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
				253
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	254	const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
				255	{
				256	WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
				257	};
				258
				259	const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	260	{
				261	WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
				262	WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
				263	};
				264
				265	auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
				266	std::pair<int, int>(kernel_size.width, kernel_size.height));
				267
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	268	switch(data_type)
				269	{
				270	case DataType::F16:
				271	return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
				272	case DataType::F32:
				273	return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
				274	default:
				275	return false;
				276	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	277	}
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	278
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	279	inline bool fuse_function_supported(const ActivationLayerInfo &act_info)
				280	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	281	return act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU \|\| act_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU;
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	282	}
				283
				284	arm_gemm::Activation arm_gemm_activation_from_acl_activation(const ActivationLayerInfo &act_info)
				285	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	286	switch(act_info.activation())
				287	{
				288	case ActivationLayerInfo::ActivationFunction::RELU:
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	289	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	290	return arm_gemm::Activation(arm_gemm::Activation::Type::ReLU, act_info.a(), act_info.b());
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	291	}
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	292	case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
				293	{
				294	return arm_gemm::Activation(arm_gemm::Activation::Type::BoundedReLU, act_info.a(), act_info.b());
				295	}
				296	default:
				297	{
				298	return arm_gemm::Activation(arm_gemm::Activation::Type::None);
				299	}
				300	}
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	301	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	302	} //namespace
				303
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	304	NEWinogradConvolutionLayer::NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
Pablo Tello	a518f30	2018-09-19 11:33:03 +0100	[diff] [blame]	305	: _memory_group(memory_manager), _gemm_function(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(),
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	306	_permute_input(), _permute_weights(), _permute_output(), _input_transformed(), _output_transformed(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(),
				307	_weights_hwio(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false)
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	308	{
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	309	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	310
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	311	void NEWinogradConvolutionLayer::configure(const ITensor input, const ITensor weights, const ITensor biases, ITensor output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info,
				312	bool enable_fast_math)
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	313	{
Andrew Mundy	4d9379a	2018-03-15 16:47:03 +0000	[diff] [blame]	314	ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Andrew Mundy	4d9379a	2018-03-15 16:47:03 +0000	[diff] [blame]	315	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info));
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	316
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	317	// Get indices for the width and height
				318	const DataLayout data_layout = input->info()->data_layout();
				319	const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
				320	const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
				321	const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
				322
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	323	const Size2D input_dims = Size2D(input->info()->dimension(width_idx), input->info()->dimension(height_idx));
				324	const Size2D kernel_size = Size2D(weights->info()->dimension(width_idx), weights->info()->dimension(height_idx));
				325	const DataType data_type = input->info()->data_type();
				326	const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	327
				328	// Check if the Winograd configuration requires fast math
				329	if(!enable_fast_math)
				330	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	331	ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
				332	"This Winograd configuration requires enable_fast_math=true");
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	333	}
				334
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	335	_weights = weights;
				336	_input = input;
				337	_output = output;
				338	_is_prepared = false;
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	339
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	340	int n_gemms = 0;
				341	int N_BLOCK = 0; // Size of block used by GEMM.
Michalis Spyrou	2b3129e	2018-04-25 18:10:13 +0100	[diff] [blame]	342
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	343	std::unique_ptr<INEWinogradLayerTransformInputKernel> transform_input_kernel;
				344	std::unique_ptr<INEWinogradLayerTransformWeightsKernel> transform_weights_kernel;
				345	std::unique_ptr<INEWinogradLayerTransformOutputKernel> transform_output_kernel;
				346
				347	if(data_type == DataType::F32)
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	348	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	349	if(kernel_size == Size2D(3, 3))
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	350	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	351	if(input->info()->dimension(width_idx) > 4 && input->info()->dimension(height_idx) > 4)
				352	{
				353	using config = NEWinogradLayerConfiguration<float, float, 4, 4, 3, 3>;
				354	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				355	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				356	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				357	n_gemms = config::WinogradBase::N_GEMMS;
				358	N_BLOCK = config::WinogradConv::N_BLOCK;
				359	}
				360	else
				361	{
				362	using config = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
				363	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				364	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				365	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				366	n_gemms = config::WinogradBase::N_GEMMS;
				367	N_BLOCK = config::WinogradConv::N_BLOCK;
				368	}
				369	}
				370	else if(kernel_size == Size2D(5, 5))
				371	{
				372	using config = NEWinogradLayerConfiguration<float, float, 2, 2, 5, 5>;
				373	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				374	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				375	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				376	n_gemms = config::WinogradBase::N_GEMMS;
				377	N_BLOCK = config::WinogradConv::N_BLOCK;
				378	}
				379	else if(kernel_size == Size2D(1, 3))
				380	{
				381	using config = NEWinogradLayerConfiguration<float, float, 6, 1, 3, 1>;
				382	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				383	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				384	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				385	n_gemms = config::WinogradBase::N_GEMMS;
				386	N_BLOCK = config::WinogradConv::N_BLOCK;
				387	}
				388	else if(kernel_size == Size2D(3, 1))
				389	{
				390	using config = NEWinogradLayerConfiguration<float, float, 1, 6, 1, 3>;
				391	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				392	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				393	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				394	n_gemms = config::WinogradBase::N_GEMMS;
				395	N_BLOCK = config::WinogradConv::N_BLOCK;
				396	}
				397	else if(kernel_size == Size2D(1, 5))
				398	{
				399	using config = NEWinogradLayerConfiguration<float, float, 4, 1, 5, 1>;
				400	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				401	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				402	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				403	n_gemms = config::WinogradBase::N_GEMMS;
				404	N_BLOCK = config::WinogradConv::N_BLOCK;
				405	}
				406	else if(kernel_size == Size2D(5, 1))
				407	{
				408	using config = NEWinogradLayerConfiguration<float, float, 1, 4, 1, 5>;
				409	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				410	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				411	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				412	n_gemms = config::WinogradBase::N_GEMMS;
				413	N_BLOCK = config::WinogradConv::N_BLOCK;
				414	}
				415	else if(kernel_size == Size2D(1, 7))
				416	{
				417	using config = NEWinogradLayerConfiguration<float, float, 2, 1, 7, 1>;
				418	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				419	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				420	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				421	n_gemms = config::WinogradBase::N_GEMMS;
				422	N_BLOCK = config::WinogradConv::N_BLOCK;
				423	}
				424	else if(kernel_size == Size2D(7, 1))
				425	{
				426	using config = NEWinogradLayerConfiguration<float, float, 1, 2, 1, 7>;
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	427	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				428	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				429	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				430	n_gemms = config::WinogradBase::N_GEMMS;
				431	N_BLOCK = config::WinogradConv::N_BLOCK;
				432	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	433	else
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	434	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	435	ARM_COMPUTE_ERROR("Not supported.");
				436	}
				437	}
				438	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
				439	else if(data_type == DataType::F16)
				440	{
				441	if(kernel_size == Size2D(3, 3))
				442	{
				443	using config = NEWinogradLayerConfiguration<__fp16, __fp16, 4, 4, 3, 3>;
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	444	transform_input_kernel = support::cpp14::make_unique<config::TransformInputKernel>();
				445	transform_weights_kernel = support::cpp14::make_unique<config::TransformWeightsKernel>();
				446	transform_output_kernel = support::cpp14::make_unique<config::TransformOutputKernel>();
				447	n_gemms = config::WinogradBase::N_GEMMS;
				448	N_BLOCK = config::WinogradConv::N_BLOCK;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	449	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	450	else
				451	{
				452	ARM_COMPUTE_ERROR("Not supported.");
				453	}
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	454	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	455	#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	456
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	457	const PaddingType use_padding_type = (conv_info.pad_top() != 0u \|\| conv_info.pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	458	const bool use_same_padding = use_padding_type == PADDING_SAME;
				459
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	460	// Get convolved dimensions
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	461	const int in_channels = input->info()->dimension(channel_idx);
				462	const int out_channels = output->info()->dimension(channel_idx);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	463
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	464	const Tensor4DShape in_shape(internal_get_input_shape(input));
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	465	const size_t data_type_size = input->info()->element_size();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	466	// Get the memory required to instantiate a new Winograd operator.
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	467	constexpr size_t storage_alignment = 64;
				468
				469	// Kernel Storage
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	470	const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	471	in_channels)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	472	* data_type_size;
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	473
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	474	// Input storage
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	475	const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	476	use_same_padding)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	477	* data_type_size;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	478
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	479	// Output storage
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	480	const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
				481	const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
				482	const int output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
				483	const auto output_shape = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
				484	const int input_matrix_stride = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	485
				486	// Configure GEMM
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	487	const int tile_rows = iceildiv(output_shape.first, output_tile.height);
				488	const int tile_cols = iceildiv(output_shape.second, output_tile.width);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	489	const int m = in_shape.n_batches * tile_rows * tile_cols;
				490	const int k = in_shape.n_channels;
				491	const int n = out_channels;
				492	const int kernel_matrix_row_stride = roundup(out_channels, N_BLOCK);
				493	const int output_matrix_row_stride = kernel_matrix_row_stride;
				494
				495	TensorShape a_shape(k, m, 1, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	496	Strides a_strides(data_type_size);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	497	a_strides.set(1, a_strides[0] * k);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	498	//a_strides.set(2, data_type_size * input_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	499	a_strides.set(2, 0);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	500	a_strides.set(3, data_type_size * input_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	501
				502	TensorShape b_shape(n, k, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	503	Strides b_strides(data_type_size);
				504	b_strides.set(1, data_type_size * kernel_matrix_row_stride);
				505	b_strides.set(2, data_type_size * kernel_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	506
				507	TensorShape d_shape(n, m, 1, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	508	Strides d_strides(data_type_size);
				509	d_strides.set(1, data_type_size * output_matrix_row_stride);
				510	//d_strides.set(2, data_type_size * output_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	511	d_strides.set(2, 0);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	512	d_strides.set(3, data_type_size * output_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	513
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	514	TensorInfo a_info{};
				515	TensorInfo b_info{};
				516	TensorInfo d_info{};
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	517	a_info.init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
				518	b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
				519	d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	520
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	521	_input_transformed.allocator()->init(a_info, storage_alignment);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	522	_kernel_storage.allocator()->init(b_info, storage_alignment);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	523	_output_transformed.allocator()->init(d_info, storage_alignment);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	524
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	525	// configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()
				526	TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
				527	_output->info()->dimension(1), _output->info()->dimension(3)),
				528	1, _output->info()->data_type());
				529	_output_nhwc.allocator()->init(info);
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	530
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	531	const ITensor *input_to_use = _input;
				532	ITensor *output_to_use = _output;
				533	PermutationVector weights_permutation_vector(3U, 0U, 1U, 2U);
				534	const unsigned int max_num_threads = NEScheduler::get().num_threads();
Pablo Tello	f718ce2	2018-10-29 13:13:23 +0000	[diff] [blame]	535
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	536	// Configure the kernel to transform the input tensor from NCHW -> NHWC
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	537	if(data_layout == DataLayout::NCHW)
				538	{
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	539	_memory_group.manage(&_input_nhwc);
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	540	_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	541	input_to_use = &_input_nhwc;
				542	weights_permutation_vector = PermutationVector(3U, 2U, 0U, 1U);
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	543	}
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	544
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	545	// Configure input transform kernel
				546	_memory_group.manage(&_input_transformed);
				547	_memory_group.manage(&_input_workspace);
				548	transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
				549	&_input_transformed, input_matrix_stride, &_input_workspace);
				550	const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
				551	TensorInfo input_workspace_info(TensorShape(input_workspace_size), 1, _input->info()->data_type());
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	552	_input_workspace.allocator()->init(input_workspace_info);
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	553	_input_workspace.allocator()->allocate();
				554	if(data_layout == DataLayout::NCHW)
				555	{
				556	_input_nhwc.allocator()->allocate();
				557	}
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	558
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	559	// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
				560	_permute_weights.configure(weights, &_weights_hwio, weights_permutation_vector);
				561	transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	562
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	563	// Configure GEMM function
				564	_memory_group.manage(&_output_transformed);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	565	_gemm_function.configure(&_input_transformed, &_kernel_storage, nullptr, &_output_transformed, 1.0f, 0.f);
				566	_input_transformed.allocator()->allocate();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	567
				568	// Configure output transform function
				569	// The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
				570	if(data_layout == DataLayout::NCHW)
				571	{
				572	_memory_group.manage(&_output_nhwc);
				573	output_to_use = &_output_nhwc;
				574	}
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	575	const arm_gemm::Activation activation = arm_gemm_activation_from_acl_activation(act_info);
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	576
				577	transform_output_kernel->configure(biases,
				578	&_output_transformed,
				579	output_matrix_stride,
				580	output_to_use,
				581	in_shape.n_batches,
				582	output_shape.first,
				583	output_shape.second,
				584	out_channels,
				585	&_output_workspace,
				586	activation);
				587
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	588	const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
				589	TensorInfo output_workspace_info(TensorShape(output_workspace_size), 1, _output->info()->data_type());
				590	_output_workspace.allocator()->init(output_workspace_info);
Anthony Barbier	20394d5	2018-08-02 11:29:09 +0100	[diff] [blame]	591	_output_workspace.allocator()->allocate();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	592	_output_transformed.allocator()->allocate();
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	593
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	594	// Reorder the convoluted output to ACL's ordering NCHW
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	595	if(data_layout == DataLayout::NCHW)
				596	{
				597	_permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
				598	_output_nhwc.allocator()->allocate();
				599	}
Anthony Barbier	20394d5	2018-08-02 11:29:09 +0100	[diff] [blame]	600
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	601	_transform_input_kernel = std::move(transform_input_kernel);
				602	_transform_weights_kernel = std::move(transform_weights_kernel);
				603	_transform_output_kernel = std::move(transform_output_kernel);
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	604
				605	//Configure Activation Layer
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	606	_is_activationlayer_enabled = act_info.enabled() && !fuse_function_supported(act_info);
Pablo Tello	7282d56	2018-06-14 15:35:49 +0100	[diff] [blame]	607	if(_is_activationlayer_enabled)
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	608	{
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	609	_activationlayer_function.configure(_output, nullptr, act_info);
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	610	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	611	}
				612
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	613	void NEWinogradConvolutionLayer::run()
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	614	{
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	615	const DataLayout data_layout = _input->info()->data_layout();
				616
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	617	prepare();
				618
Georgios Pinitas	da953f2	2019-04-02 17:27:03 +0100	[diff] [blame]	619	MemoryGroupResourceScope scope_mg(_memory_group);
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	620
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	621	if(data_layout == DataLayout::NCHW)
				622	{
				623	//Bring channels to the front as Winograd code expects the tensor to be in the format NHWC
				624	_permute_input.run();
				625	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	626
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	627	// Transform input tensor to the winograd domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	628	NEScheduler::get().schedule(_transform_input_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	629
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	630	//Run 16 GEMMs in multiple threads, each kernel runs one or more GEMMs
Pablo Tello	a518f30	2018-09-19 11:33:03 +0100	[diff] [blame]	631	_gemm_function.run();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	632
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	633	// Transform output tensor to the spatial domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	634	NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	635
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	636	if(data_layout == DataLayout::NCHW)
				637	{
				638	// Reorder the convoluted output to ACL's ordering NCHW
				639	_permute_output.run();
				640	}
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	641
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	642	if(_is_activationlayer_enabled)
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	643	{
				644	_activationlayer_function.run();
				645	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	646	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	647
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	648	Status NEWinogradConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	649	const ActivationLayerInfo &act_info, bool enable_fast_math)
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	650	{
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	651	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	652	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info));
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	653
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	654	// Get indices for the width and height
				655	const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
				656	const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
				657
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	658	// Input shape, kernel size and output tile
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	659	const Size2D input_dims = Size2D(input->dimension(idx_width), input->dimension(idx_height));
				660	const Size2D kernel_size = Size2D(weights->dimension(idx_width), weights->dimension(idx_height));
				661	const DataType data_type = input->data_type();
				662	const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	663
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	664	// Check if the Winograd configuration requires fast math
				665	if(!enable_fast_math)
				666	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	667	ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
				668	"This Winograd configuration requires enable_fast_math=true");
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	669	}
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	670
				671	const WinogradInfo winograd_info = WinogradInfo(output_tile,
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	672	kernel_size,
				673	input_dims,
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	674	conv_info,
				675	input->data_layout());
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	676
				677	// Validate input transform
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	678	const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	679	const TensorInfo input0 = input->clone()->set_tensor_shape(input0_shape);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	680	// Validate filter transform
				681	const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
				682	const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
				683	// Validate batched matrix multiply
				684	TensorShape batched_mm_output_shape = input0.tensor_shape();
				685	batched_mm_output_shape[0] = input1.tensor_shape()[0];
				686	const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
Pablo Tello	7282d56	2018-06-14 15:35:49 +0100	[diff] [blame]	687
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	688	if(kernel_size == Size2D(3, 3))
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	689	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	690	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
				691	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
				692	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
				693	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
				694	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
				695	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
				696	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	697	return validate_kernel_3x3(input_dims, input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				698	}
				699	else if(kernel_size == Size2D(5, 5))
				700	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	701	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
				702	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
				703	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
				704	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
				705	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
				706	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
				707	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	708	return validate_kernel_5x5(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				709	}
				710	if(kernel_size == Size2D(3, 1))
				711	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	712	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
				713	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
				714	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	715	return validate_kernel_3x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				716	}
				717	else if(kernel_size == Size2D(1, 3))
				718	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	719	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
				720	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
				721	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	722	return validate_kernel_1x3(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				723	}
				724	else if(kernel_size == Size2D(5, 1))
				725	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	726	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
				727	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
				728	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	729	return validate_kernel_5x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				730	}
				731	else if(kernel_size == Size2D(1, 5))
				732	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	733	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
				734	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
				735	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	736	return validate_kernel_1x5(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				737	}
				738	else if(kernel_size == Size2D(7, 1))
				739	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	740	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 3, "Only SAME or VALID padding supported");
				741	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 3, "Only SAME or VALID padding supported");
				742	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	743	return validate_kernel_7x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				744	}
				745	else if(kernel_size == Size2D(1, 7))
				746	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	747	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 3, "Only SAME or VALID padding supported");
				748	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 3, "Only SAME or VALID padding supported");
				749	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	750	return validate_kernel_1x7(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	751	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	752	else
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	753	{
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	754	ARM_COMPUTE_RETURN_ERROR_MSG("Kernel shape not supported");
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	755	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	756	}
				757
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	758	void NEWinogradConvolutionLayer::prepare()
				759	{
				760	if(!_is_prepared)
				761	{
				762	// Permute weights
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	763	_weights_hwio.allocator()->allocate();
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	764	_permute_weights.run();
				765	_weights->mark_as_unused();
				766
				767	// Transform weights
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	768	_kernel_storage.allocator()->allocate();
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	769	NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX);
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	770
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	771	_weights_hwio.allocator()->free();
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	772	_is_prepared = true;
				773	}
				774	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	775	} // namespace arm_compute