Blame - src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp - ml/ComputeLibrary

blob: 57950d512630072d2809635eef26075b6f6a13b2 [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Georgios Pinitas	ddd79f5	2021-01-15 09:42:26 +0000	[diff] [blame]	2	* Copyright (c) 2017-2021 Arm Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	24	#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	25
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	26	#include "arm_compute/core/Error.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	27	#include "arm_compute/core/Utils.h"
				28	#include "arm_compute/core/Validate.h"
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	29	#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	30	#include "arm_compute/runtime/NEON/NEScheduler.h"
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	31	#include "src/core/CPP/Validate.h"
Michele Di Giorgio	6ad60af	2020-06-09 14:52:15 +0100	[diff] [blame]	32	#include "src/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	33
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	34	#include "src/core/NEON/kernels/convolution/common/utils.hpp"
Michele Di Giorgio	6ad60af	2020-06-09 14:52:15 +0100	[diff] [blame]	35	#include "src/core/NEON/kernels/convolution/winograd/winograd.hpp"
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	36
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	37	namespace arm_compute
				38	{
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	39	namespace
				40	{
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	41	inline Status validate_kernel_3x3(const Size2D input_dims, const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				42	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				43	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	44	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
				45	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
				46
				47	if(input->data_type() == DataType::F32)
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	48	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	49	if(input_dims.width > 4 && input_dims.height > 4)
				50	{
				51	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 4, 4, 3, 3>::validate(input, input0, winograd_info)));
				52	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 4, 4, 3, 3>::validate(weights, input1, winograd_info)));
				53	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 4, 4, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				54	}
				55	else
				56	{
				57	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>::validate(input, input0, winograd_info)));
				58	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>::validate(weights, input1, winograd_info)));
				59	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				60	}
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	61	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	62	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
SiCong Li	6b6a16f	2020-05-28 08:55:51 +0100	[diff] [blame]	63	else if(input->data_type() == DataType::F16)
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	64	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	65	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<__fp16, 4, 4, 3, 3>::validate(input, input0, winograd_info)));
				66	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<__fp16, 4, 4, 3, 3>::validate(weights, input1, winograd_info)));
				67	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<__fp16, 4, 4, 3, 3>::validate(batched_mm_output, biases, output, winograd_info)));
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	68	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	69	#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	70
				71	if(act_info.enabled())
				72	{
				73	NEActivationLayer::validate(output, nullptr, act_info);
				74	}
				75	return Status{};
				76	}
				77
				78	inline Status validate_kernel_5x5(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				79	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				80	{
				81	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 2, 5, 5>::validate(input, input0, winograd_info)));
				82	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 2, 5, 5>::validate(weights, input1, winograd_info)));
				83	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 2, 5, 5>::validate(batched_mm_output, biases, output, winograd_info)));
				84	if(act_info.enabled())
				85	{
				86	NEActivationLayer::validate(output, nullptr, act_info);
				87	}
				88	return Status{};
				89	}
				90
				91	inline Status validate_kernel_3x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				92	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				93	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	94	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	95	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 6, 1, 3>::validate(input, input0, winograd_info)));
				96	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 6, 1, 3>::validate(weights, input1, winograd_info)));
				97	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 6, 1, 3>::validate(batched_mm_output, biases, output, winograd_info)));
				98	if(act_info.enabled())
				99	{
				100	NEActivationLayer::validate(output, nullptr, act_info);
				101	}
				102	return Status{};
				103	}
				104
				105	inline Status validate_kernel_1x3(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				106	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				107	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	108	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	109	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 6, 1, 3, 1>::validate(input, input0, winograd_info)));
				110	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 6, 1, 3, 1>::validate(weights, input1, winograd_info)));
				111	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 6, 1, 3, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				112
				113	if(act_info.enabled())
				114	{
				115	NEActivationLayer::validate(output, nullptr, act_info);
				116	}
				117	return Status{};
				118	}
				119
				120	inline Status validate_kernel_5x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				121	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				122	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	123	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	124	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 4, 1, 5>::validate(input, input0, winograd_info)));
				125	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 4, 1, 5>::validate(weights, input1, winograd_info)));
				126	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 4, 1, 5>::validate(batched_mm_output, biases, output, winograd_info)));
				127	if(act_info.enabled())
				128	{
				129	NEActivationLayer::validate(output, nullptr, act_info);
				130	}
				131	return Status{};
				132	}
				133	inline Status validate_kernel_1x5(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				134	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				135	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	136	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	137	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 4, 1, 5, 1>::validate(input, input0, winograd_info)));
				138	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 4, 1, 5, 1>::validate(weights, input1, winograd_info)));
				139	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 4, 1, 5, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				140	if(act_info.enabled())
				141	{
				142	NEActivationLayer::validate(output, nullptr, act_info);
				143	}
				144	return Status{};
				145	}
				146
				147	inline Status validate_kernel_7x1(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				148	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				149	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	150	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	151	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 1, 2, 1, 7>::validate(input, input0, winograd_info)));
				152	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 1, 2, 1, 7>::validate(weights, input1, winograd_info)));
				153	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 1, 2, 1, 7>::validate(batched_mm_output, biases, output, winograd_info)));
				154	if(act_info.enabled())
				155	{
				156	NEActivationLayer::validate(output, nullptr, act_info);
				157	}
				158	return Status{};
				159	}
				160
				161	inline Status validate_kernel_1x7(const ITensorInfo input, const TensorInfo input0, const TensorInfo input1, const TensorInfo batched_mm_output,
				162	const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
				163	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	164	ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	165	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<float, 2, 1, 7, 1>::validate(input, input0, winograd_info)));
				166	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<float, 2, 1, 7, 1>::validate(weights, input1, winograd_info)));
				167	ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformOutputKernel<float, 2, 1, 7, 1>::validate(batched_mm_output, biases, output, winograd_info)));
				168
				169	if(act_info.enabled())
				170	{
				171	NEActivationLayer::validate(output, nullptr, act_info);
				172	}
				173	return Status{};
				174	}
				175
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	176	inline Tensor4DShape internal_get_input_shape(const arm_compute::ITensor *input)
				177	{
				178	const DataLayout data_layout = input->info()->data_layout();
				179	const int in_width = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH));
				180	const int in_height = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT));
				181	const int in_channels = input->info()->dimension(get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL));
				182	const int in_batches = input->info()->dimension(3);
				183
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	184	return Tensor4DShape{ in_batches, in_height, in_width, in_channels };
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	185	}
				186
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	187	Status validate_arguments(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info)
				188	{
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	189	ARM_COMPUTE_UNUSED(output);
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	190	ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
				191
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	192	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.stride().first != 1 \|\| conv_info.stride().second != 1, "Winograd layer only supports unit strides.");
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	193	if(biases != nullptr)
				194	{
				195	ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
				196	ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
				197	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	198	return INEWinogradLayerTransformWeightsKernel::validate(input, weights);
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	199	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	200
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	201	Size2D winograd_output_tile(const Size2D &input_dims, const Size2D &kernel_dims, DataType data_type)
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	202	{
				203	Size2D output_tile = Size2D{};
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	204	if(kernel_dims == Size2D(3U, 3U))
				205	{
giuros01	f44fe3d	2019-08-14 16:49:27 +0100	[diff] [blame]	206	output_tile = (input_dims.width <= 4 \|\| input_dims.height <= 4) ? Size2D(2U, 2U) : Size2D(4U, 4U);
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	207	if(data_type == DataType::F16)
				208	{
				209	output_tile = Size2D(4U, 4U);
				210	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	211	}
				212	else if(kernel_dims == Size2D(5U, 5U))
				213	{
				214	output_tile = Size2D(2U, 2U);
				215	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	216	else if(kernel_dims == Size2D(1U, 3U))
				217	{
				218	output_tile = Size2D(1U, 6U);
				219	}
				220	else if(kernel_dims == Size2D(3U, 1U))
				221	{
				222	output_tile = Size2D(6U, 1U);
				223	}
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	224	else if(kernel_dims == Size2D(1U, 5U))
				225	{
				226	output_tile = Size2D(1U, 4U);
				227	}
				228	else if(kernel_dims == Size2D(5U, 1U))
				229	{
				230	output_tile = Size2D(4U, 1U);
				231	}
				232	else if(kernel_dims == Size2D(7U, 1U))
				233	{
				234	output_tile = Size2D(2U, 1U);
				235	}
				236	else if(kernel_dims == Size2D(1U, 7U))
				237	{
				238	output_tile = Size2D(1U, 2U);
				239	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	240	return output_tile;
				241	}
				242
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	243	bool check_support_fast_math(const Size2D &output_tile, const Size2D &kernel_size, DataType data_type)
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	244	{
				245	// Check if we want to configure a Winograd configuration which requires fast math
				246	using WinogradConfiguration = std::pair<std::pair<int, int>, std::pair<int, int>>;
				247
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	248	const std::vector<WinogradConfiguration> fast_math_winograd_f16 =
				249	{
				250	WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(3, 3))
				251	};
				252
				253	const std::vector<WinogradConfiguration> fast_math_winograd_f32 =
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	254	{
				255	WinogradConfiguration(std::pair<int, int>(2, 2), std::pair<int, int>(5, 5)),
				256	WinogradConfiguration(std::pair<int, int>(4, 4), std::pair<int, int>(5, 5))
				257	};
				258
				259	auto p = std::make_pair(std::pair<int, int>(output_tile.width, output_tile.height),
				260	std::pair<int, int>(kernel_size.width, kernel_size.height));
				261
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	262	switch(data_type)
				263	{
				264	case DataType::F16:
				265	return std::find(fast_math_winograd_f16.begin(), fast_math_winograd_f16.end(), p) != fast_math_winograd_f16.end();
				266	case DataType::F32:
				267	return std::find(fast_math_winograd_f32.begin(), fast_math_winograd_f32.end(), p) != fast_math_winograd_f32.end();
				268	default:
				269	return false;
				270	}
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	271	}
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	272
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	273	inline bool fuse_function_supported(const ActivationLayerInfo &act_info)
				274	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	275	return act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU \|\| act_info.activation() == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU;
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	276	}
				277
				278	arm_gemm::Activation arm_gemm_activation_from_acl_activation(const ActivationLayerInfo &act_info)
				279	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	280	switch(act_info.activation())
				281	{
				282	case ActivationLayerInfo::ActivationFunction::RELU:
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	283	{
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	284	return arm_gemm::Activation(arm_gemm::Activation::Type::ReLU, act_info.a(), act_info.b());
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	285	}
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	286	case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
				287	{
				288	return arm_gemm::Activation(arm_gemm::Activation::Type::BoundedReLU, act_info.a(), act_info.b());
				289	}
				290	default:
				291	{
				292	return arm_gemm::Activation(arm_gemm::Activation::Type::None);
				293	}
				294	}
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	295	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	296	} //namespace
				297
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	298	NEWinogradConvolutionLayer::NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager)
Pablo Tello	a518f30	2018-09-19 11:33:03 +0100	[diff] [blame]	299	: _memory_group(memory_manager), _gemm_function(memory_manager), _transform_input_kernel(nullptr), _transform_output_kernel(nullptr), _transform_weights_kernel(nullptr), _activationlayer_function(),
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	300	_permute_input(), _permute_weights(), _permute_output(), _input_transformed(), _output_transformed(), _input_workspace(), _output_workspace(), _kernel_storage(), _input_nhwc(), _output_nhwc(),
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	301	_weights_hwio(), _input(), _weights(), _output(), _is_prepared(false), _is_activationlayer_enabled(false), _data_layout()
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	302	{
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	303	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	304
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	305	void NEWinogradConvolutionLayer::configure(const ITensor input, const ITensor weights, const ITensor biases, ITensor output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info,
				306	bool enable_fast_math)
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	307	{
Andrew Mundy	4d9379a	2018-03-15 16:47:03 +0000	[diff] [blame]	308	ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
Andrew Mundy	4d9379a	2018-03-15 16:47:03 +0000	[diff] [blame]	309	ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info));
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	310
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	311	// Get indices for the width and height
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	312	_data_layout = input->info()->data_layout();
				313	const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
				314	const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
				315	const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	316
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	317	const Size2D input_dims = Size2D(input->info()->dimension(width_idx), input->info()->dimension(height_idx));
				318	const Size2D kernel_size = Size2D(weights->info()->dimension(width_idx), weights->info()->dimension(height_idx));
				319	const DataType data_type = input->info()->data_type();
				320	const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	321
				322	// Check if the Winograd configuration requires fast math
				323	if(!enable_fast_math)
				324	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	325	ARM_COMPUTE_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
				326	"This Winograd configuration requires enable_fast_math=true");
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	327	}
				328
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	329	_weights = weights;
				330	_input = input;
				331	_output = output;
				332	_is_prepared = false;
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	333
Georgios Pinitas	9a67178	2021-02-25 00:04:08 +0000	[diff] [blame]	334	int n_gemms = 1;
				335	int N_BLOCK = 1; // Size of block used by GEMM.
Michalis Spyrou	2b3129e	2018-04-25 18:10:13 +0100	[diff] [blame]	336
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	337	std::unique_ptr<INEWinogradLayerTransformInputKernel> transform_input_kernel;
				338	std::unique_ptr<INEWinogradLayerTransformWeightsKernel> transform_weights_kernel;
				339	std::unique_ptr<INEWinogradLayerTransformOutputKernel> transform_output_kernel;
				340
				341	if(data_type == DataType::F32)
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	342	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	343	if(kernel_size == Size2D(3, 3))
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	344	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	345	if(input->info()->dimension(width_idx) > 4 && input->info()->dimension(height_idx) > 4)
				346	{
				347	using config = NEWinogradLayerConfiguration<float, float, 4, 4, 3, 3>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	348	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				349	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				350	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	351	n_gemms = config::WinogradBase::N_GEMMS;
				352	N_BLOCK = config::WinogradConv::N_BLOCK;
				353	}
				354	else
				355	{
				356	using config = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	357	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				358	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				359	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	360	n_gemms = config::WinogradBase::N_GEMMS;
				361	N_BLOCK = config::WinogradConv::N_BLOCK;
				362	}
				363	}
				364	else if(kernel_size == Size2D(5, 5))
				365	{
				366	using config = NEWinogradLayerConfiguration<float, float, 2, 2, 5, 5>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	367	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				368	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				369	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	370	n_gemms = config::WinogradBase::N_GEMMS;
				371	N_BLOCK = config::WinogradConv::N_BLOCK;
				372	}
				373	else if(kernel_size == Size2D(1, 3))
				374	{
				375	using config = NEWinogradLayerConfiguration<float, float, 6, 1, 3, 1>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	376	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				377	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				378	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	379	n_gemms = config::WinogradBase::N_GEMMS;
				380	N_BLOCK = config::WinogradConv::N_BLOCK;
				381	}
				382	else if(kernel_size == Size2D(3, 1))
				383	{
				384	using config = NEWinogradLayerConfiguration<float, float, 1, 6, 1, 3>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	385	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				386	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				387	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	388	n_gemms = config::WinogradBase::N_GEMMS;
				389	N_BLOCK = config::WinogradConv::N_BLOCK;
				390	}
				391	else if(kernel_size == Size2D(1, 5))
				392	{
				393	using config = NEWinogradLayerConfiguration<float, float, 4, 1, 5, 1>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	394	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				395	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				396	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	397	n_gemms = config::WinogradBase::N_GEMMS;
				398	N_BLOCK = config::WinogradConv::N_BLOCK;
				399	}
				400	else if(kernel_size == Size2D(5, 1))
				401	{
				402	using config = NEWinogradLayerConfiguration<float, float, 1, 4, 1, 5>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	403	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				404	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				405	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	406	n_gemms = config::WinogradBase::N_GEMMS;
				407	N_BLOCK = config::WinogradConv::N_BLOCK;
				408	}
				409	else if(kernel_size == Size2D(1, 7))
				410	{
				411	using config = NEWinogradLayerConfiguration<float, float, 2, 1, 7, 1>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	412	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				413	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				414	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	415	n_gemms = config::WinogradBase::N_GEMMS;
				416	N_BLOCK = config::WinogradConv::N_BLOCK;
				417	}
				418	else if(kernel_size == Size2D(7, 1))
				419	{
				420	using config = NEWinogradLayerConfiguration<float, float, 1, 2, 1, 7>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	421	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				422	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				423	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	424	n_gemms = config::WinogradBase::N_GEMMS;
				425	N_BLOCK = config::WinogradConv::N_BLOCK;
				426	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	427	else
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	428	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	429	ARM_COMPUTE_ERROR("Not supported.");
				430	}
				431	}
				432	#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
				433	else if(data_type == DataType::F16)
				434	{
				435	if(kernel_size == Size2D(3, 3))
				436	{
				437	using config = NEWinogradLayerConfiguration<__fp16, __fp16, 4, 4, 3, 3>;
Georgios Pinitas	40f51a6	2020-11-21 03:04:18 +0000	[diff] [blame]	438	transform_input_kernel = std::make_unique<config::TransformInputKernel>();
				439	transform_weights_kernel = std::make_unique<config::TransformWeightsKernel>();
				440	transform_output_kernel = std::make_unique<config::TransformOutputKernel>();
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	441	n_gemms = config::WinogradBase::N_GEMMS;
				442	N_BLOCK = config::WinogradConv::N_BLOCK;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	443	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	444	else
				445	{
				446	ARM_COMPUTE_ERROR("Not supported.");
				447	}
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	448	}
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	449	#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Georgios Pinitas	9a67178	2021-02-25 00:04:08 +0000	[diff] [blame]	450	else
				451	{
				452	ARM_COMPUTE_ERROR("Not supported.");
				453	}
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	454
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	455	const PaddingType use_padding_type = (conv_info.pad_top() != 0u \|\| conv_info.pad_left() != 0) ? PADDING_SAME : PADDING_VALID;
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	456	const bool use_same_padding = use_padding_type == PADDING_SAME;
				457
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	458	// Get convolved dimensions
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	459	const int in_channels = input->info()->dimension(channel_idx);
				460	const int out_channels = output->info()->dimension(channel_idx);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	461
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	462	const Tensor4DShape in_shape(internal_get_input_shape(input));
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	463	const size_t data_type_size = input->info()->element_size();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	464	// Get the memory required to instantiate a new Winograd operator.
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	465	constexpr size_t storage_alignment = 64;
				466
				467	// Kernel Storage
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	468	const size_t kernel_storage_size = transform_weights_kernel->get_weight_storage_size(out_channels,
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	469	in_channels)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	470	* data_type_size;
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	471
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	472	// Input storage
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	473	const size_t input_storage_size = transform_input_kernel->get_input_storage_size(in_shape.n_batches, in_shape.n_channels, in_shape.n_rows, in_shape.n_cols,
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	474	use_same_padding)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	475	* data_type_size;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	476
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	477	// Output storage
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	478	const size_t output_storage_size = transform_output_kernel->get_output_storage_size(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels) * data_type_size;
				479	const int kernel_matrix_stride = transform_weights_kernel->get_matrix_stride(out_channels, in_channels);
				480	const int output_matrix_stride = transform_output_kernel->get_matrix_stride(in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, out_channels);
				481	const auto output_shape = transform_output_kernel->get_output_shape(in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
				482	const int input_matrix_stride = transform_input_kernel->get_matrix_stride(in_shape.n_batches, in_channels, in_shape.n_rows, in_shape.n_cols, use_padding_type == PADDING_SAME);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	483
				484	// Configure GEMM
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	485	const int tile_rows = iceildiv(output_shape.first, output_tile.height);
				486	const int tile_cols = iceildiv(output_shape.second, output_tile.width);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	487	const int m = in_shape.n_batches * tile_rows * tile_cols;
				488	const int k = in_shape.n_channels;
				489	const int n = out_channels;
				490	const int kernel_matrix_row_stride = roundup(out_channels, N_BLOCK);
				491	const int output_matrix_row_stride = kernel_matrix_row_stride;
				492
				493	TensorShape a_shape(k, m, 1, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	494	Strides a_strides(data_type_size);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	495	a_strides.set(1, a_strides[0] * k);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	496	//a_strides.set(2, data_type_size * input_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	497	a_strides.set(2, 0);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	498	a_strides.set(3, data_type_size * input_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	499
				500	TensorShape b_shape(n, k, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	501	Strides b_strides(data_type_size);
				502	b_strides.set(1, data_type_size * kernel_matrix_row_stride);
				503	b_strides.set(2, data_type_size * kernel_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	504
				505	TensorShape d_shape(n, m, 1, n_gemms);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	506	Strides d_strides(data_type_size);
				507	d_strides.set(1, data_type_size * output_matrix_row_stride);
				508	//d_strides.set(2, data_type_size * output_matrix_stride / n_gemms); FIXME: This is the real batch size, but RSH's code crashes if it's not 0.
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	509	d_strides.set(2, 0);
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	510	d_strides.set(3, data_type_size * output_matrix_stride);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	511
Michalis Spyrou	a4f378d	2019-04-26 14:54:54 +0100	[diff] [blame]	512	TensorInfo a_info{};
				513	TensorInfo b_info{};
				514	TensorInfo d_info{};
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	515	a_info.init(a_shape, 1, data_type, a_strides, 0, input_storage_size);
				516	b_info.init(b_shape, 1, data_type, b_strides, 0, kernel_storage_size);
				517	d_info.init(d_shape, 1, data_type, d_strides, 0, output_storage_size);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	518
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	519	_input_transformed.allocator()->init(a_info, storage_alignment);
Anthony Barbier	578225e	2018-07-16 18:00:11 +0100	[diff] [blame]	520	_kernel_storage.allocator()->init(b_info, storage_alignment);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	521	_output_transformed.allocator()->init(d_info, storage_alignment);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	522
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	523	// configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()
				524	TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
				525	_output->info()->dimension(1), _output->info()->dimension(3)),
				526	1, _output->info()->data_type());
				527	_output_nhwc.allocator()->init(info);
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	528
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	529	const ITensor *input_to_use = _input;
				530	ITensor *output_to_use = _output;
				531	PermutationVector weights_permutation_vector(3U, 0U, 1U, 2U);
				532	const unsigned int max_num_threads = NEScheduler::get().num_threads();
Pablo Tello	f718ce2	2018-10-29 13:13:23 +0000	[diff] [blame]	533
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	534	// Configure the kernel to transform the input tensor from NCHW -> NHWC
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	535	if(_data_layout == DataLayout::NCHW)
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	536	{
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	537	_memory_group.manage(&_input_nhwc);
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	538	_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	539	input_to_use = &_input_nhwc;
				540	weights_permutation_vector = PermutationVector(3U, 2U, 0U, 1U);
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	541	}
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	542
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	543	// Configure input transform kernel
				544	_memory_group.manage(&_input_transformed);
				545	_memory_group.manage(&_input_workspace);
				546	transform_input_kernel->configure(input_to_use, in_shape.n_batches, in_shape.n_rows, in_shape.n_cols, in_shape.n_channels, use_padding_type,
				547	&_input_transformed, input_matrix_stride, &_input_workspace);
				548	const size_t input_workspace_size = transform_input_kernel->get_working_space_size(max_num_threads);
				549	TensorInfo input_workspace_info(TensorShape(input_workspace_size), 1, _input->info()->data_type());
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	550	_input_workspace.allocator()->init(input_workspace_info);
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	551	_input_workspace.allocator()->allocate();
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	552	if(_data_layout == DataLayout::NCHW)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	553	{
				554	_input_nhwc.allocator()->allocate();
				555	}
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	556
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	557	// Re-order a weight tensor from [Output feature map x Input feature map x Height x Width] to [Height x Width x Input feature map x Output feature map]
				558	_permute_weights.configure(weights, &_weights_hwio, weights_permutation_vector);
				559	transform_weights_kernel->configure(&_weights_hwio, &_kernel_storage, kernel_matrix_stride, out_channels, in_channels);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	560
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	561	// Configure GEMM function
				562	_memory_group.manage(&_output_transformed);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	563	_gemm_function.configure(&_input_transformed, &_kernel_storage, nullptr, &_output_transformed, 1.0f, 0.f);
				564	_input_transformed.allocator()->allocate();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	565
				566	// Configure output transform function
				567	// The biases tensor has not been allocated at this point in time, the output transform will add the biases to the final result in the run() method
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	568	if(_data_layout == DataLayout::NCHW)
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	569	{
				570	_memory_group.manage(&_output_nhwc);
				571	output_to_use = &_output_nhwc;
				572	}
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	573	const arm_gemm::Activation activation = arm_gemm_activation_from_acl_activation(act_info);
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	574
				575	transform_output_kernel->configure(biases,
				576	&_output_transformed,
				577	output_matrix_stride,
				578	output_to_use,
				579	in_shape.n_batches,
				580	output_shape.first,
				581	output_shape.second,
				582	out_channels,
				583	&_output_workspace,
				584	activation);
				585
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	586	const size_t output_workspace_size = transform_output_kernel->get_working_space_size(max_num_threads);
				587	TensorInfo output_workspace_info(TensorShape(output_workspace_size), 1, _output->info()->data_type());
				588	_output_workspace.allocator()->init(output_workspace_info);
Anthony Barbier	20394d5	2018-08-02 11:29:09 +0100	[diff] [blame]	589	_output_workspace.allocator()->allocate();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	590	_output_transformed.allocator()->allocate();
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	591
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	592	// Reorder the convoluted output to ACL's ordering NCHW
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	593	if(_data_layout == DataLayout::NCHW)
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	594	{
				595	_permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
				596	_output_nhwc.allocator()->allocate();
				597	}
Anthony Barbier	20394d5	2018-08-02 11:29:09 +0100	[diff] [blame]	598
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	599	_transform_input_kernel = std::move(transform_input_kernel);
				600	_transform_weights_kernel = std::move(transform_weights_kernel);
				601	_transform_output_kernel = std::move(transform_output_kernel);
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	602
				603	//Configure Activation Layer
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	604	_is_activationlayer_enabled = act_info.enabled() && !fuse_function_supported(act_info);
Pablo Tello	7282d56	2018-06-14 15:35:49 +0100	[diff] [blame]	605	if(_is_activationlayer_enabled)
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	606	{
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	607	_activationlayer_function.configure(_output, nullptr, act_info);
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	608	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	609	}
				610
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	611	void NEWinogradConvolutionLayer::run()
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	612	{
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	613	prepare();
				614
Georgios Pinitas	da953f2	2019-04-02 17:27:03 +0100	[diff] [blame]	615	MemoryGroupResourceScope scope_mg(_memory_group);
Pablo Tello	679463a	2018-02-06 11:47:59 +0000	[diff] [blame]	616
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	617	if(_data_layout == DataLayout::NCHW)
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	618	{
				619	//Bring channels to the front as Winograd code expects the tensor to be in the format NHWC
				620	_permute_input.run();
				621	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	622
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	623	// Transform input tensor to the winograd domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	624	NEScheduler::get().schedule(_transform_input_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	625
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	626	//Run 16 GEMMs in multiple threads, each kernel runs one or more GEMMs
Pablo Tello	a518f30	2018-09-19 11:33:03 +0100	[diff] [blame]	627	_gemm_function.run();
Georgios Pinitas	7179837	2019-04-17 13:01:54 +0100	[diff] [blame]	628
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	629	// Transform output tensor to the spatial domain
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	630	NEScheduler::get().schedule(_transform_output_kernel.get(), Window::DimX);
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	631
Manuel Bottini	ca62c6f	2021-03-23 11:50:34 +0000	[diff] [blame]	632	if(_data_layout == DataLayout::NCHW)
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	633	{
				634	// Reorder the convoluted output to ACL's ordering NCHW
				635	_permute_output.run();
				636	}
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	637
Matthew Bentham	9204646	2020-03-07 22:15:55 +0000	[diff] [blame]	638	if(_is_activationlayer_enabled)
Isabella Gottardi	3f217ec	2018-02-12 14:59:19 +0000	[diff] [blame]	639	{
				640	_activationlayer_function.run();
				641	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	642	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	643
Georgios Pinitas	9fb1159	2018-04-26 20:34:58 +0100	[diff] [blame]	644	Status NEWinogradConvolutionLayer::validate(const ITensorInfo input, const ITensorInfo weights, const ITensorInfo biases, const ITensorInfo output, const PadStrideInfo &conv_info,
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	645	const ActivationLayerInfo &act_info, bool enable_fast_math)
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	646	{
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	647	ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	648	ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info));
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	649
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	650	// Get indices for the width and height
				651	const size_t idx_width = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH);
				652	const size_t idx_height = get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT);
				653
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	654	// Input shape, kernel size and output tile
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	655	const Size2D input_dims = Size2D(input->dimension(idx_width), input->dimension(idx_height));
				656	const Size2D kernel_size = Size2D(weights->dimension(idx_width), weights->dimension(idx_height));
				657	const DataType data_type = input->data_type();
				658	const Size2D output_tile = winograd_output_tile(input_dims, kernel_size, data_type);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	659
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	660	// Check if the Winograd configuration requires fast math
				661	if(!enable_fast_math)
				662	{
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	663	ARM_COMPUTE_RETURN_ERROR_ON_MSG(check_support_fast_math(output_tile, kernel_size, data_type),
				664	"This Winograd configuration requires enable_fast_math=true");
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	665	}
Vidhya Sudhan Loganathan	cb0010b	2018-05-11 16:23:53 +0100	[diff] [blame]	666
				667	const WinogradInfo winograd_info = WinogradInfo(output_tile,
Giorgio Arena	a3221e6	2018-05-03 15:57:48 +0100	[diff] [blame]	668	kernel_size,
				669	input_dims,
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	670	conv_info,
				671	input->data_layout());
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	672
				673	// Validate input transform
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	674	const TensorShape input0_shape = misc::shape_calculator::compute_winograd_input_transform_shape(*input, winograd_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	675	const TensorInfo input0 = input->clone()->set_tensor_shape(input0_shape);
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	676	// Validate filter transform
				677	const TensorShape input1_shape = misc::shape_calculator::compute_winograd_filter_transform_shape(*weights, winograd_info);
				678	const TensorInfo input1 = weights->clone()->set_tensor_shape(input1_shape);
				679	// Validate batched matrix multiply
				680	TensorShape batched_mm_output_shape = input0.tensor_shape();
				681	batched_mm_output_shape[0] = input1.tensor_shape()[0];
				682	const TensorInfo batched_mm_output = input0.clone()->set_tensor_shape(batched_mm_output_shape);
Pablo Tello	7282d56	2018-06-14 15:35:49 +0100	[diff] [blame]	683
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	684	if(kernel_size == Size2D(3, 3))
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	685	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	686	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
				687	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
				688	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
				689	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
				690	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
				691	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
				692	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	693	return validate_kernel_3x3(input_dims, input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				694	}
				695	else if(kernel_size == Size2D(5, 5))
				696	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	697	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
				698	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
				699	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
				700	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
				701	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != conv_info.pad_left(), "Only SAME or VALID padding supported");
				702	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_bottom(), "Only SAME or VALID padding supported");
				703	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != conv_info.pad_left(), "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	704	return validate_kernel_5x5(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				705	}
				706	if(kernel_size == Size2D(3, 1))
				707	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	708	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 1, "Only SAME or VALID padding supported");
				709	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 1, "Only SAME or VALID padding supported");
				710	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	711	return validate_kernel_3x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				712	}
				713	else if(kernel_size == Size2D(1, 3))
				714	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	715	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 1, "Only SAME or VALID padding supported");
				716	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 1, "Only SAME or VALID padding supported");
				717	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	718	return validate_kernel_1x3(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				719	}
				720	else if(kernel_size == Size2D(5, 1))
				721	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	722	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 2, "Only SAME or VALID padding supported");
				723	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 2, "Only SAME or VALID padding supported");
				724	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	725	return validate_kernel_5x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				726	}
				727	else if(kernel_size == Size2D(1, 5))
				728	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	729	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 2, "Only SAME or VALID padding supported");
				730	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 2, "Only SAME or VALID padding supported");
				731	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	732	return validate_kernel_1x5(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				733	}
				734	else if(kernel_size == Size2D(7, 1))
				735	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	736	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_left() != 3, "Only SAME or VALID padding supported");
				737	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_right() != 0u && conv_info.pad_right() != 3, "Only SAME or VALID padding supported");
				738	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_bottom() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	739	return validate_kernel_7x1(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
				740	}
				741	else if(kernel_size == Size2D(1, 7))
				742	{
Pablo Tello	fe4b05f	2018-09-24 16:28:25 +0100	[diff] [blame]	743	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_top() != 0u && conv_info.pad_top() != 3, "Only SAME or VALID padding supported");
				744	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_bottom() != 0u && conv_info.pad_bottom() != 3, "Only SAME or VALID padding supported");
				745	ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv_info.pad_left() != 0u && conv_info.pad_right() != 0, "Only SAME or VALID padding supported");
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	746	return validate_kernel_1x7(input, &input0, &input1, &batched_mm_output, weights, biases, output, winograd_info, act_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	747	}
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	748	else
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	749	{
Pablo Tello	000d33a	2018-09-03 16:59:20 +0100	[diff] [blame]	750	ARM_COMPUTE_RETURN_ERROR_MSG("Kernel shape not supported");
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	751	}
Isabella Gottardi	6acc6ad	2018-02-02 17:19:18 +0000	[diff] [blame]	752	}
				753
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	754	void NEWinogradConvolutionLayer::prepare()
				755	{
				756	if(!_is_prepared)
				757	{
				758	// Permute weights
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	759	_weights_hwio.allocator()->allocate();
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	760	_permute_weights.run();
				761	_weights->mark_as_unused();
				762
				763	// Transform weights
Georgios Pinitas	ca1250d	2018-11-22 19:38:27 +0000	[diff] [blame]	764	_kernel_storage.allocator()->allocate();
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	765	NEScheduler::get().schedule(_transform_weights_kernel.get(), Window::DimX);
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	766	_weights_hwio.allocator()->free();
Georgios Pinitas	ddd79f5	2021-01-15 09:42:26 +0000	[diff] [blame]	767
				768	_gemm_function.prepare();
				769	if(!_kernel_storage.is_used())
				770	{
				771	_kernel_storage.allocator()->free();
				772	}
				773
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	774	_is_prepared = true;
				775	}
				776	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	777	} // namespace arm_compute