Blame - tests/validation/reference/Winograd.cpp - ml/ComputeLibrary

blob: ad0dcbd958dca9b0e574b1394fef830a8f957467 [file] [log] [blame]

Giorgio Arena	1f9ca1d	2018-03-01 11:13:45 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2018 ARM Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "Winograd.h"
				25
				26	#include "tests/validation/Helpers.h"
				27	#include "tests/validation/reference/Utils.h"
				28
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	29	#include "arm_compute/core/Types.h"
				30
Giorgio Arena	1f9ca1d	2018-03-01 11:13:45 +0000	[diff] [blame]	31	namespace arm_compute
				32	{
				33	namespace test
				34	{
				35	namespace validation
				36	{
				37	namespace reference
				38	{
				39	namespace
				40	{
				41	template <typename T>
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	42	void winograd_filter_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out, const Size2D &output_tile)
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	43	{
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	44	const bool is_2x2 = (output_tile.width == 2);
				45	const unsigned int transf_side = is_2x2 ? 4u : 6u;
				46
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	47	// Simple tensor for the 3x3 input tile
				48	SimpleTensor<T> input_tile{ TensorShape(3u, 3u), in.data_type(), 1 };
				49
				50	// Simple tensor for the transformation matrix
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	51	SimpleTensor<T> trans_matrix{ TensorShape(3u, transf_side), in.data_type(), 1 };
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	52
				53	// Simple tensor for the transformation matrix transpose
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	54	SimpleTensor<T> trans_matrix_transposed{ TensorShape(transf_side, 3u), in.data_type(), 1 };
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	55
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	56	// Simple tensor for the 3xSide temporary tile
				57	SimpleTensor<T> tmp_tile{ TensorShape(3u, transf_side), in.data_type(), 1 };
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	58
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	59	// Simple tensor for the SidexSide output tile
				60	SimpleTensor<T> transf_tile{ TensorShape(transf_side, transf_side), in.data_type(), 1 };
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	61
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	62	if(is_2x2)
				63	{
				64	// Initialize 3x4 transformation matrix
				65	// 1 \| 0 \| 0
				66	// 0.5 \| 0.5 \| 0.5
				67	// 0.5 \|-0.5 \| 0.5
				68	// 0 \| 0 \| 1
				69	trans_matrix[0 + 0 * 3] = 1.0f;
				70	trans_matrix[1 + 0 * 3] = 0.0f;
				71	trans_matrix[2 + 0 * 3] = 0.0f;
				72	trans_matrix[0 + 1 * 3] = 0.5f;
				73	trans_matrix[1 + 1 * 3] = 0.5f;
				74	trans_matrix[2 + 1 * 3] = 0.5f;
				75	trans_matrix[0 + 2 * 3] = 0.5f;
				76	trans_matrix[1 + 2 * 3] = -0.5f;
				77	trans_matrix[2 + 2 * 3] = 0.5f;
				78	trans_matrix[0 + 3 * 3] = 0.0f;
				79	trans_matrix[1 + 3 * 3] = 0.0f;
				80	trans_matrix[2 + 3 * 3] = 1.0f;
				81	}
				82	else
				83	{
				84	// Initialize 3x6 transformation matrix
				85	// 1/4 \| 0 \| 0
				86	// -1/6 \| -1/6 \| -1/6
				87	// -1/6 \| 1/6 \| -1/6
				88	// 1/24 \| 1/12 \| 1/6
				89	// 1/24 \| -1/12 \| 1/6
				90	// 0 \| 0 \| 1
				91	trans_matrix[0 + 0 * 3] = 1.0f / 4.0f;
				92	trans_matrix[1 + 0 * 3] = 0.0f;
				93	trans_matrix[2 + 0 * 3] = 0.0f;
				94	trans_matrix[0 + 1 * 3] = -1.0f / 6.0f;
				95	trans_matrix[1 + 1 * 3] = -1.0f / 6.0f;
				96	trans_matrix[2 + 1 * 3] = -1.0f / 6.0f;
				97	trans_matrix[0 + 2 * 3] = -1.0f / 6.0f;
				98	trans_matrix[1 + 2 * 3] = 1.0f / 6.0f;
				99	trans_matrix[2 + 2 * 3] = -1.0f / 6.0f;
				100	trans_matrix[0 + 3 * 3] = 1.0f / 24.0f;
				101	trans_matrix[1 + 3 * 3] = 1.0f / 12.0f;
				102	trans_matrix[2 + 3 * 3] = 1.0f / 6.0f;
				103	trans_matrix[0 + 4 * 3] = 1.0f / 24.0f;
				104	trans_matrix[1 + 4 * 3] = -1.0f / 12.0f;
				105	trans_matrix[2 + 4 * 3] = 1.0f / 6.0f;
				106	trans_matrix[0 + 5 * 3] = 0.0f;
				107	trans_matrix[1 + 5 * 3] = 0.0f;
				108	trans_matrix[2 + 5 * 3] = 1.0f;
				109	}
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	110
				111	// Transpose the transformation matrix
				112	transpose_matrix(trans_matrix, trans_matrix_transposed);
				113
				114	const int num_channels = in.shape()[2];
				115	const int num_filters = in.shape()[3];
				116	const int num_batches = in.shape().total_size() / (9 * num_channels * num_filters);
				117
				118	for(int n = 0; n < num_batches; ++n)
				119	{
				120	for(int w = 0; w < num_filters; ++w)
				121	{
				122	for(int z = 0; z < num_channels; ++z)
				123	{
				124	// Load the 3x3 tile from the input tensor
				125	get_tile(in, input_tile, Coordinates(0, 0, z, w, n));
				126
				127	// First transformation
				128	matrix_multiply(trans_matrix, input_tile, tmp_tile);
				129
				130	// Second transformation
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	131	matrix_multiply(tmp_tile, trans_matrix_transposed, transf_tile);
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	132
				133	// Store the 4x4 output tile across the 16 channels
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	134	const int output_offset = w + z * num_filters;
				135
				136	for(unsigned int out_h = 0, out_pos = 0; out_h < transf_side; ++out_h)
				137	{
				138	for(unsigned int out_w = 0; out_w < transf_side; ++out_w, ++out_pos)
				139	{
				140	out[output_offset + out_pos * num_filters * num_channels] = transf_tile[out_w + out_h * transf_side];
				141	}
				142	}
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	143	}
				144	}
				145	}
				146	}
Gian Marco Iodice	d2fab73	2018-03-02 11:18:12 +0000	[diff] [blame]	147
				148	template <typename T>
				149	void winograd_input_transform3x3(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const PadStrideInfo &conv_info)
				150	{
				151	TensorShape shape4x4(4u, 4u);
				152
				153	// Simple tensor for the 4x4 input tile
				154	SimpleTensor<T> src_tile{ shape4x4, src.data_type() };
				155
				156	// Simple tensor for the 4x4 temporary tile
				157	SimpleTensor<T> tmp_tile{ shape4x4, src.data_type() };
				158
				159	// Simple tensor for the 4x4 output tile
				160	SimpleTensor<T> dst_tile{ shape4x4, src.data_type() };
				161
				162	// Simple tensor for the transformation matrix
				163	SimpleTensor<T> matrix{ shape4x4, src.data_type() };
				164
				165	// Simple tensor for the transformation matrix transposed
				166	SimpleTensor<T> matrix_transposed{ shape4x4, src.data_type() };
				167
				168	const float matrix_values[] = { 1.f, 0.f, -1.f, 0.f,
				169	0.f, 1.f, 1.f, 0.f,
				170	0.f, -1.f, 1.f, 0.f,
				171	0.f, 1.f, 0.f, -1.f
				172	};
				173
				174	for(int i = 0; i < matrix.num_elements(); ++i)
				175	{
				176	matrix[i] = matrix_values[i];
				177	}
				178
				179	transpose_matrix(matrix, matrix_transposed);
				180
				181	const int in_w = src.shape().x();
				182	const int in_h = src.shape().y();
				183	const int in_d = src.shape().z();
				184	const int num_batches = src.shape().total_size() / (in_w * in_h * in_d);
				185	const int num_tiles_x = std::ceil((in_w - 2 + conv_info.pad_left() + conv_info.pad_right()) / 2.0f);
				186	const int num_tiles_y = std::ceil((in_h - 2 + conv_info.pad_top() + conv_info.pad_bottom()) / 2.0f);
				187
				188	ARM_COMPUTE_ERROR_ON((num_tiles_x * num_tiles_y) != static_cast<int>(dst.shape().y()));
				189
				190	for(int b = 0; b < num_batches; ++b)
				191	{
				192	for(int z = 0; z < in_d; ++z)
				193	{
				194	for(int y = 0; y < num_tiles_y; ++y)
				195	{
				196	for(int x = 0; x < num_tiles_x; ++x)
				197	{
				198	int xi = x * 2 - conv_info.pad_left();
				199	int yi = y * 2 - conv_info.pad_top();
				200
				201	// Get the 4x4 tile from the input tensor
				202	get_tile(src, src_tile, Coordinates(xi, yi, z, b));
				203
				204	// Compute the transformation
				205	matrix_multiply(matrix, src_tile, tmp_tile);
				206	matrix_multiply(tmp_tile, matrix_transposed, dst_tile);
				207
				208	// Store the 4x4 output tile across the 16 channels
				209	for(int i = 0; i < 16; ++i)
				210	{
				211	int xo = z;
				212	int yo = x + y * num_tiles_x;
				213	dst[coords2index(dst.shape(), Coordinates(xo, yo, i, b))] = dst_tile[i];
				214	}
				215	}
				216	}
				217	}
				218	}
				219	}
				220
				221	template <typename T>
				222	void winograd_output_transform3x3(const SimpleTensor<T> &in, SimpleTensor<T> &out, int num_tiles_x)
				223	{
				224	ARM_COMPUTE_ERROR_ON(in.shape()[2] != 16);
				225	ARM_COMPUTE_ERROR_ON(in.shape()[0] != out.shape()[2]);
				226
				227	// Simple tensor for the 3x3 input tile
				228	SimpleTensor<T> input_tile{ TensorShape(4u, 4u), in.data_type(), 1 };
				229
				230	// Simple tensor for the transformation matrix
				231	SimpleTensor<T> trans_matrix{ TensorShape(4u, 2u), in.data_type(), 1 };
				232
				233	// Simple tensor for the transformation matrix transpose
				234	SimpleTensor<T> trans_matrix_transposed{ TensorShape(2u, 4u), in.data_type(), 1 };
				235
				236	// Simple tensor for the 4x3 temporary tile
				237	SimpleTensor<T> tmp_tile{ TensorShape(4u, 2u), in.data_type(), 1 };
				238
				239	// Simple tensor for the 4x4 output tile
				240	SimpleTensor<T> output_tile{ TensorShape(2u, 2u), in.data_type(), 1 };
				241
				242	// Initialize transformation matrix
				243	// 1 \| 1 \| 1 \| 1
				244	// 0 \| 1 \| -1 \| -1
				245	trans_matrix[0 + 0 * 4] = 1.0f;
				246	trans_matrix[1 + 0 * 4] = 1.0f;
				247	trans_matrix[2 + 0 * 4] = 1.0f;
				248	trans_matrix[3 + 0 * 4] = 0.0f;
				249	trans_matrix[0 + 1 * 4] = 0.0f;
				250	trans_matrix[1 + 1 * 4] = 1.0f;
				251	trans_matrix[2 + 1 * 4] = -1.0f;
				252	trans_matrix[3 + 1 * 4] = -1.0f;
				253
				254	// Transpose the transformation matrix
				255	transpose_matrix(trans_matrix, trans_matrix_transposed);
				256
				257	const int w_in = in.shape()[0];
				258	const int h_in = in.shape()[1];
				259	const int c_in = in.shape()[2];
				260	const int w_out = out.shape()[0];
				261	const int h_out = out.shape()[1];
				262	const int c_out = out.shape()[2];
				263	const int num_batches = in.shape().total_size() / (w_in * h_in * c_in);
				264
				265	// Input strides
				266	const int stridey_in = w_in;
				267	const int stridez_in = stridey_in * h_in;
				268	const int stridew_in = stridez_in * c_in;
				269
				270	// Output strides
				271	const int stridey_out = w_out;
				272	const int stridez_out = stridey_out * h_out;
				273	const int stridew_out = stridez_out * c_out;
				274
				275	for(int n = 0; n < num_batches; ++n)
				276	{
				277	for(int y = 0; y < h_in; ++y)
				278	{
				279	for(int x = 0; x < w_in; ++x)
				280	{
				281	// Load the 4x4 tile across the 16 channels of the input tensor
				282	for(int z = 0; z < c_in; ++z)
				283	{
				284	input_tile[z] = in[x + (y * stridey_in) + (z * stridez_in) + (n * stridew_in)];
				285	}
				286
				287	// First transformation
				288	matrix_multiply(trans_matrix, input_tile, tmp_tile);
				289
				290	// Second transformation
				291	matrix_multiply(tmp_tile, trans_matrix_transposed, output_tile);
				292
				293	// Store the 2x2 output tile
				294	const int xo = (y % num_tiles_x) * 2;
				295	const int yo = (y / num_tiles_x) * 2;
				296	const int zo = x;
				297
				298	const int output_offset = xo + (yo * stridey_out) + (zo * stridez_out) + (n * stridew_out);
				299	out[output_offset + 0 * stridey_out + 0] = output_tile[0 + 0 * 2];
				300
				301	// Check out-of-bound writes
				302	if(xo + 1 < w_out)
				303	{
				304	out[output_offset + 0 * stridey_out + 1] = output_tile[1 + 0 * 2];
				305	}
				306
				307	if(yo + 1 < h_out)
				308	{
				309	out[output_offset + 1 * stridey_out + 0] = output_tile[0 + 1 * 2];
				310	}
				311
				312	if((yo + 1 < h_out) && (xo + 1 < w_out))
				313	{
				314	out[output_offset + 1 * stridey_out + 1] = output_tile[1 + 1 * 2];
				315	}
				316	}
				317	}
				318	}
				319	}
Giorgio Arena	1f9ca1d	2018-03-01 11:13:45 +0000	[diff] [blame]	320	} // namespace
				321
				322	template <typename T>
				323	SimpleTensor<T> winograd_input_transform(const SimpleTensor<T> &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims)
				324	{
				325	ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height);
				326	ARM_COMPUTE_ERROR_ON(src.data_layout() != DataLayout::NCHW);
				327
				328	SimpleTensor<T> dst{ dst_shape, src.data_type() };
				329
				330	switch(kernel_dims.width)
				331	{
				332	case 3:
				333	winograd_input_transform3x3(src, dst, conv_info);
				334	break;
				335	default:
				336	ARM_COMPUTE_ERROR("Only 3x3 kernels are supported");
				337	}
				338
				339	return dst;
				340	}
				341
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	342	template <typename T>
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	343	SimpleTensor<T> winograd_filter_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &output_tile)
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	344	{
				345	ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");
				346
				347	// Create reference
				348	SimpleTensor<T> out{ output_shape, in.data_type(), 1 };
				349
				350	switch(in.shape()[0])
				351	{
				352	case 3:
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	353	winograd_filter_transform3x3(in, out, output_tile);
Gian Marco Iodice	7e4b239	2018-02-22 16:17:20 +0000	[diff] [blame]	354	break;
				355	default:
				356	ARM_COMPUTE_ERROR("Only supported 3x3 kernel");
				357	break;
				358	}
				359
				360	return out;
				361	}
				362
Gian Marco Iodice	d2fab73	2018-03-02 11:18:12 +0000	[diff] [blame]	363	template <typename T>
				364	SimpleTensor<T> winograd_output_transform(const SimpleTensor<T> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles)
				365	{
				366	ARM_COMPUTE_ERROR_ON_MSG(in.data_layout() != DataLayout::NCHW, "Only supported NCHW data format");
				367	ARM_COMPUTE_ERROR_ON(kernel_dims.width != kernel_dims.height);
				368	ARM_COMPUTE_ERROR_ON(in.shape()[1] != num_tiles.area());
				369
				370	// Create reference
				371	SimpleTensor<T> out{ output_shape, in.data_type(), 1 };
				372
				373	switch(kernel_dims.width)
				374	{
				375	case 3:
				376	winograd_output_transform3x3(in, out, num_tiles.width);
				377	break;
				378	default:
				379	ARM_COMPUTE_ERROR("Only supported 3x3 kernel");
				380	break;
				381	}
				382
				383	return out;
				384	}
				385
Giorgio Arena	1f9ca1d	2018-03-01 11:13:45 +0000	[diff] [blame]	386	template SimpleTensor<float> winograd_input_transform(const SimpleTensor<float> &src, const TensorShape &dst_shape, const PadStrideInfo &conv_info, const Size2D &kernel_dims);
Giorgio Arena	2d9de0a	2018-03-15 17:58:20 +0000	[diff] [blame^]	387	template SimpleTensor<float> winograd_filter_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const Size2D &output_tile);
Gian Marco Iodice	d2fab73	2018-03-02 11:18:12 +0000	[diff] [blame]	388	template SimpleTensor<float> winograd_output_transform(const SimpleTensor<float> &in, const TensorShape &output_shape, const Size2D &kernel_dims, const Size2D &num_tiles);
Giorgio Arena	1f9ca1d	2018-03-01 11:13:45 +0000	[diff] [blame]	389	} // namespace reference
				390	} // namespace validation
				391	} // namespace test
				392	} // namespace arm_compute