Blame - src/core/NEON/kernels/convolution/winograd/weight_transforms/cpp_fp32_1x4_1x5.cpp - ml/ComputeLibrary

blob: 47a85e306d8ab5552c3d56f2dcf3c45bdf3434bf [file] [log] [blame]

ramelg01	a1f7851	2022-06-29 16:28:10 +0100	[diff] [blame]	1	/*
Viet-Hoa Do	bb1ab05	2022-12-23 14:48:33 +0000	[diff] [blame]	2	* Copyright (c) 2022 Arm Limited.
ramelg01	a1f7851	2022-06-29 16:28:10 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#include <cstddef>
				26
				27	namespace arm_conv {
				28	namespace winograd {
				29	namespace weight_transform {
				30
				31	void cpp_fp32_1x4_1x5(
				32	unsigned int n_channels,
				33	const float *inptr,
				34	size_t, // ld_weight_row
				35	size_t ld_weight_col,
				36	float *outptr,
				37	size_t matrix_stride
				38	)
				39	{
				40	constexpr auto kernel_cols = 5u, inner_tile_cols = 8u;
				41
				42	// For each output channel
				43	for (; n_channels; n_channels--)
				44	{
				45	// Matrices used and computed in this kernel
				46	float w[kernel_cols], V[inner_tile_cols];
				47
				48	// Read weights
				49	for (auto j = 0u; j < kernel_cols; j++)
				50	{
				51	w[j] = (inptr + j ld_weight_col);
				52	}
				53
				54	// Compute V = w WT
				55	V[0] = (w[0]*-1) / 36;
				56	V[1] = (w[1]-1 + w[3]-1 + w[0]1 + w[2]1 + w[4]*1) / 48;
				57	V[2] = (w[0]1 + w[1]1 + w[2]1 + w[3]1 + w[4]*1) / 48;
				58	V[3] = (w[0]-1 + w[4]-16 + w[2]-4 + w[1]2 + w[3]*8) / 120;
				59	V[4] = (w[0]-1 + w[4]-16 + w[3]-8 + w[2]-4 + w[1]*-2) / 120;
				60	V[5] = (w[3]-27 + w[1]-3 + w[2]9 + w[4]81 + w[0]*1) / 720;
				61	V[6] = (w[1]3 + w[2]9 + w[3]27 + w[4]81 + w[0]*1) / 720;
				62	V[7] = (w[4]*1) / 1;
				63
				64	// Store the transformed weights
				65	for (auto j = 0u; j < inner_tile_cols; j++)
				66	{
				67	(outptr + jmatrix_stride) = V[j];
				68	}
				69
				70	inptr++;
				71	outptr++;
				72	}
				73	}
				74
				75	} // namespace weight_transform
				76	} // namespace winograd
				77	} // namespace arm_conv