Blame - src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp - ml/ComputeLibrary

blob: 9b42224eaf2829449a62695fe76ab9686322d217 [file] [log] [blame]

Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2019 Arm Limited.
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#include "arm.hpp"
				26	#include "kernel.hpp"
				27
				28	namespace winograd
				29	{
				30
				31	template <>
				32	void WeightTransform<1, 3, 1, 8, float, float, WinogradRoots::Integers>::execute(
				33	const int n_output_channels,
				34	const int n_input_channels,
				35	const float* const input, // NOTE: Data in HWIO order
				36	float* const output,
				37	const int matrix_stride,
				38	const int matrix_row_stride
				39	)
				40	{
				41	// Get pointers to each cell of the weight tensor
				42	const auto weight_col_stride = n_input_channels * n_output_channels;
				43	const float *inptrs[3];
				44	for (int j = 0; j < 3; j++)
				45	{
				46	inptrs[j] = input + j*weight_col_stride;
				47	}
				48
				49	// For each input channel
				50	for (int ic = 0; ic < n_input_channels; ic++)
				51	{
				52	float outptr = output + ic matrix_row_stride;
				53
				54	// For each output channel
				55	int channels_remaining = n_output_channels;
				56	for (; channels_remaining; channels_remaining--)
				57	{
				58	// Matrices used and computed in this kernel
				59	float w[3], V[inner_tile_cols];
				60
				61	// Read weights
				62	for (int j = 0; j < 3; j++)
				63	{
				64	w[j] = *(inptrs[j]++);
				65	}
				66
				67	// Compute V = w WT
				68	V[0] = (w[0]*-1) / 36.0f;
				69	V[1] = (w[1]-1 + w[0]1 + w[2]*1) / 48.0f;
				70	V[2] = (w[0]1 + w[1]1 + w[2]*1) / 48.0f;
				71	V[3] = (w[0]-1 + w[2]-4 + w[1]*2) / 120.0f;
				72	V[4] = (w[0]-1 + w[2]-4 + w[1]*-2) / 120.0f;
				73	V[5] = (w[1]-3 + w[2]9 + w[0]*1) / 720.0f;
				74	V[6] = (w[1]3 + w[2]9 + w[0]*1) / 720.0f;
				75	V[7] = (w[2]*1) / 1;
				76
				77	// Store the transformed weights
				78	for (int j = 0; j < inner_tile_cols; j++)
				79	{
				80	(outptr + jmatrix_stride) = V[j];
				81	}
				82	outptr++;
				83	}
				84	}
				85	}
				86
				87	template class WeightTransform<1, 3, 1, 8, float, float, WinogradRoots::Integers>;
				88	template class WeightTransform<3, 1, 8, 1, float, float, WinogradRoots::Integers>;
				89
				90	} // namespace