Blame - src/core/NEON/kernels/scale/impl/SVE/fp16.cpp - ml/ComputeLibrary

blob: 99f08dbdf9f477d6fefcaa5433e7749115664ec2 [file] [log] [blame]

Sheri Zhang	23adc4c	2021-01-05 12:48:45 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 Arm Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#include "arm_compute/core/Helpers.h"
				25	#include "arm_compute/core/ITensorPack.h"
				26	#include "arm_compute/core/Window.h"
				27	#include "src/core/NEON/NEMath.h"
				28	#include "src/core/NEON/wrapper/wrapper.h"
Sheri Zhang	23adc4c	2021-01-05 12:48:45 +0000	[diff] [blame]	29	#include "src/core/helpers/ScaleHelpers.h"
				30	#include "src/core/utils/ScaleUtils.h"
				31	#include "support/Rounding.h"
				32
				33	#include <cmath>
				34	#include <cstddef>
				35
				36	#if defined(__ARM_FEATURE_SVE)
				37	#include <arm_sve.h>
				38
				39	namespace arm_compute
				40	{
				41	namespace
				42	{
				43	void fp16_sve_scale_nearest(const ITensor src, ITensor dst, const ITensor *offsets,
				44	float sampling_offset, bool align_corners, const Window &window)
				45	{
				46	const size_t in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right;
				47	const size_t in_stride_w = src->info()->dimension(1) + src->info()->padding().top + src->info()->padding().bottom;
				48	const size_t in_stride_wc = in_stride_w * in_stride_c;
				49	const size_t in_dim_h = src->info()->dimension(2);
				50
				51	// Compute the ratio between source height and destination height
				52	const auto hr = scale_utils::calculate_resize_ratio(in_dim_h, dst->info()->dimension(2), align_corners);
				53	const auto window_start_x = static_cast<int32_t>(window.x().start());
				54	const auto window_end_x = static_cast<int32_t>(window.x().end());
				55
				56	Window win(window);
				57	win.set(Window::DimX, Window::Dimension(0, 1, 1));
				58	Iterator out(dst, win);
				59
				60	const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
				61	const unsigned int in_stride_bytes_hwc = src->info()->strides_in_bytes()[3];
				62
				63	execute_window_loop(win, [&](const Coordinates & id)
				64	{
				65	const int32_t offset = reinterpret_cast<const int32_t >(offsets->ptr_to_element(Coordinates(id.y(), id.z()))) * in_stride_c;
				66	const auto in_hi = static_cast<int>(align_corners ? utils::rounding::round_half_away_from_zero((id.z() + sampling_offset) * hr) : std::floor((id.z() + sampling_offset) * hr));
				67	const int offset_row = in_hi * in_stride_wc;
				68	const auto in_ptr = reinterpret_cast<const float16_t >(in_ptr_start + in_stride_bytes_hwc id[3]);
				69	const auto out_ptr = reinterpret_cast<float16_t *>(out.ptr());
				70
				71	// Compute S elements per iteration
				72	int x = window_start_x;
				73	svbool_t pg = svwhilelt_b16(x, window_end_x);
				74	do
				75	{
				76	// Store results
				77	svst1_f16(pg, out_ptr + x, svld1_f16(pg, in_ptr + offset + offset_row + x));
				78
				79	x += svcntw();
				80	pg = svwhilelt_b16(x, window_end_x);
				81	}
				82	while(svptest_any(svptrue_b16(), pg));
				83	},
				84	out);
				85	}
				86
				87	void fp16_sve_scale_bilinear(const ITensor src, ITensor dst, const ITensor offsets, const ITensor dx, const ITensor *dy,
				88	BorderMode border_mode, PixelValue constant_border_value, float sampling_offset,
				89	bool align_corners, const Window &window)
				90	{
				91	// Compute the ratio between source height and destination height
				92	const auto hr = scale_utils::calculate_resize_ratio(src->info()->dimension(2), dst->info()->dimension(2), align_corners);
				93
				94	Iterator out(dst, window);
				95	const int in_stride_c = src->info()->dimension(0) + src->info()->padding().left + src->info()->padding().right;
				96	const int in_dim_w = src->info()->dimension(1);
				97	const int in_dim_h = src->info()->dimension(2);
				98	const int in_stride_wc = in_stride_c * (in_dim_w + src->info()->padding().top + src->info()->padding().bottom);
				99
				100	// Don't increment in Y and Z direction for the input tensor
				101	// A pointer to the start of this plane is needed as base for the precomputed offsets
				102	Window win_in(window);
				103	win_in.set(Window::DimY, Window::Dimension(0, 0, 0));
				104	win_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
				105	Iterator in(src, win_in);
				106
				107	if(border_mode == BorderMode::CONSTANT)
				108	{
				109	using ConstType = typename std::conditional<std::is_same<float16_t, float16_t>::value, half, float16_t>::type;
				110
				111	const float16_t const_border_value = static_cast<float16_t>(constant_border_value.get<ConstType>());
				112	execute_window_loop(window, [&](const Coordinates & id)
				113	{
				114	const auto offset = reinterpret_cast<const int32_t >(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
				115	const auto dx_val = reinterpret_cast<const float >(dx->ptr_to_element(Coordinates(id.y(), id.z())));
				116	const auto dy_val = reinterpret_cast<const float >(dy->ptr_to_element(Coordinates(id.y(), id.z())));
				117	const int32_t in_hi = std::floor((id.z() + sampling_offset) * hr - sampling_offset);
				118	const float16_t in_ptr = reinterpret_cast<const float16_t >(in.ptr()) + offset * in_stride_c + in_hi * in_stride_wc;
				119
				120	const auto a00 = (0 <= offset && offset < in_dim_w && 0 <= in_hi && in_hi < in_dim_h) ? *in_ptr : const_border_value;
				121	const auto a01 = (-1 <= offset && offset < in_dim_w - 1 && 0 <= in_hi && in_hi < in_dim_h) ? *(in_ptr + in_stride_c) : const_border_value;
				122	const auto a10 = (0 <= offset && offset < in_dim_w && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + in_stride_wc) : const_border_value;
				123	const auto a11 = (-1 <= offset && offset < in_dim_w - 1 && -1 <= in_hi && in_hi < in_dim_h - 1) ? *(in_ptr + in_stride_c + in_stride_wc) : const_border_value;
				124
				125	reinterpret_cast<float16_t >(out.ptr()) = static_cast<float16_t>(scale_helpers::delta_bilinear(a00, a01, a10, a11, dx_val, dy_val));
				126	},
				127	in, out);
				128	}
				129	else if(border_mode == BorderMode::REPLICATE)
				130	{
				131	execute_window_loop(window, [&](const Coordinates & id)
				132	{
				133	const auto offset = reinterpret_cast<const int32_t >(offsets->ptr_to_element(Coordinates(id.y(), id.z())));
				134	const auto dx_val = reinterpret_cast<const float >(dx->ptr_to_element(Coordinates(id.y(), id.z())));
				135	const auto dy_val = reinterpret_cast<const float >(dy->ptr_to_element(Coordinates(id.y(), id.z())));
				136	const int in_hi = std::floor((id.z() + sampling_offset) * hr - sampling_offset);
				137
				138	auto clamped_w = utility::clamp<int>(offset, 0, in_dim_w - 1);
				139	auto clamped_w1 = utility::clamp<int>(offset + 1, 0, in_dim_w - 1);
				140	auto clamped_h = utility::clamp<int>(in_hi, 0, in_dim_h - 1);
				141	auto clamped_h1 = utility::clamp<int>(in_hi + 1, 0, in_dim_h - 1);
				142
				143	const auto a00 = (reinterpret_cast<const float16_t >(in.ptr()) + clamped_w * in_stride_c + clamped_h * in_stride_wc);
				144	const auto a01 = (reinterpret_cast<const float16_t >(in.ptr()) + clamped_w1 * in_stride_c + clamped_h * in_stride_wc);
				145	const auto a10 = (reinterpret_cast<const float16_t >(in.ptr()) + clamped_w * in_stride_c + clamped_h1 * in_stride_wc);
				146	const auto a11 = (reinterpret_cast<const float16_t >(in.ptr()) + clamped_w1 * in_stride_c + clamped_h1 * in_stride_wc);
				147
				148	reinterpret_cast<float16_t >(out.ptr()) = static_cast<float16_t>(scale_helpers::delta_bilinear(a00, a01, a10, a11, dx_val, dy_val));
				149	},
				150	in, out);
				151	}
				152	else
				153	{
				154	ARM_COMPUTE_ERROR("Not implemented");
				155	}
				156	}
				157	}
				158	namespace cpu
				159	{
				160	void fp16_sve_scale(const ITensor src, ITensor dst, const ITensor offsets, const ITensor dx, const ITensor *dy,
				161	InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset,
				162	bool align_corners, const Window &window)
				163	{
				164	if(policy == InterpolationPolicy::BILINEAR)
				165	{
				166	fp16_sve_scale_bilinear(src, dst, offsets, dx, dy, border_mode, constant_border_value, sampling_offset, align_corners, window);
				167	}
				168	else if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
				169	{
				170	fp16_sve_scale_nearest(src, dst, offsets, sampling_offset, align_corners, window);
				171	}
				172	}
				173	} // namespace cpu
				174	} // namespace arm_compute
				175
				176	#endif // __ARM_FEATURE_SVE