blob: 3ec8e8b6ffd88e6caa1aa2cde04a73795854d652 [file] [log] [blame]
telsoa01c577f2c2018-08-31 09:22:23 +01001//
Mike Kelly7cbe7812023-07-25 17:37:33 +01002// Copyright © 2017-2019,2021-2023 Arm Ltd and Contributors. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa01c577f2c2018-08-31 09:22:23 +01004//
5
6#include "NeonConvertFp16ToFp32Workload.hpp"
Matteo Martincighe011d202019-11-28 11:35:47 +00007
8#include <armnnUtils/FloatingPointConverter.hpp>
telsoa01c577f2c2018-08-31 09:22:23 +01009
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000010#include <Half.hpp>
Matteo Martincighe011d202019-11-28 11:35:47 +000011
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000012#include <backendsCommon/WorkloadUtils.hpp>
telsoa01c577f2c2018-08-31 09:22:23 +010013
// Conversion policy handed to NECast::validate/configure. SATURATE clamps
// out-of-range values rather than wrapping (benign here, as FP16 -> FP32 widening
// cannot overflow, but NECast requires a policy to be specified).
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
15
telsoa01c577f2c2018-08-31 09:22:23 +010016namespace armnn
17{
18
Matthew Bentham34336f92023-04-27 12:13:50 +000019arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output)
20{
21 // Fallback to portable software implementation if Compute Library NECast won't work, so
22 // this method always returns success
23
24 armnn::IgnoreUnused(input);
25 armnn::IgnoreUnused(output);
26 return arm_compute::Status();
27}
28
/// Constructs the workload and selects the conversion strategy up front:
/// hardware-accelerated NECast when the Compute Library validates it for these
/// tensors, otherwise a software path driven from Execute() via Half.hpp.
NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info)
    : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
{
    // Exactly one input and one output tensor are expected.
    this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1);

    // Unwrap the backend tensor handles to the underlying ACL tensors.
    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    if (arm_compute::NECast::validate(input.info(), output.info(), g_AclConvertPolicy))
    {
        // Use NECast if supported (needs hardware support for FP16)
        m_Cast.reset(new arm_compute::NECast());
        m_Cast->configure(&input, &output, g_AclConvertPolicy);
    }
    else
    {
        // Else use software implementation using Half.hpp; record the
        // input/output handle pairs the software path will copy between.
        GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
    }
}
50
/// Runs the FP16 -> FP32 conversion, using whichever strategy the constructor
/// selected: the configured NECast function, or an element-wise software
/// conversion over the gathered tensor handle pairs.
void NeonConvertFp16ToFp32Workload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonConvertFp16ToFp32Workload_Execute");

    if (m_Cast)
    {
        // Use NECast if supported and initialised
        m_Cast->run();
    }
    else
    {
        // Else use software implementation using Half.hpp
        auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
        {
            auto input = reinterpret_cast<const Half*>(src);
            auto output = reinterpret_cast<float*>(dst);
            size_t numElements = size/2; // 2 bytes per fp16
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
        };

        for (const auto& pair : m_TensorHandlePairs)
        {
            CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
        }
    }
}
77
David Monahanec819992022-02-10 14:47:13 +000078void NeonConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
79{
80 ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
81 this->m_Data.m_Inputs[slot] = tensorHandle;
82 try
83 {
84 Reconfigure();
85 }
86 catch(armnn::UnimplementedException& e)
87 {
88 // Cannot reconfigure, revert the slot back and throw the exception.
89 this->m_Data.m_Inputs[slot] = backupHandle;
90 throw e;
91 }
92}
93
94// Replace output tensor handle with the given TensorHandle
95void NeonConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
96{
97 ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
98 this->m_Data.m_Inputs[slot] = tensorHandle;
99 try
100 {
101 Reconfigure();
102 }
103 catch(armnn::UnimplementedException& e)
104 {
105 // Cannot reconfigure, revert the slot back and throw the exception.
106 this->m_Data.m_Inputs[slot] = backupHandle;
107 throw e;
108 }
109}
110
/// Reconfiguration after a tensor-handle replacement is not supported by this
/// workload; always throws UnimplementedException. The Replace*TensorHandle
/// methods rely on this to roll back their handle swap.
void NeonConvertFp16ToFp32Workload::Reconfigure()
{
    throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
}
115
telsoa01c577f2c2018-08-31 09:22:23 +0100116} //namespace armnn