Blame - src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp - ml/armnn

blob: ce6c785329c3aa6a24df6b8d465add2d6106095e [file] [log] [blame]

telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	1	//
Teresa Charlin	588cbdf	2022-01-19 15:55:37 +0000	[diff] [blame]	2	// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
David Beck	ecb56cd	2018-09-05 12:52:57 +0100	[diff] [blame]	3	// SPDX-License-Identifier: MIT
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	4	//
				5
				6	#include "NeonConvertFp16ToFp32Workload.hpp"
Matteo Martincigh	e011d20	2019-11-28 11:35:47 +0000	[diff] [blame]	7
				8	#include <armnnUtils/FloatingPointConverter.hpp>
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	9
Aron Virginas-Tar	c9cc804	2018-11-01 16:15:57 +0000	[diff] [blame]	10	#include <Half.hpp>
Matteo Martincigh	e011d20	2019-11-28 11:35:47 +0000	[diff] [blame]	11
Aron Virginas-Tar	c9cc804	2018-11-01 16:15:57 +0000	[diff] [blame]	12	#include <backendsCommon/WorkloadUtils.hpp>
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	13
				14	namespace armnn
				15	{
				16
				17	NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
				18	const WorkloadInfo& info)
				19	: Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
				20	{
				21	this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1);
				22	GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
				23	}
				24
				25	void NeonConvertFp16ToFp32Workload::Execute() const
				26	{
Keith Davis	2d0679f	2021-08-05 11:35:00 +0100	[diff] [blame]	27	ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp16ToFp32Workload_Execute", this->GetGuid());
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	28
				29	auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
				30	{
				31	auto input = reinterpret_cast<const Half*>(src);
				32	auto output = reinterpret_cast<float*>(dst);
				33	size_t numElements = size/2; // 2 bytes per fp16
				34	armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
				35	};
				36
				37	for (const auto& pair : m_TensorHandlePairs)
				38	{
				39	CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
				40	}
				41	}
				42
David Monahan	ec81999	2022-02-10 14:47:13 +0000	[diff] [blame^]	43	void NeonConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
				44	{
				45	ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
				46	this->m_Data.m_Inputs[slot] = tensorHandle;
				47	try
				48	{
				49	Reconfigure();
				50	}
				51	catch(armnn::UnimplementedException& e)
				52	{
				53	// Cannot reconfigure, revert the slot back and throw the exception.
				54	this->m_Data.m_Inputs[slot] = backupHandle;
				55	throw e;
				56	}
				57	}
				58
				59	// Replace output tensor handle with the given TensorHandle
				60	void NeonConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
				61	{
				62	ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
				63	this->m_Data.m_Inputs[slot] = tensorHandle;
				64	try
				65	{
				66	Reconfigure();
				67	}
				68	catch(armnn::UnimplementedException& e)
				69	{
				70	// Cannot reconfigure, revert the slot back and throw the exception.
				71	this->m_Data.m_Inputs[slot] = backupHandle;
				72	throw e;
				73	}
				74	}
				75
				76	void NeonConvertFp16ToFp32Workload::Reconfigure()
				77	{
				78	throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
				79	}
				80
telsoa01	c577f2c	2018-08-31 09:22:23 +0100	[diff] [blame]	81	} //namespace armnn