blob: 3ec8e8b6ffd88e6caa1aa2cde04a73795854d652 [file] [log] [blame]
telsoa01c577f2c2018-08-31 09:22:23 +01001//
Mike Kelly7cbe7812023-07-25 17:37:33 +01002// Copyright © 2017-2019,2021-2023 Arm Ltd and Contributors. All rights reserved.
David Beckecb56cd2018-09-05 12:52:57 +01003// SPDX-License-Identifier: MIT
telsoa01c577f2c2018-08-31 09:22:23 +01004//
5
6#include "NeonConvertFp16ToFp32Workload.hpp"
Matteo Martincighe011d202019-11-28 11:35:47 +00007
8#include <armnnUtils/FloatingPointConverter.hpp>
telsoa01c577f2c2018-08-31 09:22:23 +01009
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000010#include <Half.hpp>
Matteo Martincighe011d202019-11-28 11:35:47 +000011
Aron Virginas-Tarc9cc8042018-11-01 16:15:57 +000012#include <backendsCommon/WorkloadUtils.hpp>
telsoa01c577f2c2018-08-31 09:22:23 +010013
// Conversion policy handed to NECast::validate/configure. SATURATE clamps
// out-of-range values rather than wrapping (benign here, as FP16 -> FP32 widening
// cannot overflow, but NECast requires a policy to be specified).
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
15
telsoa01c577f2c2018-08-31 09:22:23 +010016namespace armnn
17{
18
Matthew Bentham34336f92023-04-27 12:13:50 +000019arm_compute::Status NeonConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output)
20{
21 // Fallback to portable software implementation if Compute Library NECast won't work, so
22 // this method always returns success
23
24 armnn::IgnoreUnused(input);
25 armnn::IgnoreUnused(output);
26 return arm_compute::Status();
27}
28
/// Constructs the workload and selects the conversion strategy up front:
/// hardware-accelerated NECast when the Compute Library validates it for these
/// tensors, otherwise a software path driven from Execute() via Half.hpp.
NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
                                                             const WorkloadInfo& info)
    : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
{
    // Exactly one input and one output tensor are expected.
    this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1);

    // Unwrap the backend tensor handles to the underlying ACL tensors.
    arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();

    if (arm_compute::NECast::validate(input.info(), output.info(), g_AclConvertPolicy))
    {
        // Use NECast if supported (needs hardware support for FP16)
        m_Cast.reset(new arm_compute::NECast());
        m_Cast->configure(&input, &output, g_AclConvertPolicy);
    }
    else
    {
        // Else use software implementation using Half.hpp; record the
        // input/output handle pairs the software path will copy between.
        GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
    }
}
50
/// Runs the FP16 -> FP32 conversion, using whichever strategy the constructor
/// selected: the configured NECast function, or an element-wise software
/// conversion over the gathered tensor handle pairs.
void NeonConvertFp16ToFp32Workload::Execute() const
{
    ARMNN_SCOPED_PROFILING_EVENT_NEON_NAME_GUID("NeonConvertFp16ToFp32Workload_Execute");

    if (m_Cast)
    {
        // Use NECast if supported and initialised
        m_Cast->run();
    }
    else
    {
        // Else use software implementation using Half.hpp
        auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
        {
            auto input = reinterpret_cast<const Half*>(src);
            auto output = reinterpret_cast<float*>(dst);
            size_t numElements = size/2; // 2 bytes per fp16
            armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
        };

        for (const auto& pair : m_TensorHandlePairs)
        {
            CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
        }
    }
}
77
David Monahanec819992022-02-10 14:47:13 +000078void NeonConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
79{
80 ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
81 this->m_Data.m_Inputs[slot] = tensorHandle;
82 try
83 {
84 Reconfigure();
85 }
86 catch(armnn::UnimplementedException& e)
87 {
88 // Cannot reconfigure, revert the slot back and throw the exception.
89 this->m_Data.m_Inputs[slot] = backupHandle;
90 throw e;
91 }
92}
93
94// Replace output tensor handle with the given TensorHandle
95void NeonConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
96{
97 ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
98 this->m_Data.m_Inputs[slot] = tensorHandle;
99 try
100 {
101 Reconfigure();
102 }
103 catch(armnn::UnimplementedException& e)
104 {
105 // Cannot reconfigure, revert the slot back and throw the exception.
106 this->m_Data.m_Inputs[slot] = backupHandle;
107 throw e;
108 }
109}
110
/// Reconfiguration after a tensor-handle replacement is not supported by this
/// workload; always throws UnimplementedException. The Replace*TensorHandle
/// methods rely on this to roll back their handle swap.
void NeonConvertFp16ToFp32Workload::Reconfigure()
{
    throw armnn::UnimplementedException("Reconfigure not implemented for this workload");
}
115
telsoa01c577f2c2018-08-31 09:22:23 +0100116} //namespace armnn