blob: f770f46c8170846070ea9b840acdf53c847fed85 [file] [log] [blame]
Teresa Charlin9145e382023-08-17 18:44:58 +01001//
2// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include "NeonFusedWorkload.hpp"
7#include "NeonWorkloadUtils.hpp"
8
9#include <aclCommon/ArmComputeTensorUtils.hpp>
10#include <aclCommon/ArmComputeUtils.hpp>
11
12#include <armnn/utility/PolymorphicDowncast.hpp>
13#include <armnn/backends/TensorHandle.hpp>
14
15#include <arm_compute/runtime/NEON/functions/NEAddMulAdd.h>
16
17namespace armnn
18{
19
20using namespace armcomputetensorutils;
21
22arm_compute::Status NeonFusedWorkloadValidate(const std::vector<std::reference_wrapper<TensorInfo>>& inputInfos,
23 const std::vector<std::reference_wrapper<TensorInfo>>& outputInfos,
24 const FusedDescriptor& fusedDescriptor,
25 const ActivationDescriptor* activationDescriptor)
26{
27 std::vector<arm_compute::TensorInfo> actInputInfos;
28 actInputInfos.reserve(inputInfos.size());
29 for (size_t i = 0u; i < inputInfos.size(); ++i)
30 {
31 actInputInfos.emplace_back(BuildArmComputeTensorInfo(inputInfos[i]));
32 }
33
34 std::vector<arm_compute::TensorInfo> actOutputInfos;
35 actOutputInfos.reserve(outputInfos.size());
36 for (size_t i = 0u; i < outputInfos.size(); ++i)
37 {
38 actOutputInfos.emplace_back(BuildArmComputeTensorInfo(outputInfos[i]));
39 }
40
41 const arm_compute::ActivationLayerInfo activationInfo =
42 ConvertActivationDescriptorToAclActivationLayerInfo(activationDescriptor);
43
44 switch (fusedDescriptor.m_FusedKernelType)
45 {
46 case FusedKernelType::AddMulAdd:
47 return arm_compute::NEAddMulAdd::validate(
48 &actInputInfos[0],
49 &actInputInfos[1],
50 &actInputInfos[2], // bn_mul
51 &actInputInfos[3], // bn_add
52 actOutputInfos.size() == 1 ? nullptr : &actOutputInfos[0], // add_output
53 actOutputInfos.size() == 1 ? &actOutputInfos[0] : &actOutputInfos[1], // final_output
54 arm_compute::ConvertPolicy::SATURATE,
55 activationInfo);
56 default:
57 return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
58 "NeonFusedWorkloadValidate: no valid kernel type"};
59 }
60}
61
62
63NeonFusedWorkload::NeonFusedWorkload(const FusedQueueDescriptor& descriptor, const WorkloadInfo& info)
64 : NeonBaseWorkload<FusedQueueDescriptor>(descriptor, info)
65{
66 m_Data.ValidateInputsOutputs("NeonFusedWorkload",
67 static_cast<unsigned int>(info.m_InputTensorInfos.size()),
68 static_cast<unsigned int>(info.m_OutputTensorInfos.size()));
69
70 std::vector<arm_compute::ITensor*> inputs;
71 inputs.reserve(info.m_InputTensorInfos.size());
72 for (auto input : m_Data.m_Inputs)
73 {
74 inputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(input)->GetTensor());
75 }
76
77 std::vector<arm_compute::ITensor*> outputs;
78 outputs.reserve(info.m_OutputTensorInfos.size());
79 for (auto output : m_Data.m_Outputs)
80 {
81 outputs.emplace_back(&PolymorphicDowncast<IAclTensorHandle*>(output)->GetTensor());
82 }
83
84 const arm_compute::ActivationLayerInfo activationInfo =
85 ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
86
87 switch (descriptor.m_Parameters.m_FusedKernelType)
88 {
89 case FusedKernelType::AddMulAdd:
90 {
91 auto layer = std::make_unique<arm_compute::NEAddMulAdd>();
92 layer->configure(inputs[0],
93 inputs[1],
94 inputs[2], // bn_mul
95 inputs[3], // bn_add
96 outputs.size() == 1 ? nullptr : outputs[0], // add_output
97 outputs.size() == 1 ? outputs[0] : outputs[1], // final_output
98 arm_compute::ConvertPolicy::SATURATE,
99 activationInfo);
100 m_FusedLayer.reset(layer.release());
101 break;
102 }
103 default:
104 throw Exception("NeonFusedWorkload: no valid kernel type.");
105 }
106}
107
108void NeonFusedWorkload::Execute() const
109{
110 ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFusedWorkload_Execute", this->GetGuid());
111 m_FusedLayer->run();
112}
113
114} //namespace armnn
115