//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include <armnn/Exceptions.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>
#include <backendsCommon/Workload.hpp>

#include <boost/test/unit_test.hpp>

#include <thread>

using namespace armnn;

BOOST_AUTO_TEST_SUITE(WorkloadAsyncExecuteTests)

namespace
{

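// Test workload that overrides both Execute() and ExecuteAsync().
// Execute() multiplies the input into the output while ExecuteAsync() adds,
// so the results of the two paths are distinguishable.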
struct Workload0 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload0(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    Workload0() : BaseWorkload(ElementwiseUnaryQueueDescriptor(), WorkloadInfo())
    {
    }

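    // Multiplies the input into the output, then mirrors the result back into the input.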
    void Execute() const override
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0]->Map());

        for (unsigned int i = 0; i < m_Data.m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }

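    // Adds the input into the output, then mirrors the result back into the input.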
    void ExecuteAsync(WorkingMemDescriptor& desc) override
    {
        int* inVals = static_cast<int*>(desc.m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(desc.m_Outputs[0]->Map());

        for (unsigned int i = 0; i < desc.m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] + outVals[i];
            inVals[i] = outVals[i];
        }
    }

    QueueDescriptor* GetQueueDescriptor()
    {
        return &m_Data;
    }
};

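// Test workload that only overrides Execute(); calls to ExecuteAsync() should
// fall back to the default thread-safe implementation in BaseWorkload.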
struct Workload1 : BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
    Workload1(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
        : BaseWorkload(descriptor, info)
    {
    }

    void Execute() const override
    {
        int* inVals = static_cast<int*>(m_Data.m_Inputs[0]->Map());
        int* outVals = static_cast<int*>(m_Data.m_Outputs[0]->Map());

        for (unsigned int i = 0; i < m_Data.m_Inputs[0]->GetShape().GetNumElements(); ++i)
        {
            outVals[i] = inVals[i] * outVals[i];
            inVals[i] = outVals[i];
        }
    }
};

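// Checks that every element of the given tensor holds the expected value.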
void ValidateTensor(ITensorHandle* tensorHandle, int expectedValue)
{
    int* actualOutput = static_cast<int*>(tensorHandle->Map());

    bool allValuesCorrect = true;
    for (unsigned int i = 0; i < tensorHandle->GetShape().GetNumElements(); ++i)
    {
        if (actualOutput[i] != expectedValue)
        {
            allValuesCorrect = false;
        }
    }

    BOOST_CHECK(allValuesCorrect);
}

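// Builds a workload of the given type around a single input and a single output tensor handle.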
template<typename Workload>
std::unique_ptr<Workload> CreateWorkload(TensorInfo info, ITensorHandle* inputTensor, ITensorHandle* outputTensor)
{
    WorkloadInfo workloadInfo;
    workloadInfo.m_InputTensorInfos = std::vector<TensorInfo>{info};
    workloadInfo.m_OutputTensorInfos = std::vector<TensorInfo>{info};

    ElementwiseUnaryQueueDescriptor elementwiseUnaryQueueDescriptor;
    elementwiseUnaryQueueDescriptor.m_Inputs = std::vector<ITensorHandle*>{inputTensor};
    elementwiseUnaryQueueDescriptor.m_Outputs = std::vector<ITensorHandle*>{outputTensor};

    return std::make_unique<Workload>(elementwiseUnaryQueueDescriptor, workloadInfo);
}

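// Checks that Execute() and ExecuteAsync() operate on separate tensors:
// the synchronous call works on the workload's own queue descriptor, while
// the asynchronous call works on the WorkingMemDescriptor passed to it.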
BOOST_AUTO_TEST_CASE(TestAsyncExecute)
{
    TensorInfo info({5}, DataType::Signed32);

    int inVals[5]{2, 2, 2, 2, 2};
    int outVals[5]{1, 1, 1, 1, 1};

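    // Execute() multiplies the input into the output (2 * 1 = 2);
    // ExecuteAsync() adds the input into the output (2 + 1 = 3).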
    int expectedExecuteVal = 2;
    int expectedExecuteAsyncVal = 3;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);

    ScopedCpuTensorHandle syncInput0(constInputTensor);
    ScopedCpuTensorHandle syncOutput0(constOutputTensor);

    std::unique_ptr<Workload0> workload0 = CreateWorkload<Workload0>(info, &syncInput0, &syncOutput0);

    workload0->Execute();

    ScopedCpuTensorHandle asyncInput0(constInputTensor);
    ScopedCpuTensorHandle asyncOutput0(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor0;
    workingMemDescriptor0.m_Inputs = std::vector<ITensorHandle*>{&asyncInput0};
    workingMemDescriptor0.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput0};

    workload0->ExecuteAsync(workingMemDescriptor0);

    // The inputs are modified by Execute/ExecuteAsync as well, so validate them
    // too to make sure the two calls did not interfere with each other
    ValidateTensor(workingMemDescriptor0.m_Outputs[0], expectedExecuteAsyncVal);
    ValidateTensor(workingMemDescriptor0.m_Inputs[0], expectedExecuteAsyncVal);

    ValidateTensor(workload0->GetQueueDescriptor()->m_Outputs[0], expectedExecuteVal);
    ValidateTensor(workload0->GetQueueDescriptor()->m_Inputs[0], expectedExecuteVal);
}

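// Checks that a workload without its own ExecuteAsync() implementation falls
// back to the default, which runs Execute() on the tensors from the working
// memory descriptor rather than on the workload's own tensors.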
BOOST_AUTO_TEST_CASE(TestDefaultAsyncExecute)
{
    TensorInfo info({5}, DataType::Signed32);

    std::vector<int> inVals{2, 2, 2, 2, 2};
    std::vector<int> outVals{1, 1, 1, 1, 1};
    std::vector<int> defaultVals{0, 0, 0, 0, 0};

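    // The default ExecuteAsync() calls Execute(), which multiplies the input
    // into the output (2 * 1 = 2).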
    int expectedExecuteVal = 2;

    ConstTensor constInputTensor(info, inVals);
    ConstTensor constOutputTensor(info, outVals);
    ConstTensor defaultTensor(info, defaultVals);

    ScopedCpuTensorHandle defaultInput(defaultTensor);
    ScopedCpuTensorHandle defaultOutput(defaultTensor);

    std::unique_ptr<Workload1> workload1 = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedCpuTensorHandle asyncInput(constInputTensor);
    ScopedCpuTensorHandle asyncOutput(constOutputTensor);

    WorkingMemDescriptor workingMemDescriptor;
    workingMemDescriptor.m_Inputs = std::vector<ITensorHandle*>{&asyncInput};
    workingMemDescriptor.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput};

    workload1->ExecuteAsync(workingMemDescriptor);

    // workload1 has no ExecuteAsync implementation of its own, so it should use
    // the default BaseWorkload::ExecuteAsync, which calls workload1's Execute()
    // in a thread-safe manner
    ValidateTensor(workingMemDescriptor.m_Outputs[0], expectedExecuteVal);
    ValidateTensor(workingMemDescriptor.m_Inputs[0], expectedExecuteVal);
}

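// Runs the default ExecuteAsync() from two threads, each with its own working
// memory descriptor, to check that concurrent executions do not interfere.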
BOOST_AUTO_TEST_CASE(TestDefaultAsyncExecuteWithThreads)
{
    // Use a large vector so the threads have a chance to interact
    unsigned int vecSize = 1000;
    TensorInfo info({vecSize}, DataType::Signed32);

    std::vector<int> inVals1(vecSize, 2);
    std::vector<int> outVals1(vecSize, 1);
    std::vector<int> inVals2(vecSize, 5);
    std::vector<int> outVals2(vecSize, -1);

    std::vector<int> defaultVals(vecSize, 0);

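    // Each descriptor is executed twice. Execute() multiplies the input into
    // the output and mirrors the result back into the input, so:
    // descriptor 1: 2 * 1 = 2, then 2 * 2 = 4
    // descriptor 2: 5 * -1 = -5, then -5 * -5 = 25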
    int expectedExecuteVal1 = 4;
    int expectedExecuteVal2 = 25;

    ConstTensor constInputTensor1(info, inVals1);
    ConstTensor constOutputTensor1(info, outVals1);

    ConstTensor constInputTensor2(info, inVals2);
    ConstTensor constOutputTensor2(info, outVals2);

    ConstTensor defaultTensor(info, defaultVals);

    ScopedCpuTensorHandle defaultInput(defaultTensor);
    ScopedCpuTensorHandle defaultOutput(defaultTensor);
    std::unique_ptr<Workload1> workload = CreateWorkload<Workload1>(info, &defaultInput, &defaultOutput);

    ScopedCpuTensorHandle asyncInput1(constInputTensor1);
    ScopedCpuTensorHandle asyncOutput1(constOutputTensor1);

    WorkingMemDescriptor workingMemDescriptor1;
    workingMemDescriptor1.m_Inputs = std::vector<ITensorHandle*>{&asyncInput1};
    workingMemDescriptor1.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput1};

    ScopedCpuTensorHandle asyncInput2(constInputTensor2);
    ScopedCpuTensorHandle asyncOutput2(constOutputTensor2);

    WorkingMemDescriptor workingMemDescriptor2;
    workingMemDescriptor2.m_Inputs = std::vector<ITensorHandle*>{&asyncInput2};
    workingMemDescriptor2.m_Outputs = std::vector<ITensorHandle*>{&asyncOutput2};

    std::thread thread1([&]()
    {
        workload->ExecuteAsync(workingMemDescriptor1);
        workload->ExecuteAsync(workingMemDescriptor1);
    });

    std::thread thread2([&]()
    {
        workload->ExecuteAsync(workingMemDescriptor2);
        workload->ExecuteAsync(workingMemDescriptor2);
    });

    thread1.join();
    thread2.join();

    ValidateTensor(workingMemDescriptor1.m_Outputs[0], expectedExecuteVal1);
    ValidateTensor(workingMemDescriptor1.m_Inputs[0], expectedExecuteVal1);

    ValidateTensor(workingMemDescriptor2.m_Outputs[0], expectedExecuteVal2);
    ValidateTensor(workingMemDescriptor2.m_Inputs[0], expectedExecuteVal2);
}

} // anonymous namespace

BOOST_AUTO_TEST_SUITE_END()