/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "Globals.h"
25#include "NEON/Helper.h"
26#include "NEON/NEAccessor.h"
27#include "TensorLibrary.h"
28#include "benchmark/Datasets.h"
29#include "benchmark/Profiler.h"
30#include "benchmark/WallClockTimer.h"
31
32#include "arm_compute/core/Helpers.h"
33#include "arm_compute/core/Types.h"
34#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
35#include "arm_compute/runtime/Tensor.h"
36#include "arm_compute/runtime/TensorAllocator.h"
37
38#include "benchmark/benchmark_api.h"
39
40using namespace arm_compute;
41using namespace arm_compute::test;
42using namespace arm_compute::test::benchmark;
43using namespace arm_compute::test::neon;
44
45#include "benchmark/common/ConvolutionLayer.h"
46
47namespace
48{
49using ConvolutionLayerAlexNetF32 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
50using ConvolutionLayerAlexNetQS8 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer, DataType::QS8>;
51using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
52using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, Tensor, NEAccessor, NEConvolutionLayer>;
53using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, Tensor, NEAccessor, NEConvolutionLayer>;
54} // namespace
55
56// F32
57BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet)
58(::benchmark::State &state)
59{
60 while(state.KeepRunning())
61 {
62 // Run function
63 profiler.start();
64 conv_layer->run();
65 profiler.stop();
66 }
67}
68
69BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
70->Threads(1)
71->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
72BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
73->Threads(1)
74->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
75BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
76->Threads(1)
77->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
78BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
79->Threads(1)
80->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
81BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
82->Threads(1)
83->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
84
85// QS8
86BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
87(::benchmark::State &state)
88{
89 while(state.KeepRunning())
90 {
91 // Run function
92 profiler.start();
93 conv_layer->run();
94 profiler.stop();
95 }
96}
97
98BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
99->Threads(1)
100->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
101BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
102->Threads(1)
103->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
104BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
105->Threads(1)
106->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
107BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
108->Threads(1)
109->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
110BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
111->Threads(1)
112->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
113
114BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5)
115(::benchmark::State &state)
116{
117 while(state.KeepRunning())
118 {
119 // Run function
120 profiler.start();
121 conv_layer->run();
122 profiler.stop();
123 }
124}
125
126BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
127->Threads(1)
128->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>);
129BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
130->Threads(1)
131->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>);
132
133BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
134(::benchmark::State &state)
135{
136 while(state.KeepRunning())
137 {
138 // Run function
139 profiler.start();
140 conv_layer->run();
141 profiler.stop();
142 }
143}
144
145BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
146(::benchmark::State &state)
147{
148 while(state.KeepRunning())
149 {
150 // Run function
151 profiler.start();
152 conv_layer->run();
153 profiler.stop();
154 }
155}
156
157BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
158->Threads(1)
159->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>);
160BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
161->Threads(1)
162->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>);
163BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
164->Threads(1)
165->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>);
166BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
167->Threads(1)
168->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>);
169BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
170->Threads(1)
171->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>);
172BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
173->Threads(1)
174->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>);
175BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
176->Threads(1)
177->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>);
178BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
179->Threads(1)
180->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>);
181BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
182->Threads(1)
183->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>);
184BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
185->Threads(1)
186->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>);
187BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
188->Threads(1)
189->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>);
190BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
191->Threads(1)
192->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>);
193BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
194->Threads(1)
195->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>);
196BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
197->Threads(1)
198->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>);
199BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
200->Threads(1)
201->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>);
202BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
203->Threads(1)
204->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>);
205BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
206->Threads(1)
207->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>);
208BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
209->Threads(1)
210->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>);
211BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
212->Threads(1)
213->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>);
214BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
215->Threads(1)
216->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>);
217BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
218->Threads(1)
219->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>);
220BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
221->Threads(1)
222->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>);
223BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
224->Threads(1)
225->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>);
226BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
227->Threads(1)
228->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>);
229BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
230->Threads(1)
231->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>);
232BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
233->Threads(1)
234->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>);
235BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
236->Threads(1)
237->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>);
238BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
239->Threads(1)
240->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>);
241BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
242->Threads(1)
243->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>);
244BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
245->Threads(1)
246->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>);
247BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
248->Threads(1)
249->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>);
250BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
251->Threads(1)
252->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>);
253BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
254->Threads(1)
255->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>);
256BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
257->Threads(1)
258->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>);
259BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
260->Threads(1)
261->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>);
262BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
263->Threads(1)
264->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>);
265BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
266->Threads(1)
267->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>);
268BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
269->Threads(1)
270->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>);
271BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
272->Threads(1)
273->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>);
274BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
275->Threads(1)
276->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>);
277BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
278->Threads(1)
279->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>);
280BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
281->Threads(1)
282->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>);
283BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
284->Threads(1)
285->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>);
286BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
287->Threads(1)
288->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>);
289BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
290->Threads(1)
291->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>);
292BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
293->Threads(1)
294->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>);
295BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
296->Threads(1)
297->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>);
298BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
299->Threads(1)
300->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>);
301BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
302->Threads(1)
303->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>);