/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "Globals.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025#include "NEON/NEAccessor.h"
26#include "TensorLibrary.h"
27#include "benchmark/Datasets.h"
28#include "benchmark/Profiler.h"
29#include "benchmark/WallClockTimer.h"
30
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/Types.h"
33#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
34#include "arm_compute/runtime/Tensor.h"
35#include "arm_compute/runtime/TensorAllocator.h"
36
37#include "benchmark/benchmark_api.h"
38
39using namespace arm_compute;
40using namespace arm_compute::test;
41using namespace arm_compute::test::benchmark;
42using namespace arm_compute::test::neon;
43
44#include "benchmark/common/ConvolutionLayer.h"
45
46namespace
47{
48using ConvolutionLayerAlexNetF32 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
49using ConvolutionLayerAlexNetQS8 = ConvolutionLayer<AlexNetConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer, DataType::QS8>;
50using ConvolutionLayerLeNet5 = ConvolutionLayer<LeNet5ConvolutionLayerDataset, Tensor, NEAccessor, NEConvolutionLayer>;
51using ConvolutionLayerGoogLeNet1 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset1, Tensor, NEAccessor, NEConvolutionLayer>;
52using ConvolutionLayerGoogLeNet2 = ConvolutionLayer<GoogLeNetConvolutionLayerDataset2, Tensor, NEAccessor, NEConvolutionLayer>;
53} // namespace
54
55// F32
56BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet)
57(::benchmark::State &state)
58{
59 while(state.KeepRunning())
60 {
61 // Run function
62 profiler.start();
63 conv_layer->run();
64 profiler.stop();
65 }
66}
67
68BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
69->Threads(1)
70->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
71BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
72->Threads(1)
73->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
74BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
75->Threads(1)
76->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
77BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
78->Threads(1)
79->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
80BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
81->Threads(1)
82->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
83
84// QS8
85BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
86(::benchmark::State &state)
87{
88 while(state.KeepRunning())
89 {
90 // Run function
91 profiler.start();
92 conv_layer->run();
93 profiler.stop();
94 }
95}
96
97BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
98->Threads(1)
99->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 0, 1, 4, 8>);
100BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
101->Threads(1)
102->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 1, 1, 4, 8>);
103BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
104->Threads(1)
105->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 2, 1, 4, 8>);
106BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
107->Threads(1)
108->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 3, 1, 4, 8>);
109BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
110->Threads(1)
111->Apply(DataSetArgBatched<AlexNetConvolutionLayerDataset, 4, 1, 4, 8>);
112
113BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5)
114(::benchmark::State &state)
115{
116 while(state.KeepRunning())
117 {
118 // Run function
119 profiler.start();
120 conv_layer->run();
121 profiler.stop();
122 }
123}
124
125BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
126->Threads(1)
127->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 0, 1, 4, 8>);
128BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
129->Threads(1)
130->Apply(DataSetArgBatched<LeNet5ConvolutionLayerDataset, 1, 1, 4, 8>);
131
132BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
133(::benchmark::State &state)
134{
135 while(state.KeepRunning())
136 {
137 // Run function
138 profiler.start();
139 conv_layer->run();
140 profiler.stop();
141 }
142}
143
144BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
145(::benchmark::State &state)
146{
147 while(state.KeepRunning())
148 {
149 // Run function
150 profiler.start();
151 conv_layer->run();
152 profiler.stop();
153 }
154}
155
156BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
157->Threads(1)
158->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 0, 1, 4, 8>);
159BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
160->Threads(1)
161->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 1, 1, 4, 8>);
162BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
163->Threads(1)
164->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 2, 1, 4, 8>);
165BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
166->Threads(1)
167->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 3, 1, 4, 8>);
168BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
169->Threads(1)
170->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 4, 1, 4, 8>);
171BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
172->Threads(1)
173->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 5, 1, 4, 8>);
174BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
175->Threads(1)
176->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 6, 1, 4, 8>);
177BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
178->Threads(1)
179->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 7, 1, 4, 8>);
180BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
181->Threads(1)
182->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 8, 1, 4, 8>);
183BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
184->Threads(1)
185->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 9, 1, 4, 8>);
186BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
187->Threads(1)
188->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 10, 1, 4, 8>);
189BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
190->Threads(1)
191->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 11, 1, 4, 8>);
192BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
193->Threads(1)
194->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 12, 1, 4, 8>);
195BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
196->Threads(1)
197->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 13, 1, 4, 8>);
198BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
199->Threads(1)
200->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 14, 1, 4, 8>);
201BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
202->Threads(1)
203->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 15, 1, 4, 8>);
204BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
205->Threads(1)
206->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 16, 1, 4, 8>);
207BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
208->Threads(1)
209->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 17, 1, 4, 8>);
210BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
211->Threads(1)
212->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 18, 1, 4, 8>);
213BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
214->Threads(1)
215->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 19, 1, 4, 8>);
216BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
217->Threads(1)
218->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 20, 1, 4, 8>);
219BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
220->Threads(1)
221->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 21, 1, 4, 8>);
222BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
223->Threads(1)
224->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 22, 1, 4, 8>);
225BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
226->Threads(1)
227->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 23, 1, 4, 8>);
228BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
229->Threads(1)
230->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 24, 1, 4, 8>);
231BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
232->Threads(1)
233->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 25, 1, 4, 8>);
234BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
235->Threads(1)
236->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 26, 1, 4, 8>);
237BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
238->Threads(1)
239->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 27, 1, 4, 8>);
240BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
241->Threads(1)
242->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 28, 1, 4, 8>);
243BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
244->Threads(1)
245->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 29, 1, 4, 8>);
246BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
247->Threads(1)
248->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 30, 1, 4, 8>);
249BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
250->Threads(1)
251->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset1, 31, 1, 4, 8>);
252BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
253->Threads(1)
254->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 0, 1, 4, 8>);
255BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
256->Threads(1)
257->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 1, 1, 4, 8>);
258BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
259->Threads(1)
260->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 2, 1, 4, 8>);
261BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
262->Threads(1)
263->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 3, 1, 4, 8>);
264BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
265->Threads(1)
266->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 4, 1, 4, 8>);
267BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
268->Threads(1)
269->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 5, 1, 4, 8>);
270BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
271->Threads(1)
272->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 6, 1, 4, 8>);
273BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
274->Threads(1)
275->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 7, 1, 4, 8>);
276BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
277->Threads(1)
278->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 8, 1, 4, 8>);
279BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
280->Threads(1)
281->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 9, 1, 4, 8>);
282BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
283->Threads(1)
284->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 10, 1, 4, 8>);
285BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
286->Threads(1)
287->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 11, 1, 4, 8>);
288BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
289->Threads(1)
290->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 12, 1, 4, 8>);
291BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
292->Threads(1)
293->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 13, 1, 4, 8>);
294BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
295->Threads(1)
296->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 14, 1, 4, 8>);
297BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
298->Threads(1)
299->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 15, 1, 4, 8>);
300BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
301->Threads(1)
302->Apply(DataSetArgBatched<GoogLeNetConvolutionLayerDataset2, 16, 1, 4, 8>);