blob: a10da09cbeb99a35a4ae607faf0ab140e1651a33 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "CL/CLAccessor.h"
25#include "CL/Helper.h"
26#include "Globals.h"
27#include "TensorLibrary.h"
28#include "benchmark/Datasets.h"
29#include "benchmark/Profiler.h"
30#include "benchmark/WallClockTimer.h"
31
32#include "arm_compute/core/Helpers.h"
33#include "arm_compute/core/Types.h"
34#include "arm_compute/runtime/CL/CLScheduler.h"
35#include "arm_compute/runtime/CL/CLTensor.h"
36#include "arm_compute/runtime/CL/CLTensorAllocator.h"
37#include "arm_compute/runtime/CL/functions/CLGEMM.h"
38
39#include "benchmark/benchmark_api.h"
40
41using namespace arm_compute;
42using namespace arm_compute::test;
43using namespace arm_compute::test::benchmark;
44using namespace arm_compute::test::cl;
45
46#include "benchmark/CL/GEMM.h"
47
// File-local aliases for the benchmark fixtures used below. Each alias is the
// GEMM fixture template instantiated with a dataset type, CLTensor, CLAccessor,
// the CLGEMM function, and the element data type (F16 or F32).
// NOTE(review): the GEMM template presumably comes from "benchmark/CL/GEMM.h",
// which is included just above — confirm.
48namespace
49{
// GoogLeNet GEMM datasets, half and single precision.
50using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
51using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
52using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
53using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
// MatrixMultiplyDataset, half and single precision.
SiCong Li10c672c2017-06-22 15:46:40 +010054using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
55using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010056} // namespace
57
// Benchmark body "cl_googlenet" for the GEMMFP16GoogLeNet1 fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
58BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet)
59(::benchmark::State &state)
60{
61 while(state.KeepRunning())
62 {
63 // Run function
64 profiler.start();
65 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
66 CLScheduler::get().sync();
67 profiler.stop();
68 }
69}
70
// Benchmark body "cl_googlenet" for the GEMMFP16GoogLeNet2 fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
71BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet)
72(::benchmark::State &state)
73{
74 while(state.KeepRunning())
75 {
76 // Run function
77 profiler.start();
78 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
79 CLScheduler::get().sync();
80 profiler.stop();
81 }
82}
83
// Registers the GEMMFP16GoogLeNet1 "cl_googlenet" benchmark 32 times, once per
// DataSetArg<GoogLeNetGEMMDataset1, I> with I = 0..31, each with Threads(1).
// NOTE(review): I presumably selects one entry of GoogLeNetGEMMDataset1 — confirm
// against the DataSetArg definition.
84BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
85->Threads(1)
86->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
87BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
88->Threads(1)
89->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
90BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
91->Threads(1)
92->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
93BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
94->Threads(1)
95->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
96BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
97->Threads(1)
98->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
99BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
100->Threads(1)
101->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
102BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
103->Threads(1)
104->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
105BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
106->Threads(1)
107->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
108BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
109->Threads(1)
110->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
111BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
112->Threads(1)
113->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
114BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
115->Threads(1)
116->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
117BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
118->Threads(1)
119->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
120BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
121->Threads(1)
122->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
123BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
124->Threads(1)
125->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
126BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
127->Threads(1)
128->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
129BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
130->Threads(1)
131->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
132BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
133->Threads(1)
134->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
135BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
136->Threads(1)
137->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
138BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
139->Threads(1)
140->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
141BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
142->Threads(1)
143->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
144BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
145->Threads(1)
146->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
147BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
148->Threads(1)
149->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
150BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
151->Threads(1)
152->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
153BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
154->Threads(1)
155->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
156BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
157->Threads(1)
158->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
159BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
160->Threads(1)
161->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
162BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
163->Threads(1)
164->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
165BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
166->Threads(1)
167->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
168BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
169->Threads(1)
170->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
171BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
172->Threads(1)
173->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
174BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
175->Threads(1)
176->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
177BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
178->Threads(1)
179->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
// Registers the GEMMFP16GoogLeNet2 "cl_googlenet" benchmark 32 times, once per
// DataSetArg<GoogLeNetGEMMDataset2, I> with I = 0..31, each with Threads(1).
// NOTE(review): I presumably selects one entry of GoogLeNetGEMMDataset2 — confirm
// against the DataSetArg definition.
180BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
181->Threads(1)
182->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
183BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
184->Threads(1)
185->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
186BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
187->Threads(1)
188->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
189BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
190->Threads(1)
191->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
192BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
193->Threads(1)
194->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
195BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
196->Threads(1)
197->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
198BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
199->Threads(1)
200->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
201BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
202->Threads(1)
203->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
204BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
205->Threads(1)
206->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
207BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
208->Threads(1)
209->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
210BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
211->Threads(1)
212->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
213BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
214->Threads(1)
215->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
216BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
217->Threads(1)
218->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
219BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
220->Threads(1)
221->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
222BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
223->Threads(1)
224->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
225BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
226->Threads(1)
227->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
228BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
229->Threads(1)
230->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
231BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
232->Threads(1)
233->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
234BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
235->Threads(1)
236->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
237BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
238->Threads(1)
239->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
240BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
241->Threads(1)
242->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
243BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
244->Threads(1)
245->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
246BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
247->Threads(1)
248->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
249BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
250->Threads(1)
251->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
252BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
253->Threads(1)
254->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
255BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
256->Threads(1)
257->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
258BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
259->Threads(1)
260->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
261BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
262->Threads(1)
263->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
264BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
265->Threads(1)
266->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
267BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
268->Threads(1)
269->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
270BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
271->Threads(1)
272->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
273BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
274->Threads(1)
275->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
276
// Benchmark body "cl_googlenet" for the GEMMFP32GoogLeNet1 fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
277BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet)
278(::benchmark::State &state)
279{
280 while(state.KeepRunning())
281 {
282 // Run function
283 profiler.start();
284 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
285 CLScheduler::get().sync();
286 profiler.stop();
287 }
288}
289
// Benchmark body "cl_googlenet" for the GEMMFP32GoogLeNet2 fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
290BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet)
291(::benchmark::State &state)
292{
293 while(state.KeepRunning())
294 {
295 // Run function
296 profiler.start();
297 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
298 CLScheduler::get().sync();
299 profiler.stop();
300 }
301}
302
// Registers the GEMMFP32GoogLeNet1 "cl_googlenet" benchmark 32 times, once per
// DataSetArg<GoogLeNetGEMMDataset1, I> with I = 0..31, each with Threads(1).
// NOTE(review): I presumably selects one entry of GoogLeNetGEMMDataset1 — confirm
// against the DataSetArg definition.
303BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
304->Threads(1)
305->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
306BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
307->Threads(1)
308->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
309BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
310->Threads(1)
311->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
312BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
313->Threads(1)
314->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
315BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
316->Threads(1)
317->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
318BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
319->Threads(1)
320->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
321BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
322->Threads(1)
323->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
324BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
325->Threads(1)
326->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
327BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
328->Threads(1)
329->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
330BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
331->Threads(1)
332->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
333BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
334->Threads(1)
335->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
336BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
337->Threads(1)
338->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
339BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
340->Threads(1)
341->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
342BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
343->Threads(1)
344->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
345BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
346->Threads(1)
347->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
348BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
349->Threads(1)
350->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
351BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
352->Threads(1)
353->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
354BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
355->Threads(1)
356->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
357BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
358->Threads(1)
359->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
360BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
361->Threads(1)
362->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
363BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
364->Threads(1)
365->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
366BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
367->Threads(1)
368->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
369BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
370->Threads(1)
371->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
372BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
373->Threads(1)
374->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
375BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
376->Threads(1)
377->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
378BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
379->Threads(1)
380->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
381BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
382->Threads(1)
383->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
384BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
385->Threads(1)
386->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
387BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
388->Threads(1)
389->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
390BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
391->Threads(1)
392->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
393BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
394->Threads(1)
395->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
396BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
397->Threads(1)
398->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
// Registers the GEMMFP32GoogLeNet2 "cl_googlenet" benchmark 32 times, once per
// DataSetArg<GoogLeNetGEMMDataset2, I> with I = 0..31, each with Threads(1).
// NOTE(review): I presumably selects one entry of GoogLeNetGEMMDataset2 — confirm
// against the DataSetArg definition.
399BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
400->Threads(1)
401->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
402BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
403->Threads(1)
404->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
405BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
406->Threads(1)
407->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
408BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
409->Threads(1)
410->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
411BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
412->Threads(1)
413->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
414BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
415->Threads(1)
416->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
417BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
418->Threads(1)
419->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
420BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
421->Threads(1)
422->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
423BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
424->Threads(1)
425->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
426BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
427->Threads(1)
428->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
429BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
430->Threads(1)
431->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
432BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
433->Threads(1)
434->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
435BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
436->Threads(1)
437->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
438BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
439->Threads(1)
440->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
441BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
442->Threads(1)
443->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
444BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
445->Threads(1)
446->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
447BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
448->Threads(1)
449->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
450BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
451->Threads(1)
452->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
453BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
454->Threads(1)
455->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
456BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
457->Threads(1)
458->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
459BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
460->Threads(1)
461->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
462BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
463->Threads(1)
464->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
465BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
466->Threads(1)
467->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
468BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
469->Threads(1)
470->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
471BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
472->Threads(1)
473->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
474BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
475->Threads(1)
476->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
477BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
478->Threads(1)
479->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
480BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
481->Threads(1)
482->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
483BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
484->Threads(1)
485->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
486BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
487->Threads(1)
488->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
489BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
490->Threads(1)
491->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
492BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
493->Threads(1)
494->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
SiCong Li10c672c2017-06-22 15:46:40 +0100495
// Benchmark body "cl_matrix_multiply" for the FP16MatrixMultiply fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
496BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply)
497(::benchmark::State &state)
498{
499 while(state.KeepRunning())
500 {
501 // Run function
502 profiler.start();
503 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
504 CLScheduler::get().sync();
505 profiler.stop();
506 }
507}
508
// Registers the FP16MatrixMultiply "cl_matrix_multiply" benchmark three times,
// once per DataSetArg<MatrixMultiplyDataset, I> with I = 0..2, each with Threads(1).
// NOTE(review): I presumably selects one entry of MatrixMultiplyDataset — confirm
// against the DataSetArg definition.
509BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
510->Threads(1)
511->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
512BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
513->Threads(1)
514->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
515BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
516->Threads(1)
517->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
518
// Benchmark body "cl_matrix_multiply" for the FP32MatrixMultiply fixture.
// While state.KeepRunning() holds, each iteration starts the profiler, runs
// gemm_layer, calls CLScheduler::get().sync(), and then stops the profiler,
// so the sync() call is inside the profiled region.
// NOTE(review): profiler and gemm_layer are presumably members of the GEMM
// fixture; their setup is not visible in this file — confirm.
519BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply)
520(::benchmark::State &state)
521{
522 while(state.KeepRunning())
523 {
524 // Run function
525 profiler.start();
526 gemm_layer->run();
// Sync before stopping the profiler (presumably to wait for queued OpenCL work — confirm).
527 CLScheduler::get().sync();
528 profiler.stop();
529 }
530}
531
// Registers the FP32MatrixMultiply "cl_matrix_multiply" benchmark three times,
// once per DataSetArg<MatrixMultiplyDataset, I> with I = 0..2, each with Threads(1).
// NOTE(review): I presumably selects one entry of MatrixMultiplyDataset — confirm
// against the DataSetArg definition.
532BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
533->Threads(1)
534->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
535BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
536->Threads(1)
537->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
538BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
539->Threads(1)
540->Apply(DataSetArg<MatrixMultiplyDataset, 2>);