blob: 87dad05f3014c44c90ed726fb01094e0071b316f [file] [log] [blame]
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "CL/CLAccessor.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025#include "Globals.h"
26#include "TensorLibrary.h"
27#include "benchmark/Datasets.h"
28#include "benchmark/Profiler.h"
29#include "benchmark/WallClockTimer.h"
30
31#include "arm_compute/core/Helpers.h"
32#include "arm_compute/core/Types.h"
33#include "arm_compute/runtime/CL/CLScheduler.h"
34#include "arm_compute/runtime/CL/CLTensor.h"
35#include "arm_compute/runtime/CL/CLTensorAllocator.h"
36#include "arm_compute/runtime/CL/functions/CLGEMM.h"
37
38#include "benchmark/benchmark_api.h"
39
40using namespace arm_compute;
41using namespace arm_compute::test;
42using namespace arm_compute::test::benchmark;
43using namespace arm_compute::test::cl;
44
45#include "benchmark/CL/GEMM.h"
46
47namespace
48{
49using GEMMFP16GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
50using GEMMFP16GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
51using GEMMFP32GoogLeNet1 = GEMM<GoogLeNetGEMMDataset1, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
52using GEMMFP32GoogLeNet2 = GEMM<GoogLeNetGEMMDataset2, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
SiCong Li10c672c2017-06-22 15:46:40 +010053using FP16MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F16>;
54using FP32MatrixMultiply = GEMM<MatrixMultiplyDataset, CLTensor, CLAccessor, CLGEMM, DataType::F32>;
Anthony Barbier6ff3b192017-09-04 18:44:23 +010055} // namespace
56
57BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet)
58(::benchmark::State &state)
59{
60 while(state.KeepRunning())
61 {
62 // Run function
63 profiler.start();
64 gemm_layer->run();
65 CLScheduler::get().sync();
66 profiler.stop();
67 }
68}
69
70BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet)
71(::benchmark::State &state)
72{
73 while(state.KeepRunning())
74 {
75 // Run function
76 profiler.start();
77 gemm_layer->run();
78 CLScheduler::get().sync();
79 profiler.stop();
80 }
81}
82
83BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
84->Threads(1)
85->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
86BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
87->Threads(1)
88->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
89BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
90->Threads(1)
91->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
92BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
93->Threads(1)
94->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
95BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
96->Threads(1)
97->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
98BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
99->Threads(1)
100->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
101BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
102->Threads(1)
103->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
104BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
105->Threads(1)
106->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
107BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
108->Threads(1)
109->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
110BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
111->Threads(1)
112->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
113BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
114->Threads(1)
115->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
116BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
117->Threads(1)
118->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
119BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
120->Threads(1)
121->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
122BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
123->Threads(1)
124->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
125BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
126->Threads(1)
127->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
128BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
129->Threads(1)
130->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
131BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
132->Threads(1)
133->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
134BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
135->Threads(1)
136->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
137BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
138->Threads(1)
139->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
140BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
141->Threads(1)
142->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
143BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
144->Threads(1)
145->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
146BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
147->Threads(1)
148->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
149BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
150->Threads(1)
151->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
152BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
153->Threads(1)
154->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
155BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
156->Threads(1)
157->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
158BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
159->Threads(1)
160->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
161BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
162->Threads(1)
163->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
164BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
165->Threads(1)
166->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
167BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
168->Threads(1)
169->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
170BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
171->Threads(1)
172->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
173BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
174->Threads(1)
175->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
176BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet)
177->Threads(1)
178->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
179BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
180->Threads(1)
181->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
182BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
183->Threads(1)
184->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
185BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
186->Threads(1)
187->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
188BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
189->Threads(1)
190->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
191BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
192->Threads(1)
193->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
194BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
195->Threads(1)
196->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
197BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
198->Threads(1)
199->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
200BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
201->Threads(1)
202->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
203BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
204->Threads(1)
205->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
206BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
207->Threads(1)
208->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
209BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
210->Threads(1)
211->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
212BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
213->Threads(1)
214->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
215BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
216->Threads(1)
217->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
218BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
219->Threads(1)
220->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
221BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
222->Threads(1)
223->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
224BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
225->Threads(1)
226->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
227BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
228->Threads(1)
229->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
230BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
231->Threads(1)
232->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
233BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
234->Threads(1)
235->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
236BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
237->Threads(1)
238->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
239BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
240->Threads(1)
241->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
242BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
243->Threads(1)
244->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
245BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
246->Threads(1)
247->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
248BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
249->Threads(1)
250->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
251BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
252->Threads(1)
253->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
254BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
255->Threads(1)
256->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
257BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
258->Threads(1)
259->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
260BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
261->Threads(1)
262->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
263BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
264->Threads(1)
265->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
266BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
267->Threads(1)
268->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
269BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
270->Threads(1)
271->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
272BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet)
273->Threads(1)
274->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
275
276BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet)
277(::benchmark::State &state)
278{
279 while(state.KeepRunning())
280 {
281 // Run function
282 profiler.start();
283 gemm_layer->run();
284 CLScheduler::get().sync();
285 profiler.stop();
286 }
287}
288
289BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet)
290(::benchmark::State &state)
291{
292 while(state.KeepRunning())
293 {
294 // Run function
295 profiler.start();
296 gemm_layer->run();
297 CLScheduler::get().sync();
298 profiler.stop();
299 }
300}
301
302BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
303->Threads(1)
304->Apply(DataSetArg<GoogLeNetGEMMDataset1, 0>);
305BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
306->Threads(1)
307->Apply(DataSetArg<GoogLeNetGEMMDataset1, 1>);
308BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
309->Threads(1)
310->Apply(DataSetArg<GoogLeNetGEMMDataset1, 2>);
311BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
312->Threads(1)
313->Apply(DataSetArg<GoogLeNetGEMMDataset1, 3>);
314BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
315->Threads(1)
316->Apply(DataSetArg<GoogLeNetGEMMDataset1, 4>);
317BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
318->Threads(1)
319->Apply(DataSetArg<GoogLeNetGEMMDataset1, 5>);
320BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
321->Threads(1)
322->Apply(DataSetArg<GoogLeNetGEMMDataset1, 6>);
323BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
324->Threads(1)
325->Apply(DataSetArg<GoogLeNetGEMMDataset1, 7>);
326BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
327->Threads(1)
328->Apply(DataSetArg<GoogLeNetGEMMDataset1, 8>);
329BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
330->Threads(1)
331->Apply(DataSetArg<GoogLeNetGEMMDataset1, 9>);
332BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
333->Threads(1)
334->Apply(DataSetArg<GoogLeNetGEMMDataset1, 10>);
335BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
336->Threads(1)
337->Apply(DataSetArg<GoogLeNetGEMMDataset1, 11>);
338BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
339->Threads(1)
340->Apply(DataSetArg<GoogLeNetGEMMDataset1, 12>);
341BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
342->Threads(1)
343->Apply(DataSetArg<GoogLeNetGEMMDataset1, 13>);
344BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
345->Threads(1)
346->Apply(DataSetArg<GoogLeNetGEMMDataset1, 14>);
347BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
348->Threads(1)
349->Apply(DataSetArg<GoogLeNetGEMMDataset1, 15>);
350BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
351->Threads(1)
352->Apply(DataSetArg<GoogLeNetGEMMDataset1, 16>);
353BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
354->Threads(1)
355->Apply(DataSetArg<GoogLeNetGEMMDataset1, 17>);
356BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
357->Threads(1)
358->Apply(DataSetArg<GoogLeNetGEMMDataset1, 18>);
359BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
360->Threads(1)
361->Apply(DataSetArg<GoogLeNetGEMMDataset1, 19>);
362BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
363->Threads(1)
364->Apply(DataSetArg<GoogLeNetGEMMDataset1, 20>);
365BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
366->Threads(1)
367->Apply(DataSetArg<GoogLeNetGEMMDataset1, 21>);
368BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
369->Threads(1)
370->Apply(DataSetArg<GoogLeNetGEMMDataset1, 22>);
371BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
372->Threads(1)
373->Apply(DataSetArg<GoogLeNetGEMMDataset1, 23>);
374BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
375->Threads(1)
376->Apply(DataSetArg<GoogLeNetGEMMDataset1, 24>);
377BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
378->Threads(1)
379->Apply(DataSetArg<GoogLeNetGEMMDataset1, 25>);
380BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
381->Threads(1)
382->Apply(DataSetArg<GoogLeNetGEMMDataset1, 26>);
383BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
384->Threads(1)
385->Apply(DataSetArg<GoogLeNetGEMMDataset1, 27>);
386BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
387->Threads(1)
388->Apply(DataSetArg<GoogLeNetGEMMDataset1, 28>);
389BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
390->Threads(1)
391->Apply(DataSetArg<GoogLeNetGEMMDataset1, 29>);
392BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
393->Threads(1)
394->Apply(DataSetArg<GoogLeNetGEMMDataset1, 30>);
395BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet)
396->Threads(1)
397->Apply(DataSetArg<GoogLeNetGEMMDataset1, 31>);
398BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
399->Threads(1)
400->Apply(DataSetArg<GoogLeNetGEMMDataset2, 0>);
401BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
402->Threads(1)
403->Apply(DataSetArg<GoogLeNetGEMMDataset2, 1>);
404BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
405->Threads(1)
406->Apply(DataSetArg<GoogLeNetGEMMDataset2, 2>);
407BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
408->Threads(1)
409->Apply(DataSetArg<GoogLeNetGEMMDataset2, 3>);
410BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
411->Threads(1)
412->Apply(DataSetArg<GoogLeNetGEMMDataset2, 4>);
413BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
414->Threads(1)
415->Apply(DataSetArg<GoogLeNetGEMMDataset2, 5>);
416BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
417->Threads(1)
418->Apply(DataSetArg<GoogLeNetGEMMDataset2, 6>);
419BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
420->Threads(1)
421->Apply(DataSetArg<GoogLeNetGEMMDataset2, 7>);
422BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
423->Threads(1)
424->Apply(DataSetArg<GoogLeNetGEMMDataset2, 8>);
425BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
426->Threads(1)
427->Apply(DataSetArg<GoogLeNetGEMMDataset2, 9>);
428BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
429->Threads(1)
430->Apply(DataSetArg<GoogLeNetGEMMDataset2, 10>);
431BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
432->Threads(1)
433->Apply(DataSetArg<GoogLeNetGEMMDataset2, 11>);
434BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
435->Threads(1)
436->Apply(DataSetArg<GoogLeNetGEMMDataset2, 12>);
437BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
438->Threads(1)
439->Apply(DataSetArg<GoogLeNetGEMMDataset2, 13>);
440BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
441->Threads(1)
442->Apply(DataSetArg<GoogLeNetGEMMDataset2, 14>);
443BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
444->Threads(1)
445->Apply(DataSetArg<GoogLeNetGEMMDataset2, 15>);
446BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
447->Threads(1)
448->Apply(DataSetArg<GoogLeNetGEMMDataset2, 16>);
449BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
450->Threads(1)
451->Apply(DataSetArg<GoogLeNetGEMMDataset2, 17>);
452BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
453->Threads(1)
454->Apply(DataSetArg<GoogLeNetGEMMDataset2, 18>);
455BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
456->Threads(1)
457->Apply(DataSetArg<GoogLeNetGEMMDataset2, 19>);
458BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
459->Threads(1)
460->Apply(DataSetArg<GoogLeNetGEMMDataset2, 20>);
461BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
462->Threads(1)
463->Apply(DataSetArg<GoogLeNetGEMMDataset2, 21>);
464BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
465->Threads(1)
466->Apply(DataSetArg<GoogLeNetGEMMDataset2, 22>);
467BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
468->Threads(1)
469->Apply(DataSetArg<GoogLeNetGEMMDataset2, 23>);
470BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
471->Threads(1)
472->Apply(DataSetArg<GoogLeNetGEMMDataset2, 24>);
473BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
474->Threads(1)
475->Apply(DataSetArg<GoogLeNetGEMMDataset2, 25>);
476BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
477->Threads(1)
478->Apply(DataSetArg<GoogLeNetGEMMDataset2, 26>);
479BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
480->Threads(1)
481->Apply(DataSetArg<GoogLeNetGEMMDataset2, 27>);
482BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
483->Threads(1)
484->Apply(DataSetArg<GoogLeNetGEMMDataset2, 28>);
485BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
486->Threads(1)
487->Apply(DataSetArg<GoogLeNetGEMMDataset2, 29>);
488BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
489->Threads(1)
490->Apply(DataSetArg<GoogLeNetGEMMDataset2, 30>);
491BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet)
492->Threads(1)
493->Apply(DataSetArg<GoogLeNetGEMMDataset2, 31>);
SiCong Li10c672c2017-06-22 15:46:40 +0100494
495BENCHMARK_DEFINE_F(FP16MatrixMultiply, cl_matrix_multiply)
496(::benchmark::State &state)
497{
498 while(state.KeepRunning())
499 {
500 // Run function
501 profiler.start();
502 gemm_layer->run();
503 CLScheduler::get().sync();
504 profiler.stop();
505 }
506}
507
508BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
509->Threads(1)
510->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
511BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
512->Threads(1)
513->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
514BENCHMARK_REGISTER_F(FP16MatrixMultiply, cl_matrix_multiply)
515->Threads(1)
516->Apply(DataSetArg<MatrixMultiplyDataset, 2>);
517
518BENCHMARK_DEFINE_F(FP32MatrixMultiply, cl_matrix_multiply)
519(::benchmark::State &state)
520{
521 while(state.KeepRunning())
522 {
523 // Run function
524 profiler.start();
525 gemm_layer->run();
526 CLScheduler::get().sync();
527 profiler.stop();
528 }
529}
530
531BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
532->Threads(1)
533->Apply(DataSetArg<MatrixMultiplyDataset, 0>);
534BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
535->Threads(1)
536->Apply(DataSetArg<MatrixMultiplyDataset, 1>);
537BENCHMARK_REGISTER_F(FP32MatrixMultiply, cl_matrix_multiply)
538->Threads(1)
539->Apply(DataSetArg<MatrixMultiplyDataset, 2>);