blob: e1729a85b0d6659eacb0e378b426eb888ee451b2 [file] [log] [blame]
Giorgio Arenacf3935f2017-10-26 17:14:13 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */
25#error "This example needs to be built with -DARM_COMPUTE_CL"
26#endif /* ARM_COMPUTE_CL */
27
28#include "arm_compute/core/Types.h"
29#include "arm_compute/runtime/CL/CLFunctions.h"
30#include "arm_compute/runtime/CL/CLScheduler.h"
31#include "arm_compute/runtime/CL/CLTuner.h"
32#include "utils/Utils.h"
33
Gian Marco0bc5a252017-12-04 13:55:08 +000034#include <cstdlib>
35
Giorgio Arenacf3935f2017-10-26 17:14:13 +010036using namespace arm_compute;
37using namespace utils;
38
39void main_cl_sgemm(int argc, const char **argv)
40{
41 NPYLoader npy0, npy1, npy2;
Gian Marco0bc5a252017-12-04 13:55:08 +000042 CLTensor src0, src1, src2, dst;
43 float alpha = 1.0f, beta = 0.0f;
Giorgio Arenacf3935f2017-10-26 17:14:13 +010044
45 CLTuner tuner;
46 CLScheduler::get().default_init(&tuner);
47
48 std::ifstream stream;
49 if(argc > 1)
50 {
51 stream.open(argv[1], std::fstream::in);
52 }
53
54 if(argc < 3 || (argc < 4 && stream.bad()))
55 {
56 // Print help
57 std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n";
Gian Marco0bc5a252017-12-04 13:55:08 +000058 std::cout << " 2) ./build/cl_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n";
59 std::cout << "Too few or no input_matrices provided. Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n";
Giorgio Arenacf3935f2017-10-26 17:14:13 +010060
Gian Marco0bc5a252017-12-04 13:55:08 +000061 src0.allocator()->init(TensorInfo(TensorShape(5U, 7U), 1, DataType::F32));
62 src1.allocator()->init(TensorInfo(TensorShape(3U, 5U), 1, DataType::F32));
63 src2.allocator()->init(TensorInfo(TensorShape(3U, 7U), 1, DataType::F32));
Giorgio Arenacf3935f2017-10-26 17:14:13 +010064 }
65 else
66 {
Gian Marco0bc5a252017-12-04 13:55:08 +000067 if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */
Giorgio Arenacf3935f2017-10-26 17:14:13 +010068 {
69 npy0.open(argv[1]);
Gian Marco0bc5a252017-12-04 13:55:08 +000070 npy0.init_tensor(src0, DataType::F32);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010071 npy1.open(argv[2]);
Gian Marco0bc5a252017-12-04 13:55:08 +000072 npy1.init_tensor(src1, DataType::F32);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010073
74 if(argc > 3)
75 {
76 stream.close();
77 stream.clear();
78 stream.open(argv[3], std::fstream::in);
79 if(stream.good()) /* case with third file */
80 {
81 npy2.open(argv[3]);
Gian Marco0bc5a252017-12-04 13:55:08 +000082 npy2.init_tensor(src2, DataType::F32);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010083
84 if(argc > 4)
85 {
Gian Marco0bc5a252017-12-04 13:55:08 +000086 // Convert string to float
87 alpha = strtof(argv[4], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010088
89 if(argc > 5)
90 {
Gian Marco0bc5a252017-12-04 13:55:08 +000091 // Convert string to float
92 beta = strtof(argv[5], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010093 }
94 }
95 }
96 else /* case without third file */
97 {
Gian Marco0bc5a252017-12-04 13:55:08 +000098 alpha = strtof(argv[3], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010099
100 if(argc > 4)
101 {
Gian Marco0bc5a252017-12-04 13:55:08 +0000102 beta = strtof(argv[4], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100103 }
104 }
105 }
106 }
Gian Marco0bc5a252017-12-04 13:55:08 +0000107 else /* case M N K [alpha = 1.0f] [beta = 0.0f] */
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100108 {
109 size_t M = strtol(argv[1], nullptr, 10);
110 size_t N = strtol(argv[2], nullptr, 10);
111 size_t K = strtol(argv[3], nullptr, 10);
112
Gian Marco0bc5a252017-12-04 13:55:08 +0000113 src0.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
114 src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
115 src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100116
117 if(argc > 4)
118 {
Gian Marco0bc5a252017-12-04 13:55:08 +0000119 alpha = strtof(argv[4], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100120
121 if(argc > 5)
122 {
Gian Marco0bc5a252017-12-04 13:55:08 +0000123 beta = strtof(argv[5], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100124 }
125 }
126 }
127 }
128
Gian Marco0bc5a252017-12-04 13:55:08 +0000129 init_sgemm_output(dst, src0, src1, DataType::F32);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100130
131 // Configure function
132 CLGEMM sgemm;
133 sgemm.configure(&src0, &src1, (src2.info()->total_size() > 0) ? &src2 : nullptr, &dst, alpha, beta);
134
135 // Allocate all the images
136 src0.allocator()->allocate();
137 src1.allocator()->allocate();
138 dst.allocator()->allocate();
139
140 // Fill the input images with either the data provided or random data
141 if(npy0.is_open())
142 {
143 npy0.fill_tensor(src0);
144 npy1.fill_tensor(src1);
145
146 if(npy2.is_open())
147 {
148 src2.allocator()->allocate();
149 npy2.fill_tensor(src2);
150 }
151 }
152 else
153 {
154 src2.allocator()->allocate();
155
156 fill_random_tensor(src0, -1.f, 1.f);
157 fill_random_tensor(src1, -1.f, 1.f);
158 fill_random_tensor(src2, -1.f, 1.f);
159 }
160
161 // Dummy run for CLTuner
162 sgemm.run();
163
164 auto start = std::chrono::high_resolution_clock::now();
165
166 // Execute the function
167 sgemm.run();
168
169 // Make sure all the OpenCL jobs are done executing:
170 CLScheduler::get().sync();
171
172 auto stop = std::chrono::high_resolution_clock::now();
173
174 if(!npy0.is_open()) /* If the inputs were not files, print the results */
175 {
176 std::cout << "\nMatrix 1:" << std::endl;
177 src0.map(true);
178 src0.print(std::cout, IOFormatInfo());
179 src0.unmap();
180
181 std::cout << "Matrix 2:" << std::endl;
182 src1.map(true);
183 src1.print(std::cout, IOFormatInfo());
184 src1.unmap();
185
186 std::cout << "Matrix 3:" << std::endl;
187 src2.map(true);
188 src2.print(std::cout, IOFormatInfo());
189 src2.unmap();
190
191 std::cout << "Alpha:" << alpha << "\n\n";
192 std::cout << "Beta:" << beta << "\n\n";
193
194 std::cout << "Output Matrix:" << std::endl;
195 dst.map(true);
196 dst.print(std::cout, IOFormatInfo());
197 dst.unmap();
198 }
199 else /* Save to .npy file */
200 {
201 save_to_npy(dst, "sgemm_out.npy", npy0.is_fortran());
202 }
203
204 auto delta = std::chrono::duration_cast<std::chrono::microseconds>(stop - start);
205 std::cout << "Time elapsed: " << delta.count() << "us." << std::endl;
206}
207
208/** Main program for sgemm test
209 *
210 * @param[in] argc Number of arguments
211 * @param[in] argv Arguments ( [optional] Matrix A, [optional] Matrix B, [optional] Matrix C, [optional] alpha, [optional] beta )
212 */
213int main(int argc, const char **argv)
214{
215 return utils::run_example(argc, argv, main_cl_sgemm);
216}