blob: 68955c52f7fc7a329536ea413ce4a44700a7f58a [file] [log] [blame]
Giorgio Arenacf3935f2017-10-26 17:14:13 +01001/*
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +01002 * Copyright (c) 2017-2020 Arm Limited.
Giorgio Arenacf3935f2017-10-26 17:14:13 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */
25#error "This example needs to be built with -DARM_COMPUTE_CL"
26#endif /* ARM_COMPUTE_CL */
27
28#include "arm_compute/core/Types.h"
Giorgio Arenacf3935f2017-10-26 17:14:13 +010029#include "arm_compute/runtime/CL/CLScheduler.h"
30#include "arm_compute/runtime/CL/CLTuner.h"
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010031#include "arm_compute/runtime/CL/functions/CLGEMM.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032
Giorgio Arenacf3935f2017-10-26 17:14:13 +010033#include "utils/Utils.h"
34
Gian Marco0bc5a252017-12-04 13:55:08 +000035#include <cstdlib>
36
Giorgio Arenacf3935f2017-10-26 17:14:13 +010037using namespace arm_compute;
38using namespace utils;
39
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000040class CLSGEMMExample : public Example
Giorgio Arenacf3935f2017-10-26 17:14:13 +010041{
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000042public:
Georgios Pinitas12be7ab2018-07-03 12:06:23 +010043 bool do_setup(int argc, char **argv) override
Giorgio Arenacf3935f2017-10-26 17:14:13 +010044 {
Michalis Spyroua4f378d2019-04-26 14:54:54 +010045 NPYLoader npy0;
46 NPYLoader npy1;
47 NPYLoader npy2;
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000048 alpha = 1.0f;
49 beta = 0.0f;
Giorgio Arenacf3935f2017-10-26 17:14:13 +010050
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000051 CLScheduler::get().default_init(&tuner);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010052
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000053 std::ifstream stream;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010054 if (argc > 1)
Giorgio Arenacf3935f2017-10-26 17:14:13 +010055 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000056 stream.open(argv[1], std::fstream::in);
57 }
Giorgio Arenacf3935f2017-10-26 17:14:13 +010058
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010059 if (argc < 3 || (argc < 4 && stream.bad()))
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000060 {
61 // Print help
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010062 std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha "
63 "= 1] [beta = 0]\n";
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000064 std::cout << " 2) ./build/cl_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n";
65 std::cout << "Too few or no input_matrices provided. Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n";
66
67 src0.allocator()->init(TensorInfo(TensorShape(5U, 7U), 1, DataType::F32));
68 src1.allocator()->init(TensorInfo(TensorShape(3U, 5U), 1, DataType::F32));
69 src2.allocator()->init(TensorInfo(TensorShape(3U, 7U), 1, DataType::F32));
70 }
71 else
72 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010073 if (stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */
Giorgio Arenacf3935f2017-10-26 17:14:13 +010074 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000075 npy0.open(argv[1]);
76 npy0.init_tensor(src0, DataType::F32);
77 npy1.open(argv[2]);
78 npy1.init_tensor(src1, DataType::F32);
79
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010080 if (argc > 3)
Giorgio Arenacf3935f2017-10-26 17:14:13 +010081 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000082 stream.close();
83 stream.clear();
84 stream.open(argv[3], std::fstream::in);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085 if (stream.good()) /* case with third file */
Giorgio Arenacf3935f2017-10-26 17:14:13 +010086 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000087 npy2.open(argv[3]);
88 npy2.init_tensor(src2, DataType::F32);
Giorgio Arenacf3935f2017-10-26 17:14:13 +010089
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010090 if (argc > 4)
Giorgio Arenacf3935f2017-10-26 17:14:13 +010091 {
Gian Marco0bc5a252017-12-04 13:55:08 +000092 // Convert string to float
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000093 alpha = strtof(argv[4], nullptr);
94
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010095 if (argc > 5)
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +000096 {
97 // Convert string to float
98 beta = strtof(argv[5], nullptr);
99 }
100 }
101 }
102 else /* case without third file */
103 {
104 alpha = strtof(argv[3], nullptr);
105
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100106 if (argc > 4)
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000107 {
108 beta = strtof(argv[4], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100109 }
110 }
111 }
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000112 }
113 else /* case M N K [alpha = 1.0f] [beta = 0.0f] */
114 {
115 size_t M = strtol(argv[1], nullptr, 10);
116 size_t N = strtol(argv[2], nullptr, 10);
117 size_t K = strtol(argv[3], nullptr, 10);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100118
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000119 src0.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32));
120 src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32));
121 src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32));
122
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123 if (argc > 4)
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000124 {
125 alpha = strtof(argv[4], nullptr);
126
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100127 if (argc > 5)
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100128 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000129 beta = strtof(argv[5], nullptr);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100130 }
131 }
132 }
133 }
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000134
135 init_sgemm_output(dst, src0, src1, DataType::F32);
136
137 // Configure function
138 sgemm.configure(&src0, &src1, (src2.info()->total_size() > 0) ? &src2 : nullptr, &dst, alpha, beta);
139
140 // Allocate all the images
141 src0.allocator()->allocate();
142 src1.allocator()->allocate();
143 dst.allocator()->allocate();
144
145 // Fill the input images with either the data provided or random data
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100146 if (npy0.is_open())
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100147 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000148 npy0.fill_tensor(src0);
149 npy1.fill_tensor(src1);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100150
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000151 output_filename = "sgemm_out.npy";
152 is_fortran = npy0.is_fortran();
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100153
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100154 if (npy2.is_open())
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100155 {
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000156 src2.allocator()->allocate();
157 npy2.fill_tensor(src2);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100158 }
159 }
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000160 else
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100161 {
162 src2.allocator()->allocate();
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000163
164 fill_random_tensor(src0, -1.f, 1.f);
165 fill_random_tensor(src1, -1.f, 1.f);
166 fill_random_tensor(src2, -1.f, 1.f);
167 }
168
169 // Dummy run for CLTuner
170 sgemm.run();
Georgios Pinitas12be7ab2018-07-03 12:06:23 +0100171
172 return true;
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000173 }
174 void do_run() override
175 {
176 // Execute the function
177 sgemm.run();
178
179 // Make sure all the OpenCL jobs are done executing:
180 CLScheduler::get().sync();
181 }
182 void do_teardown() override
183 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100184 if (!output_filename.empty()) /* Save to .npy file */
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000185 {
186 save_to_npy(dst, output_filename, is_fortran);
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100187 }
188 }
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100189
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000190private:
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100191 CLTensor src0{};
192 CLTensor src1{};
193 CLTensor src2{};
194 CLTensor dst{};
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000195 CLGEMM sgemm{};
196 CLTuner tuner{};
197 float alpha{}, beta{};
198 bool is_fortran{};
199 std::string output_filename{};
200};
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100201
202/** Main program for sgemm test
203 *
204 * @param[in] argc Number of arguments
205 * @param[in] argv Arguments ( [optional] Matrix A, [optional] Matrix B, [optional] Matrix C, [optional] alpha, [optional] beta )
206 */
Anthony Barbier6db0ff52018-01-05 10:59:12 +0000207int main(int argc, char **argv)
Giorgio Arenacf3935f2017-10-26 17:14:13 +0100208{
Michalis Spyrou2b5f0f22018-01-10 14:08:50 +0000209 return utils::run_example<CLSGEMMExample>(argc, argv);
Gian Marcoae2af742018-02-15 12:35:44 +0000210}