/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "GEMM.h"

#include "arm_compute/core/Types.h"

#include <type_traits>

28namespace arm_compute
29{
30namespace test
31{
32namespace validation
33{
34namespace reference
35{
36template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type>
37SimpleTensor<T> gemm(const SimpleTensor<T> &a, const SimpleTensor<T> &b, const SimpleTensor<T> &c, float alpha, float beta)
38{
39 // Create reference
Vidhya Sudhan Loganathan014333d2018-07-02 09:13:49 +010040 SimpleTensor<T> dst{ c.shape(), c.data_type(), 1 };
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010041
42 // Compute reference
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010043 const int M = a.shape().y();
44 const int N = b.shape().x();
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010045 const int K = a.shape().x();
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010046 const int D = a.shape().z(); // Number of matrices in a batch
47 const int W = a.shape()[3]; // Number of batched-gemm (Winograd case)
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010048
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010049 const int a_stride_z = K * M;
50 const int a_stride_w = K * M * D;
51
52 const int b_stride_z = b.shape().num_dimensions() > 2 ? N * K : 0; // Do not slide the matrix B along the 3th dimension in case matrix B has less than 3 dimensions
53 const int b_stride_w = b.shape().num_dimensions() > 3 ? K * N * D : 0; // Do not slide the matrix B along the 4th dimension in case matrix B has less than 4 dimensions
54
55 const int c_stride_z = N * M;
56 const int c_stride_w = N * M * D;
57
58 for(int w = 0; w < W; ++w)
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010059 {
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010060 for(int depth = 0; depth < D; ++depth)
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010061 {
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010062 const int base_addr_a = depth * a_stride_z + w * a_stride_w;
63 const int base_addr_b = depth * b_stride_z + w * b_stride_w;
64 const int base_addr_c = depth * c_stride_z + w * c_stride_w;
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010065
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010066 for(int row = 0; row < M; ++row)
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010067 {
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010068 for(int col = 0; col < N; ++col)
69 {
70 T acc(0);
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010071
Gian Marco Iodice2213d4b2018-04-27 10:39:06 +010072 for(int k = 0; k < K; ++k)
73 {
74 acc += a[base_addr_a + k + row * K] * b[base_addr_b + col + k * N];
75 }
76
77 // Finalize the result: alpha * A * B + beta * C
78 dst[base_addr_c + col + row * N] = alpha * acc + beta * c[base_addr_c + col + row * N];
79 }
80 }
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010081 }
82 }
83
84 return dst;
85}
86
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010087template SimpleTensor<float> gemm(const SimpleTensor<float> &a, const SimpleTensor<float> &b, const SimpleTensor<float> &c, float alpha, float beta);
Georgios Pinitas583137c2017-08-31 18:12:42 +010088template SimpleTensor<half> gemm(const SimpleTensor<half> &a, const SimpleTensor<half> &b, const SimpleTensor<half> &c, float alpha, float beta);
Moritz Pflanzer4dfc2352017-08-02 14:51:36 +010089} // namespace reference
90} // namespace validation
91} // namespace test
92} // namespace arm_compute