/*
 * Copyright (c) 2017-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_TEST_GEMM_FIXTURE
#define ARM_COMPUTE_TEST_GEMM_FIXTURE

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/experimental/IPostOp.h"
#include "src/core/experimental/PostOpUtils.h"
#include "tests/AssetsLibrary.h"
#include "tests/Globals.h"
#include "tests/IAccessor.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/Helpers.h"
#include "tests/validation/reference/ActivationLayer.h"
#include "tests/validation/reference/ElementwiseOperations.h"
#include "tests/validation/reference/GEMM.h"
#include "tests/validation/reference/PostOps.h"

#include <random>

namespace arm_compute
{
namespace test
{
namespace validation
{
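/** Generic GEMM validation fixture.
 *
 * Runs the GEMM function under test on randomly filled tensors and compares the result against the
 * reference GEMM implementation. The template parameters allow disabling the C matrix (disable_c),
 * reinterpreting the input/output as 3D tensors, and pre-transposing A and/or B before they are
 * handed to the fixture.
 */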
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool disable_c = false, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool pretranspose_a = false, bool pretranspose_b = false>
class GEMMValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_c, TensorShape output_shape, float alpha, float beta, bool pretranspose, DataType data_type)
    {
        ARM_COMPUTE_UNUSED(pretranspose);
        _target    = compute_target(shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type);
        _reference = compute_reference(shape_a, shape_b, output_shape, alpha, beta, data_type);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i, float lo = -1.f, float hi = 1.f)
    {
        switch(tensor.data_type())
        {
            case DataType::F16:
            {
                arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ float(lo), float(hi) };
                library->fill(tensor, distribution, i);
                break;
            }
            case DataType::F32:
            {
                std::uniform_real_distribution<float> distribution(lo, hi);
                library->fill(tensor, distribution, i);
                break;
            }
            default:
                library->fill_tensor_uniform(tensor, i);
        }
    }

    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_c, const TensorShape &output_shape, float alpha, float beta,
                              DataType data_type)
    {
        // Create tensors
        TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1);
        TensorType b   = create_tensor<TensorType>(shape_b, data_type, 1);
        TensorType c   = create_tensor<TensorType>(shape_c, data_type, 1);
        TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1);

        // Create and configure function
        FunctionType gemm;
        // The GEMMInfo includes the depth of the output in case it is reinterpreted as 3D.
        // If the output shape has the same number of dimensions as the input, a plain 2D matrix multiplication is performed (depth_output_reinterpreted_as_3D = 0);
        // otherwise, the reinterpreted version of GEMM is used (depth_output_reinterpreted_as_3D = depth of the 3D output).
        gemm.configure(&a,
                       &b,
                       (disable_c) ? nullptr : &c,
                       &dst,
                       alpha, beta,
                       GEMMInfo(false, false, false, (reinterpret_output_as_3d ? output_shape[2] : 0), reinterpret_input_as_3d, false, GEMMLowpOutputStageInfo(), false, false, (reinterpret_input_as_3d
                                || reinterpret_output_as_3d)));
        ARM_COMPUTE_ASSERT(a.info()->is_resizable());
        ARM_COMPUTE_ASSERT(b.info()->is_resizable());
        ARM_COMPUTE_ASSERT(c.info()->is_resizable());
        ARM_COMPUTE_ASSERT(dst.info()->is_resizable());

        add_padding_x({ &a, &b, &c, &dst });

        // Allocate tensors
        a.allocator()->allocate();
        b.allocator()->allocate();
        c.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(a), 0);
        fill(AccessorType(b), 1);
        if(!disable_c)
        {
            fill(AccessorType(c), 2);
        }

        // Compute GEMM function
        gemm.run();

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, float alpha, float beta,
                                      DataType data_type)
    {
        TensorShape shape_a_to_use = shape_a;

        if(reinterpret_input_as_3d)
        {
            // Collapse the second and third dimension if the input is 3D
            shape_a_to_use.collapse(2U, 1U);
        }

        // Create reference
        SimpleTensor<T> a{ shape_a_to_use, data_type, 1 };
        SimpleTensor<T> b{ shape_b, data_type, 1 };
        SimpleTensor<T> c{ output_shape, data_type, 1 };

        // Fill reference
        fill(a, 0);
        fill(b, 1);
        fill(c, 2);

        if(reinterpret_input_as_3d || reinterpret_output_as_3d)
        {
            const int n          = shape_b[0];
            const int m          = reinterpret_output_as_3d ? output_shape[1] * output_shape[2] : output_shape[1];
            const int batch_size = reinterpret_output_as_3d ? output_shape[3] : output_shape[2];

            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(c.data() + i * n, c.data(), n * sizeof(T));
            }
        }
        /* Note: Assuming the usual batched matmul dimensions A = (B x M x K) and B = (B x K x N), if pretranspose_a is set to true, then A is expected as (B x K x M);
           that is, A must be pre-transposed before being passed to the fixture. The fixture then transposes A again to restore it to (B x M x K),
           so that the reference implementation, which works on (B x M x K) input, can be called.
           Similarly, if pretranspose_b is set to true, then B is expected as (B x N x K) and must be pre-transposed before being passed to the fixture. */
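        // Illustrative example only: if A is logically (M x K) and pretranspose_a is true, the fixture receives a
        // (K x M) tensor, and transpose_matrix() below converts it back to (M x K) before the reference GEMM is called.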

        // Define transposed shapes
        TensorShape a_transposed_shape(a.shape().y(), a.shape().x());
        TensorShape b_transposed_shape(b.shape().y(), b.shape().x());

        // Define transposed tensors
        SimpleTensor<T> a_transposed{ a_transposed_shape, data_type };
        SimpleTensor<T> b_transposed{ b_transposed_shape, data_type };

        // Pre-transpose a if necessary
        if(pretranspose_a)
        {
            transpose_matrix<T>(a, a_transposed);
        }

        // Pre-transpose b if necessary
        if(pretranspose_b)
        {
            transpose_matrix<T>(b, b_transposed);
        }

        // Setting beta to 0 will effectively disable C for the
        // computation of the reference: alpha * A * B + 0 * C
        // Use the transposed tensors when the corresponding pretranspose flag is enabled, otherwise use the original tensors
        return reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, alpha, disable_c ? 0.f : beta);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

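/** Fixture for the (non-reshaped) GEMM matrix-multiply operator.
 *
 * Builds LHS (K x M), RHS (N x K) and bias tensors from the given m, n, k and batch size, runs the
 * operator under test and validates it against reference::gemm followed by the requested activation
 * layer. The bias can optionally be broadcast along M and the batch dimension.
 */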
template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiplyValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info,
               DataType data_type, GPUTarget gpu_arch)
    {
        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                              bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo    reshape_info(m, n, k, 1, 1, 0, false, broadcast_bias);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
            { ACL_SRC_1, &rhs },
            { ACL_SRC_2, &bias },
            { ACL_DST, &dst }
        });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

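/** Fixture for the (non-reshaped) GEMM matrix-multiply operator with a 3D (GEMM3D) output.
 *
 * The output height M is the product of m_w and m_h, and the bias is always broadcast; the reference
 * result is computed as a 2D GEMM and reshaped to (N x m_w x m_h x batches) for comparison.
 */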
template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
class GEMMMatrixMultiply3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision,
               const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        ARM_COMPUTE_UNUSED(broadcast_bias);

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                              bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo    reshape_info(m, n, k, 1, 1, m_h, false, true);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(gpu_arch, lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, false, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
            { ACL_SRC_1, &rhs },
            { ACL_SRC_2, &bias },
            { ACL_DST, &dst }
        });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

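/** Fixture for the GEMM matrix-multiply operator on interleaved/transposed (reshaped) LHS and RHS matrices.
 *
 * The LHS is reshaped with fixed m0 = 4, k0 = 4 blocking (interleaved and transposed) and the RHS with
 * n0 = 16 / sizeof(T), k0 = 1; v0 and h0 come from the test parameters. The reshaped matrices are fed to
 * the GEMM operator and the result is validated against reference::gemm plus the activation layer.
 */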
template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposedValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias, bool fp16_mixed_precision,
               const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = 4;
        lhs_info.k0         = 4;
        lhs_info.v0         = v0;
        lhs_info.interleave = true;
        lhs_info.transpose  = true;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0         = 16 / sizeof(T);
        rhs_info.k0         = 1;
        rhs_info.h0         = h0;
        rhs_info.interleave = false;
        rhs_info.transpose  = false;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo    reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, 0, false, broadcast_bias);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;
        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when exporting to a CL image, as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
            { ACL_SRC_1, &rhs_reshaped },
            { ACL_SRC_2, &bias },
            { ACL_DST, &dst }
        });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

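/** 3D (GEMM3D) variant of GEMMMatrixMultiplyInterleavedTransposedValidationFixture.
 *
 * M is the product of m_w and m_h, the bias is always broadcast, and the reference output is reshaped
 * to (N x m_w x m_h x batches) before comparison.
 */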
template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, float alpha, float beta, unsigned int v0, unsigned int h0, bool broadcast_bias,
               bool fp16_mixed_precision, const ActivationLayerInfo &act_info, DataType data_type, GPUTarget gpu_arch)
    {
        ARM_COMPUTE_UNUSED(broadcast_bias);

        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = 4;
        lhs_info.k0         = 4;
        lhs_info.v0         = v0;
        lhs_info.interleave = true;
        lhs_info.transpose  = true;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0         = 16 / sizeof(T);
        rhs_info.k0         = 1;
        rhs_info.h0         = h0;
        rhs_info.interleave = false;
        rhs_info.transpose  = false;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, fp16_mixed_precision, act_info, gpu_arch);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, bool fp16_mixed_precision, const ActivationLayerInfo &act_info, GPUTarget gpu_arch)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int m = lhs_shape[1];
        const unsigned int n = rhs_shape[0];
        const unsigned int k = lhs_shape[0];
        GEMMReshapeInfo    reshape_info(m, n, k, rhs_info.h0, lhs_info.v0, m_h, false, true);

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;
        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(gpu_arch, lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, true, reshape_info, fp16_mixed_precision, act_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when exporting to a CL image, as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
            { ACL_SRC_1, &rhs_reshaped },
            { ACL_SRC_2, &bias },
            { ACL_DST, &dst }
        });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // In case of broadcast, we need to simply copy the first into the following "M" ones
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

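/** Fixture for the reshaped GEMM kernel, where both LHS and RHS are reshaped.
 *
 * The block sizes (m0, n0, k0, v0, h0), interleaving, transposition and export_to_cl_image options come
 * from the test parameters and are passed through GEMMLHSMatrixInfo / GEMMRHSMatrixInfo and GEMMKernelInfo.
 * If the RHS reshape cannot be validated for the requested export_to_cl_image configuration, validate_result
 * is left false and the comparison against the reference is skipped.
 */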
template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
class GEMMMatrixMultiplyReshapedValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
               bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0         = m0;
        lhs_info.k0         = k0;
        lhs_info.v0         = v0;
        lhs_info.interleave = interleave_lhs;
        lhs_info.transpose  = lhs_transpose;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0                 = n0;
        rhs_info.k0                 = k0;
        rhs_info.h0                 = h0;
        rhs_info.interleave         = interleave_rhs;
        rhs_info.transpose          = !lhs_transpose;
        rhs_info.export_to_cl_image = export_to_cl_image;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        if(validate_result)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType lhs_reshaped;
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo     kernel_info;
        kernel_info.m                       = M;
        kernel_info.n                       = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = broadcast_bias;
        kernel_info.activation_info         = act_info;
        kernel_info.fp_mixed_precision      = fp_mixed_precision;

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeLHSOperatorType reshape_lhs;
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        validate_result = validate_result || !rhs_info.export_to_cl_image;
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // We do not pad when exporting to a CL image, as it needs to comply with strict pitch alignment restrictions
        if(!rhs_info.export_to_cl_image)
        {
            add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
        }

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        lhs_reshaped.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
        reshape_lhs.run(reshape_lhs_pack);
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
            { ACL_SRC_1, &rhs_reshaped },
            { ACL_SRC_2, &bias },
            { ACL_DST, &dst }
        });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, we need to simply copy the first into the following "M" ones
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        if(fp_mixed_precision)
        {
            return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info);
        }
        else
        {
            return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
        }
    }

    bool            validate_result = true;
    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

/** (EXPERIMENTAL_POST_OPS)*/
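/** Fixture for the reshaped GEMM kernel with a list of experimental post ops fused after the GEMM.
 *
 * Each post-op argument shape is derived from the PostOpArgBroadcast flags (broadcast in dimension 0, 1, 2).
 * The reference result is computed as GEMM (optionally mixed precision) followed by reference::post_ops;
 * the activation info is ignored whenever the post-op list is non-empty.
 */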
949template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
950class GEMMMatrixMultiplyReshapedWithPostOpsValidationFixture : public framework::Fixture
951{
952public:
953 using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Instruct fixture if we need broadcasting in dimension 0, 1, 2 of each PostOp argument
954public:
955 template <typename...>
956 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
957 bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, bool lhs_transpose, const ActivationLayerInfo &act_info,
958 const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
959 {
960 GEMMLHSMatrixInfo lhs_info;
961 lhs_info.m0 = m0;
962 lhs_info.k0 = k0;
963 lhs_info.v0 = v0;
964 lhs_info.interleave = interleave_lhs;
965 lhs_info.transpose = lhs_transpose;
966
967 GEMMRHSMatrixInfo rhs_info;
968 rhs_info.n0 = n0;
969 rhs_info.k0 = k0;
970 rhs_info.h0 = h0;
971 rhs_info.interleave = interleave_rhs;
972 rhs_info.transpose = !lhs_transpose;
973 rhs_info.export_to_cl_image = export_to_cl_image;
974
975 // Set the tensor shapes for LHS and RHS matrices
976 const TensorShape lhs_shape(k, m, batch_size);
977 const TensorShape rhs_shape(n, k, batch_size);
978 const TensorShape bias_shape(n,
979 broadcast_bias ? 1 : m,
980 broadcast_bias ? 1 : batch_size);
981 auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
982 [ = ](auto broadcast)
983 {
984 return TensorShape
985 {
986 std::get<0>(broadcast) ? 1 : n,
987 std::get<1>(broadcast) ? 1 : m,
988 std::get<2>(broadcast) ? 1 : batch_size,
989 };
990 });
991
992 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
993 if(validate_result)
994 {
995 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
996 }
997 }
998
999protected:
1000 template <typename U>
1001 void fill(U &&tensor, int i)
1002 {
1003 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1004 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1005
1006 DistributionType distribution{ T(-1.0f), T(1.0f) };
1007 library->fill(tensor, distribution, i);
1008
1009 // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
1010 DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
1011 library->fill_borders_with_garbage(tensor, distribution_inf, i);
1012 }
1013
1014 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1015 DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1016 {
1017 // Create tensors
1018 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1019 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1020 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1021
1022 // Create post op tensors and populate post op with them
1023 std::vector<TensorType> post_op_tensors_holder{};
1024 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
1025 [&post_op_tensors_holder, &data_type](auto shape)
1026 {
1027 auto t = create_tensor<TensorType>(shape, data_type, 1);
1028 post_op_tensors_holder.push_back(std::move(t));
1029 return post_op_tensors_holder.back().info();
1030 });
1031 TensorType lhs_reshaped;
1032 TensorType rhs_reshaped;
1033 TensorType dst;
1034
1035 const unsigned int M = lhs_shape[1];
1036 const unsigned int N = rhs_shape[0];
1037 const unsigned int K = lhs_shape[0];
1038 GEMMKernelInfo kernel_info;
1039 kernel_info.m = M;
1040 kernel_info.n = N;
1041 kernel_info.k = K;
1042 kernel_info.depth_output_gemm3d = 0;
1043 kernel_info.reinterpret_input_as_3d = false;
1044 kernel_info.broadcast_bias = broadcast_bias;
1045 kernel_info.activation_info = act_info;
1046 kernel_info.fp_mixed_precision = fp_mixed_precision;
1047 kernel_info.post_ops = populated_post_ops;
1048
1049 // The output tensor will be auto-initialized within the function
1050
1051 // Create and configure function
1052 ReshapeLHSOperatorType reshape_lhs;
1053 ReshapeRHSOperatorType reshape_rhs;
1054 GEMMOperatorType gemm;
1055
1056 validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1057 validate_result = validate_result || !rhs_info.export_to_cl_image;
1058 if(!validate_result)
1059 {
1060 return nullptr;
1061 }
1062
1063 reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1064 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1065 gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1066
1067 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1068 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1069 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1070 for(const auto &tensor : post_op_tensors_holder)
1071 {
1072 ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
1073 }
1074
1075 // We do not pad when using image as it needs to comply to strict pitch alignment restrictions
1076 if(!rhs_info.export_to_cl_image)
1077 {
1078 add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
1079 for(auto &tensor : post_op_tensors_holder)
1080 {
1081 add_padding_x({ &tensor });
1082 }
1083 }
1084
1085 // Allocate tensors
1086 lhs.allocator()->allocate();
1087 rhs.allocator()->allocate();
1088 lhs_reshaped.allocator()->allocate();
1089 rhs_reshaped.allocator()->allocate();
1090 bias.allocator()->allocate();
1091 dst.allocator()->allocate();
1092 for(auto &tensor : post_op_tensors_holder)
1093 {
1094 tensor.allocator()->allocate();
1095 }
1096
1097 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1098 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1099 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1100 ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1101 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1102 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1103 for(const auto &tensor : post_op_tensors_holder)
1104 {
1105 ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
1106 }
1107
1108 // Fill tensors
1109 fill(AccessorType(lhs), 0);
1110 fill(AccessorType(rhs), 1);
1111 fill(AccessorType(bias), 2);
1112 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1113 {
1114 fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
1115 }
1116
1117 // Compute GEMM
1118 ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1119 reshape_lhs.run(reshape_lhs_pack);
1120 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1121 reshape_rhs.run(reshape_rhs_pack);
1122 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
1123 { ACL_SRC_1, &rhs_reshaped },
1124 { ACL_SRC_2, &bias },
1125 { ACL_DST, &dst }
1126 });
1127 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1128 {
1129 gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
1130 }
1131 gemm.run(gemm_pack);
1132
1133 return dst;
1134 }
1135
1136 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
1137 const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1138 {
1139 TensorShape dst_shape = lhs_shape;
1140 dst_shape[0] = rhs_shape[0];
1141 dst_shape[1] = lhs_shape[1];
1142
1143 // Create reference
1144 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1145 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1146 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1147        // Create the post op argument tensors and populate the post op list with them
1148 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
1149 {
1150 return SimpleTensor<T> { shape, data_type, 1 };
1151 });
1152
1153 const int n = rhs_shape[0];
1154 const int m = lhs_shape[1];
1155 const int batch_size = lhs_shape[2];
1156
1157 // Fill reference
1158 int tensor_idx = 0;
1159 fill(lhs, tensor_idx++);
1160 fill(rhs, tensor_idx++);
1161 fill(bias, tensor_idx++);
1162 for(auto &op : populated_post_ops.get_list())
1163 {
1164 for(auto tensor : op->arguments())
1165 {
1166 fill(*tensor, tensor_idx++);
1167 }
1168 }
1169
1170 if(broadcast_bias)
1171 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00001172            // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
SiCongLi1af54162021-10-06 15:25:57 +01001173 for(int i = 1; i < m * batch_size; i++)
1174 {
1175 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1176 }
1177 }
1178
1179 SimpleTensor<T> out;
1180 if(fp_mixed_precision)
1181 {
1182 out = reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta);
1183 }
1184 else
1185 {
1186 out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
1187 }
1188 // Ignore activation info if post ops are used instead
1189 if(populated_post_ops.size() > 0)
1190 {
1191 out = reference::post_ops<T>(out, populated_post_ops);
1192 }
1193 else
1194 {
1195 out = reference::activation_layer(out, act_info);
1196 }
1197 return out;
1198 }
1199
1200 bool validate_result = true;
1201 TensorType _target{};
1202 SimpleTensor<T> _reference{};
1203};
1204
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001205template <typename TensorType, typename AccessorType, typename T, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMOperatorType, bool fp_mixed_precision = false>
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001206class GEMMMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
1207{
1208public:
1209 template <typename...>
1210 void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
Gian Marco Iodicee3a849a2020-06-10 17:59:30 +01001211 bool interleave_lhs, bool interleave_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool lhs_transpose, const ActivationLayerInfo &act_info)
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001212 {
1213 GEMMLHSMatrixInfo lhs_info;
1214 lhs_info.m0 = m0;
1215 lhs_info.k0 = k0;
1216 lhs_info.v0 = v0;
1217 lhs_info.interleave = interleave_lhs;
Giorgio Arenaae99b6e2019-08-01 14:22:12 +01001218 lhs_info.transpose = lhs_transpose;
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001219
1220 GEMMRHSMatrixInfo rhs_info;
Gian Marco Iodicee3a849a2020-06-10 17:59:30 +01001221 rhs_info.n0 = n0;
1222 rhs_info.k0 = k0;
1223 rhs_info.h0 = h0;
1224 rhs_info.interleave = interleave_rhs;
1225 rhs_info.transpose = !lhs_transpose;
1226 rhs_info.export_to_cl_image = export_to_cl_image;
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001227
1228        // In case of GEMM3D, m is the product of m_w and m_h
1229 const unsigned int m = m_w * m_h;
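        // e.g. m_w = 4, m_h = 3 gives a 2D LHS with m = 12 rows; the GEMM output is later
        // re-interpreted as a 3D tensor of m_h slices (see compute_reference below).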
1230
1231 // Set the tensor shapes for LHS and RHS matrices
1232 const TensorShape lhs_shape(k, m, batch_size);
1233 const TensorShape rhs_shape(n, k, batch_size);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001234 const TensorShape bias_shape(n, 1, 1);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001235
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001236 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
1237 if(validate_result)
1238 {
1239 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
1240 }
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001241 }
1242
1243protected:
1244 template <typename U>
1245 void fill(U &&tensor, int i)
1246 {
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001247 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
Giorgio Arena33b103b2021-01-08 10:37:15 +00001248 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001249
1250 DistributionType distribution{ T(-1.0f), T(1.0f) };
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001251 library->fill(tensor, distribution, i);
1252 }
1253
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001254 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001255 DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001256 {
1257 // Create tensors
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001258 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1259 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1260 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001261 TensorType lhs_reshaped;
1262 TensorType rhs_reshaped;
1263 TensorType dst;
1264
1265 const unsigned int M = lhs_shape[1];
1266 const unsigned int N = rhs_shape[0];
1267 const unsigned int K = lhs_shape[0];
Gian Marco Iodice7026b302019-06-26 17:18:11 +01001268 GEMMKernelInfo kernel_info;
1269 kernel_info.m = M;
1270 kernel_info.n = N;
1271 kernel_info.k = K;
1272 kernel_info.depth_output_gemm3d = m_h;
1273 kernel_info.reinterpret_input_as_3d = false;
1274 kernel_info.broadcast_bias = true;
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001275 kernel_info.activation_info = act_info;
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +01001276 kernel_info.fp_mixed_precision = fp_mixed_precision;
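        // When fp_mixed_precision is enabled the accumulation is performed in wider (F32) precision for
        // F16 inputs; the reference mirrors this through reference::gemm_mixed_precision below.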
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001277
1278 // The output tensor will be auto-initialized within the function
1279
1280 // Create and configure function
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001281 ReshapeLHSOperatorType reshape_lhs;
1282 ReshapeRHSOperatorType reshape_rhs;
1283 GEMMOperatorType gemm;
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001284
1285 validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1286 validate_result = validate_result || !rhs_info.export_to_cl_image;
1287 if(!validate_result)
1288 {
1289 return nullptr;
1290 }
1291
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001292 reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1293 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1294 gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001295
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001296 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1297 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1298 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001299
Georgios Pinitas3dca91b2021-04-13 13:35:58 +01001300        // We do not pad when exporting to a CL image, as the tensor needs to comply with strict pitch alignment restrictions
Giorgio Arena63825e82021-03-25 14:54:50 +00001301 if(!rhs_info.export_to_cl_image)
1302 {
1303 add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &bias, &dst });
1304 }
1305
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001306 // Allocate tensors
1307 lhs.allocator()->allocate();
1308 rhs.allocator()->allocate();
1309 lhs_reshaped.allocator()->allocate();
1310 rhs_reshaped.allocator()->allocate();
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001311 bias.allocator()->allocate();
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001312 dst.allocator()->allocate();
1313
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001314 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1315 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1316 ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1317 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1318 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1319 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001320
1321 // Fill tensors
1322 fill(AccessorType(lhs), 0);
1323 fill(AccessorType(rhs), 1);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001324 fill(AccessorType(bias), 2);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001325
1326 // Compute GEMM
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001327 ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1328 reshape_lhs.run(reshape_lhs_pack);
1329 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1330 reshape_rhs.run(reshape_rhs_pack);
1331 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped },
1332 { ACL_SRC_1, &rhs_reshaped },
1333 { ACL_SRC_2, &bias },
1334 { ACL_DST, &dst }
1335 });
1336 gemm.run(gemm_pack);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001337
1338 return dst;
1339 }
1340
Michalis Spyrou6bff1952019-10-02 17:22:11 +01001341 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001342 const ActivationLayerInfo &act_info)
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001343 {
1344 TensorShape dst_shape = lhs_shape;
1345 dst_shape.set(0, rhs_shape[0]);
1346 dst_shape.set(1, lhs_shape[1] / m_h);
1347 dst_shape.set(2, m_h);
1348 dst_shape.set(3, lhs_shape[2]);
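        // The flat n x m GEMM result is re-interpreted as a 3D output of shape (n, m / m_h, m_h, batch),
        // matching the depth_output_gemm3d = m_h setting used when configuring the kernel.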
1349
1350 // Create reference
1351 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1352 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001353 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1354
1355 const int n = rhs_shape[0];
1356 const int m = lhs_shape[1];
1357 const int batch_size = lhs_shape[2];
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001358
1359 // Fill reference
1360 fill(lhs, 0);
1361 fill(rhs, 1);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001362 fill(bias, 2);
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001363
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00001364        // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001365 for(int i = 1; i < m * batch_size; i++)
1366 {
1367 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1368 }
1369
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +01001370 if(fp_mixed_precision)
1371 {
1372 return reference::activation_layer(reference::gemm_mixed_precision<T>(lhs, rhs, bias, alpha, beta), act_info);
1373 }
1374 else
1375 {
1376 return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
1377 }
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001378 }
1379
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001380 bool validate_result = true;
Gian Marco Iodice9382ab32018-12-17 15:12:07 +00001381 TensorType _target{};
1382 SimpleTensor<T> _reference{};
Gian Marco Iodicebf9731e2018-12-12 10:18:04 +00001383};
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001384
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001385template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001386class GEMMMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
1387{
1388public:
1389 template <typename...>
1390 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
Gian Marco Iodice781cba72020-06-19 16:56:57 +01001391 bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001392 {
1393 GEMMLHSMatrixInfo lhs_info;
1394 lhs_info.m0 = m0;
1395 lhs_info.k0 = k0;
1396
1397 GEMMRHSMatrixInfo rhs_info;
Gian Marco Iodice781cba72020-06-19 16:56:57 +01001398 rhs_info.n0 = n0;
1399 rhs_info.k0 = k0;
1400 rhs_info.h0 = h0;
1401 rhs_info.interleave = interleave_rhs;
1402 rhs_info.transpose = transpose_rhs;
1403 rhs_info.export_to_cl_image = export_to_cl_image;
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001404
1405 // Set the tensor shapes for LHS and RHS matrices
1406 const TensorShape lhs_shape(k, m, batch_size);
1407 const TensorShape rhs_shape(n, k, batch_size);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001408 const TensorShape bias_shape(n,
1409 broadcast_bias ? 1 : m,
1410 broadcast_bias ? 1 : batch_size);
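        // With broadcast_bias the bias is a single row of n elements shared across all M rows and all
        // batches; otherwise a full (n x m x batch_size) bias tensor is used.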
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001411
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001412 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
1413 if(validate_result)
1414 {
1415 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
1416 }
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001417 }
1418
1419protected:
1420 template <typename U>
1421 void fill(U &&tensor, int i)
1422 {
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001423 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
Giorgio Arena33b103b2021-01-08 10:37:15 +00001424 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001425
1426 DistributionType distribution{ T(-1.0f), T(1.0f) };
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001427 library->fill(tensor, distribution, i);
1428
1429        // Fill the borders with infinity so that any accidental read of the padding shows up as NaN (i.e. inf * 0) in the output
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001430 DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001431 library->fill_borders_with_garbage(tensor, distribution_inf, i);
1432 }
1433
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001434 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001435 DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001436 {
1437 // Create tensors
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001438 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1439 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1440 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001441 TensorType rhs_reshaped;
1442 TensorType dst;
1443
1444 const unsigned int M = lhs_shape[1];
1445 const unsigned int N = rhs_shape[0];
1446 const unsigned int K = lhs_shape[0];
Gian Marco Iodice7026b302019-06-26 17:18:11 +01001447 GEMMKernelInfo kernel_info;
1448 kernel_info.m = M;
1449 kernel_info.n = N;
1450 kernel_info.k = K;
1451 kernel_info.depth_output_gemm3d = 0;
1452 kernel_info.reinterpret_input_as_3d = false;
1453 kernel_info.broadcast_bias = broadcast_bias;
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001454 kernel_info.activation_info = act_info;
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001455
1456 // The output tensor will be auto-initialized within the function
1457
1458 // Create and configure function
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001459 ReshapeRHSOperatorType reshape_rhs;
1460 GEMMOperatorType gemm;
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001461
1462 validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1463 validate_result = validate_result || !rhs_info.export_to_cl_image;
1464 if(!validate_result)
1465 {
1466 return nullptr;
1467 }
1468
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001469 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1470 gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001471
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001472 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1473 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1474 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001475
Georgios Pinitas3dca91b2021-04-13 13:35:58 +01001476        // We do not pad when exporting to a CL image, as the tensor needs to comply with strict pitch alignment restrictions
Giorgio Arena63825e82021-03-25 14:54:50 +00001477 if(!rhs_info.export_to_cl_image)
1478 {
1479 add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1480 }
1481
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001482 // Allocate tensors
1483 lhs.allocator()->allocate();
1484 rhs.allocator()->allocate();
1485 rhs_reshaped.allocator()->allocate();
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001486 bias.allocator()->allocate();
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001487 dst.allocator()->allocate();
1488
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001489 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1490 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1491 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1492 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1493 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001494
1495 // Fill tensors
1496 fill(AccessorType(lhs), 0);
1497 fill(AccessorType(rhs), 1);
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001498 fill(AccessorType(bias), 2);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001499
1500 // Compute GEMM
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001501 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1502 reshape_rhs.run(reshape_rhs_pack);
1503 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1504 { ACL_SRC_1, &rhs_reshaped },
1505 { ACL_SRC_2, &bias },
1506 { ACL_DST, &dst }
1507 });
1508 gemm.run(gemm_pack);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001509
1510 return dst;
1511 }
1512
Michalis Spyrou6bff1952019-10-02 17:22:11 +01001513 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001514 const ActivationLayerInfo &act_info)
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001515 {
1516 TensorShape dst_shape = lhs_shape;
1517 dst_shape[0] = rhs_shape[0];
1518 dst_shape[1] = lhs_shape[1];
1519
1520 // Create reference
1521 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1522 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001523 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1524
1525 const int n = rhs_shape[0];
1526 const int m = lhs_shape[1];
1527 const int batch_size = lhs_shape[2];
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001528
1529 // Fill reference
1530 fill(lhs, 0);
1531 fill(rhs, 1);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001532 fill(bias, 2);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001533
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001534 if(broadcast_bias)
1535 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00001536            // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001537 for(int i = 1; i < m * batch_size; i++)
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001538 {
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001539 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001540 }
1541 }
Georgios Pinitasb0f342e2019-05-21 13:32:43 +01001542
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001543 return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001544 }
1545
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001546 bool validate_result = true;
Gian Marco Iodiceadc53952019-02-15 11:10:31 +00001547 TensorType _target{};
1548 SimpleTensor<T> _reference{};
1549};
1550
SiCongLiafa19722021-10-24 19:12:33 +01001551/** Fixture to validate GEMMMatrixMultiplyReshapedOnlyRHS with a fused post op chain (EXPERIMENTAL_POST_OPS) */
1552template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
1553class GEMMMatrixMultiplyReshapedOnlyRHSWithPostOpsValidationFixture : public framework::Fixture
1554{
1555public:
1556    using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Flags whether each PostOp argument is broadcast in dimensions 0, 1 and 2
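    // A minimal sketch of how such a post op list could be built for this fixture (for illustration
    // only; the post op types live in arm_compute/core/experimental/IPostOp.h and the exact
    // construction used by the test suite lives in the PostOps datasets):
    //   experimental::PostOpList<PostOpArgBroadcast> post_ops{};
    //   post_ops.push_back_op<experimental::PostOpAct<PostOpArgBroadcast>>(ActivationLayerInfo{ ActivationLayerInfo::ActivationFunction::RELU });
    //   post_ops.push_back_op<experimental::PostOpEltwiseAdd<PostOpArgBroadcast>>(std::make_tuple(true, true, false), 0, ConvertPolicy::SATURATE);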
1557 template <typename...>
1558 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
1559 bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info,
1560 const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
1561 {
1562 GEMMLHSMatrixInfo lhs_info;
1563 lhs_info.m0 = m0;
1564 lhs_info.k0 = k0;
1565
1566 GEMMRHSMatrixInfo rhs_info;
1567 rhs_info.n0 = n0;
1568 rhs_info.k0 = k0;
1569 rhs_info.h0 = h0;
1570 rhs_info.interleave = interleave_rhs;
1571 rhs_info.transpose = transpose_rhs;
1572 rhs_info.export_to_cl_image = export_to_cl_image;
1573
1574 // Set the tensor shapes for LHS and RHS matrices
1575 const TensorShape lhs_shape(k, m, batch_size);
1576 const TensorShape rhs_shape(n, k, batch_size);
1577 const TensorShape bias_shape(n,
1578 broadcast_bias ? 1 : m,
1579 broadcast_bias ? 1 : batch_size);
1580 auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
1581 [ = ](auto broadcast)
1582 {
1583 return TensorShape
1584 {
1585 std::get<0>(broadcast) ? 1 : n,
1586 std::get<1>(broadcast) ? 1 : m,
1587 std::get<2>(broadcast) ? 1 : batch_size,
1588 };
1589 });
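        // Each broadcast flag tuple has now been turned into a concrete TensorShape, e.g.
        // (true, true, false) -> TensorShape(1, 1, batch_size), i.e. one value per batch broadcast over N and M.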
1590
1591 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1592 if(validate_result)
1593 {
1594 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
1595 }
1596 }
1597
1598protected:
1599 template <typename U>
1600 void fill(U &&tensor, int i)
1601 {
1602 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
1603 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
1604
1605 DistributionType distribution{ T(-1.0f), T(1.0f) };
1606 library->fill(tensor, distribution, i);
1607
1608        // Fill the borders with infinity so that any accidental read of the padding shows up as NaN (i.e. inf * 0) in the output
1609 DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
1610 library->fill_borders_with_garbage(tensor, distribution_inf, i);
1611 }
1612
1613 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1614 DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1615 {
1616 // Create tensors
1617 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1618 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1619 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1620 TensorType rhs_reshaped;
1621 TensorType dst;
1622        // Create the post op argument tensors and populate the post op list with them
1623 std::vector<TensorType> post_op_tensors_holder{};
1624 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
1625 [&post_op_tensors_holder, &data_type](auto shape)
1626 {
1627 auto t = create_tensor<TensorType>(shape, data_type, 1);
1628 post_op_tensors_holder.push_back(std::move(t));
1629 return post_op_tensors_holder.back().info();
1630 });
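        // Each post op argument now has a backing tensor in post_op_tensors_holder, and the post op list
        // holds the matching ITensorInfo pointers so the operator can be configured with them.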
1631
1632 const unsigned int M = lhs_shape[1];
1633 const unsigned int N = rhs_shape[0];
1634 const unsigned int K = lhs_shape[0];
1635 GEMMKernelInfo kernel_info;
1636 kernel_info.m = M;
1637 kernel_info.n = N;
1638 kernel_info.k = K;
1639 kernel_info.depth_output_gemm3d = 0;
1640 kernel_info.reinterpret_input_as_3d = false;
1641 kernel_info.broadcast_bias = broadcast_bias;
1642 kernel_info.activation_info = act_info;
1643 kernel_info.post_ops = populated_post_ops;
1644
1645 // The output tensor will be auto-initialized within the function
1646
1647 // Create and configure function
1648 ReshapeRHSOperatorType reshape_rhs;
1649 GEMMOperatorType gemm;
1650
1651 validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1652 validate_result = validate_result || !rhs_info.export_to_cl_image;
1653 if(!validate_result)
1654 {
1655 return nullptr;
1656 }
1657
1658 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1659 gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
1660
1661 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1662 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1663 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1664 for(const auto &tensor : post_op_tensors_holder)
1665 {
1666 ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
1667 }
1668
1669        // We do not pad when exporting to a CL image, as the tensor needs to comply with strict pitch alignment restrictions
1670 if(!rhs_info.export_to_cl_image)
1671 {
1672 add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1673 for(auto &tensor : post_op_tensors_holder)
1674 {
1675 add_padding_x({ &tensor });
1676 }
1677 }
1678
1679 // Allocate tensors
1680 lhs.allocator()->allocate();
1681 rhs.allocator()->allocate();
1682 rhs_reshaped.allocator()->allocate();
1683 bias.allocator()->allocate();
1684 dst.allocator()->allocate();
1685 for(auto &tensor : post_op_tensors_holder)
1686 {
1687 tensor.allocator()->allocate();
1688 }
1689
1690 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1691 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1692 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1693 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1694 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1695 for(const auto &tensor : post_op_tensors_holder)
1696 {
1697 ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
1698 }
1699
1700 // Fill tensors
1701 fill(AccessorType(lhs), 0);
1702 fill(AccessorType(rhs), 1);
1703 fill(AccessorType(bias), 2);
1704 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1705 {
1706 fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
1707 }
1708
1709 // Compute GEMM
1710 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1711 reshape_rhs.run(reshape_rhs_pack);
1712 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1713 { ACL_SRC_1, &rhs_reshaped },
1714 { ACL_SRC_2, &bias },
1715 { ACL_DST, &dst }
1716 });
1717 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
1718 {
1719 gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
1720 }
1721 gemm.run(gemm_pack);
1722
1723 return dst;
1724 }
1725
1726 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
1727 const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
1728 {
1729 TensorShape dst_shape = lhs_shape;
1730 dst_shape[0] = rhs_shape[0];
1731 dst_shape[1] = lhs_shape[1];
1732
1733 // Create reference
1734 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1735 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1736 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1737        // Create the post op argument tensors and populate the post op list with them
1738 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
1739 {
1740 return SimpleTensor<T> { shape, data_type, 1 };
1741 });
1742
1743 const int n = rhs_shape[0];
1744 const int m = lhs_shape[1];
1745 const int batch_size = lhs_shape[2];
1746
1747 // Fill reference
1748 int tensor_idx = 0;
1749 fill(lhs, tensor_idx++);
1750 fill(rhs, tensor_idx++);
1751 fill(bias, tensor_idx++);
1752 for(auto &op : populated_post_ops.get_list())
1753 {
1754 for(auto tensor : op->arguments())
1755 {
1756 fill(*tensor, tensor_idx++);
1757 }
1758 }
1759
1760 if(broadcast_bias)
1761 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00001762            // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
SiCongLiafa19722021-10-24 19:12:33 +01001763 for(int i = 1; i < m * batch_size; i++)
1764 {
1765 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1766 }
1767 }
1768
1769 SimpleTensor<T> out;
1770 out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
1771 // Ignore activation info if post ops are used instead
1772 if(populated_post_ops.size() > 0)
1773 {
1774 out = reference::post_ops<T>(out, populated_post_ops);
1775 }
1776 else
1777 {
1778 out = reference::activation_layer(out, act_info);
1779 }
1780 return out;
1781 }
1782
1783 bool validate_result = true;
1784 TensorType _target{};
1785 SimpleTensor<T> _reference{};
1786};
1787
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001788template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001789class GEMMMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
1790{
1791public:
1792 template <typename...>
1793 void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int h0,
Gian Marco Iodice9ae06d42020-10-22 16:37:12 +01001794 bool interleave_rhs, bool transpose_rhs, bool export_to_cl_image, bool has_pad_y, DataType data_type, float alpha, float beta, const ActivationLayerInfo &act_info)
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001795 {
1796 GEMMLHSMatrixInfo lhs_info;
1797 lhs_info.m0 = m0;
1798 lhs_info.k0 = k0;
1799
1800 GEMMRHSMatrixInfo rhs_info;
Gian Marco Iodice781cba72020-06-19 16:56:57 +01001801 rhs_info.n0 = n0;
1802 rhs_info.k0 = k0;
1803 rhs_info.h0 = h0;
1804 rhs_info.interleave = interleave_rhs;
1805 rhs_info.transpose = transpose_rhs;
1806 rhs_info.export_to_cl_image = export_to_cl_image;
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001807
1808        // In case of GEMM3D, m is the product of m_w and m_h
1809 const unsigned int m = m_w * m_h;
1810
1811 // Set the tensor shapes for LHS and RHS matrices
1812 const TensorShape lhs_shape(k, m, batch_size);
1813 const TensorShape rhs_shape(n, k, batch_size);
1814 const TensorShape bias_shape(n, 1, 1);
1815
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001816 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info, has_pad_y);
1817 if(validate_result)
1818 {
1819 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
1820 }
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001821 }
1822
1823protected:
1824 template <typename U>
1825 void fill(U &&tensor, int i)
1826 {
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001827 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
Giorgio Arena33b103b2021-01-08 10:37:15 +00001828 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001829
1830 DistributionType distribution{ T(-1.0f), T(1.0f) };
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001831 library->fill(tensor, distribution, i);
1832 }
1833
1834 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
1835 DataType data_type, float alpha, float beta,
Gian Marco Iodice9ae06d42020-10-22 16:37:12 +01001836 unsigned int m_h, const ActivationLayerInfo &act_info, bool has_pad_y)
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001837 {
1838 // Create tensors
1839 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1840 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1841 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
1842 TensorType rhs_reshaped;
1843 TensorType dst;
1844
1845 const unsigned int M = lhs_shape[1];
1846 const unsigned int N = rhs_shape[0];
1847 const unsigned int K = lhs_shape[0];
Gian Marco Iodice7026b302019-06-26 17:18:11 +01001848 GEMMKernelInfo kernel_info;
1849 kernel_info.m = M;
1850 kernel_info.n = N;
1851 kernel_info.k = K;
1852 kernel_info.depth_output_gemm3d = m_h;
1853 kernel_info.reinterpret_input_as_3d = false;
1854 kernel_info.broadcast_bias = true;
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001855 kernel_info.activation_info = act_info;
Gian Marco Iodice9ae06d42020-10-22 16:37:12 +01001856 kernel_info.has_pad_y = has_pad_y;
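        // has_pad_y tells the kernel that the LHS/DST tensors may carry padding along the Y dimension;
        // the dummy padding added further down exercises that code path.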
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001857
1858 // The output tensor will be auto-initialized within the function
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001859 // Create and configure function
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001860 ReshapeRHSOperatorType reshape_rhs;
1861 GEMMOperatorType gemm;
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001862
1863 validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
1864 validate_result = validate_result || !rhs_info.export_to_cl_image;
1865 if(!validate_result)
1866 {
1867 return nullptr;
1868 }
1869
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001870 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1871 gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001872
Gian Marco Iodice9ae06d42020-10-22 16:37:12 +01001873 if(has_pad_y)
1874 {
1875 // Add dummy padding into lhs to validate has_pad_y path
1876 lhs.info()->extend_padding(PaddingSize(2, 0, 2, 0));
1877 dst.info()->extend_padding(PaddingSize(2, 0, 1, 0));
1878 }
1879
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001880 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1881 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1882 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001883
Georgios Pinitas3dca91b2021-04-13 13:35:58 +01001884        // We do not pad when exporting to a CL image, as the tensor needs to comply with strict pitch alignment restrictions
Giorgio Arena63825e82021-03-25 14:54:50 +00001885 if(!rhs_info.export_to_cl_image)
1886 {
1887 add_padding_x({ &lhs, &rhs, &rhs_reshaped, &bias, &dst });
1888 }
1889
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001890 // Allocate tensors
1891 lhs.allocator()->allocate();
1892 rhs.allocator()->allocate();
1893 rhs_reshaped.allocator()->allocate();
1894 bias.allocator()->allocate();
1895 dst.allocator()->allocate();
1896
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01001897 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1898 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1899 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1900 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1901 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001902
1903 // Fill tensors
1904 fill(AccessorType(lhs), 0);
1905 fill(AccessorType(rhs), 1);
1906 fill(AccessorType(bias), 2);
1907
1908 // Compute GEMM
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001909 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1910 reshape_rhs.run(reshape_rhs_pack);
1911 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
1912 { ACL_SRC_1, &rhs_reshaped },
1913 { ACL_SRC_2, &bias },
1914 { ACL_DST, &dst }
1915 });
1916 gemm.run(gemm_pack);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001917
1918 return dst;
1919 }
1920
Michalis Spyrou6bff1952019-10-02 17:22:11 +01001921 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001922 const ActivationLayerInfo &act_info)
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001923 {
1924 TensorShape dst_shape = lhs_shape;
1925 dst_shape.set(0, rhs_shape[0]);
1926 dst_shape.set(1, lhs_shape[1] / m_h);
1927 dst_shape.set(2, m_h);
1928 dst_shape.set(3, lhs_shape[2]);
1929
1930 // Create reference
1931 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
1932 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
1933 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
1934
1935 const int n = rhs_shape[0];
1936 const int m = lhs_shape[1];
1937 const int batch_size = lhs_shape[2];
1938
1939 // Fill reference
1940 fill(lhs, 0);
1941 fill(rhs, 1);
1942 fill(bias, 2);
1943
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00001944        // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001945 for(int i = 1; i < m * batch_size; i++)
1946 {
1947 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
1948 }
1949
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001950 return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001951 }
1952
Sheri Zhangcc3e53c2020-11-16 21:17:28 +00001953 bool validate_result = true;
Gian Marco Iodicee16c8902019-06-14 16:11:10 +01001954 TensorType _target{};
1955 SimpleTensor<T> _reference{};
1956};
1957
Georgios Pinitas856f66e2021-04-22 21:13:21 +01001958template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
giuros01b3204e72019-04-01 13:50:22 +01001959class GEMMMatrixMultiplyNativeValidationFixture : public framework::Fixture
1960{
1961public:
1962 template <typename...>
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001963 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
1964 const ActivationLayerInfo &act_info)
giuros01b3204e72019-04-01 13:50:22 +01001965 {
1966 GEMMLHSMatrixInfo lhs_info;
1967 lhs_info.m0 = m0;
1968 lhs_info.k0 = k0;
1969
1970 GEMMRHSMatrixInfo rhs_info;
1971 rhs_info.n0 = n0;
1972 rhs_info.k0 = k0;
1973
1974 // Set the tensor shapes for LHS and RHS matrices
1975 const TensorShape lhs_shape(k, m, batch_size);
1976 const TensorShape rhs_shape(n, k, batch_size);
Gian Marco Iodice944170e2019-06-24 14:40:30 +01001977 const TensorShape bias_shape(n,
1978 broadcast_bias ? 1 : m,
1979 broadcast_bias ? 1 : batch_size);
giuros01b3204e72019-04-01 13:50:22 +01001980
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001981 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
Michalis Spyrou6bff1952019-10-02 17:22:11 +01001982 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
giuros01b3204e72019-04-01 13:50:22 +01001983 }
1984
1985protected:
1986 template <typename U>
1987 void fill(U &&tensor, int i)
1988 {
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001989 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
Giorgio Arena33b103b2021-01-08 10:37:15 +00001990 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001991
1992 DistributionType distribution{ T(-1.0f), T(1.0f) };
giuros01b3204e72019-04-01 13:50:22 +01001993 library->fill(tensor, distribution, i);
1994
1995        // Fill the borders with infinity so that any accidental read of the padding shows up as NaN (i.e. inf * 0) in the output
Giorgio Arena4bdd1772020-12-17 16:47:07 +00001996 DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
giuros01b3204e72019-04-01 13:50:22 +01001997 library->fill_borders_with_garbage(tensor, distribution_inf, i);
1998 }
1999
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002000 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01002001 DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
giuros01b3204e72019-04-01 13:50:22 +01002002 {
2003 // Create tensors
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002004 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
2005 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
2006 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
giuros01b3204e72019-04-01 13:50:22 +01002007 TensorType dst;
2008
2009 const unsigned int M = lhs_shape[1];
2010 const unsigned int N = rhs_shape[0];
2011 const unsigned int K = lhs_shape[0];
Gian Marco Iodice7026b302019-06-26 17:18:11 +01002012 GEMMKernelInfo kernel_info;
2013 kernel_info.m = M;
2014 kernel_info.n = N;
2015 kernel_info.k = K;
2016 kernel_info.depth_output_gemm3d = 0;
2017 kernel_info.reinterpret_input_as_3d = false;
2018 kernel_info.broadcast_bias = broadcast_bias;
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01002019 kernel_info.activation_info = act_info;
giuros01b3204e72019-04-01 13:50:22 +01002020
2021 // Create and configure function
Georgios Pinitas856f66e2021-04-22 21:13:21 +01002022 GEMMOperatorType gemm;
2023 gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
giuros01b3204e72019-04-01 13:50:22 +01002024
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01002025 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2026 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2027 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
giuros01b3204e72019-04-01 13:50:22 +01002028
Giorgio Arena63825e82021-03-25 14:54:50 +00002029 add_padding_x({ &lhs, &rhs, &bias, &dst });
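        // Pad the tensors along the X dimension (test helper) so the kernel is also exercised with
        // non-dense row strides.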
2030
giuros01b3204e72019-04-01 13:50:22 +01002031 // Allocate tensors
2032 lhs.allocator()->allocate();
2033 rhs.allocator()->allocate();
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002034 bias.allocator()->allocate();
giuros01b3204e72019-04-01 13:50:22 +01002035 dst.allocator()->allocate();
2036
Michele Di Giorgio4fc10b32021-04-30 18:30:41 +01002037 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2038 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2039 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2040 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
giuros01b3204e72019-04-01 13:50:22 +01002041
2042 // Fill tensors
2043 fill(AccessorType(lhs), 0);
2044 fill(AccessorType(rhs), 1);
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002045 fill(AccessorType(bias), 2);
giuros01b3204e72019-04-01 13:50:22 +01002046
2047 // Compute GEMM
Georgios Pinitas856f66e2021-04-22 21:13:21 +01002048 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2049 { ACL_SRC_1, &rhs },
2050 { ACL_SRC_2, &bias },
2051 { ACL_DST, &dst }
2052 });
2053 gemm.run(gemm_pack);
giuros01b3204e72019-04-01 13:50:22 +01002054
2055 return dst;
2056 }
2057
Michalis Spyrou6bff1952019-10-02 17:22:11 +01002058 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01002059 const ActivationLayerInfo &act_info)
giuros01b3204e72019-04-01 13:50:22 +01002060 {
2061 TensorShape dst_shape = lhs_shape;
2062 dst_shape[0] = rhs_shape[0];
2063 dst_shape[1] = lhs_shape[1];
2064
2065 // Create reference
2066 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2067 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002068 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
2069
2070 const int n = rhs_shape[0];
2071 const int m = lhs_shape[1];
2072 const int batch_size = lhs_shape[2];
giuros01b3204e72019-04-01 13:50:22 +01002073
2074 // Fill reference
2075 fill(lhs, 0);
2076 fill(rhs, 1);
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002077 fill(bias, 2);
giuros01b3204e72019-04-01 13:50:22 +01002078
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002079 if(broadcast_bias)
2080 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00002081            // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
Gian Marco Iodice944170e2019-06-24 14:40:30 +01002082 for(int i = 1; i < m * batch_size; i++)
2083 {
2084 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2085 }
2086 }
2087
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01002088 return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
giuros01b3204e72019-04-01 13:50:22 +01002089 }
2090
2091 TensorType _target{};
2092 SimpleTensor<T> _reference{};
2093};
2094
Georgios Pinitas856f66e2021-04-22 21:13:21 +01002095template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
SiCongLiafa19722021-10-24 19:12:33 +01002096class GEMMMatrixMultiplyNativeWithPostOpsValidationFixture : public framework::Fixture
2097{
2098public:
2099    using PostOpArgBroadcast = std::tuple<bool, bool, bool>; // Flags whether each PostOp argument is broadcast in dimensions 0, 1 and 2
2101 template <typename...>
2102 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta, bool broadcast_bias,
2103 const ActivationLayerInfo &act_info, const experimental::PostOpList<PostOpArgBroadcast> &post_ops)
2104 {
2105 GEMMLHSMatrixInfo lhs_info;
2106 lhs_info.m0 = m0;
2107 lhs_info.k0 = k0;
2108
2109 GEMMRHSMatrixInfo rhs_info;
2110 rhs_info.n0 = n0;
2111 rhs_info.k0 = k0;
2112
2113 // Set the tensor shapes for LHS and RHS matrices
2114 const TensorShape lhs_shape(k, m, batch_size);
2115 const TensorShape rhs_shape(n, k, batch_size);
2116 const TensorShape bias_shape(n,
2117 broadcast_bias ? 1 : m,
2118 broadcast_bias ? 1 : batch_size);
2119 const auto post_ops_with_shapes = experimental::transform_post_op_list_arguments<PostOpArgBroadcast, TensorShape>(post_ops,
2120 [ = ](auto broadcast)
2121 {
2122 return TensorShape
2123 {
2124 std::get<0>(broadcast) ? 1 : n,
2125 std::get<1>(broadcast) ? 1 : m,
2126 std::get<2>(broadcast) ? 1 : batch_size,
2127 };
2128 });
2129
2130 _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
2131 _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info, post_ops_with_shapes);
2132 }
2133
2134protected:
2135 template <typename U>
2136 void fill(U &&tensor, int i)
2137 {
2138 static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
2139 using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;
2140
2141 DistributionType distribution{ T(-1.0f), T(1.0f) };
2142 library->fill(tensor, distribution, i);
2143
2144        // Fill the borders with infinity so that any accidental read of the padding shows up as NaN (i.e. inf * 0) in the output
2145 DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
2146 library->fill_borders_with_garbage(tensor, distribution_inf, i);
2147 }
2148
2149 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
2150 DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
2151 {
2152 // Create tensors
2153 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
2154 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
2155 TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
2156 TensorType dst;
2157        // Create the post op argument tensors and populate the post op list with them
2158 std::vector<TensorType> post_op_tensors_holder{};
2159 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, ITensorInfo *>(post_ops,
2160 [&post_op_tensors_holder, &data_type](auto shape)
2161 {
2162 auto t = create_tensor<TensorType>(shape, data_type, 1);
2163 post_op_tensors_holder.push_back(std::move(t));
2164 return post_op_tensors_holder.back().info();
2165 });
2166
2167 const unsigned int M = lhs_shape[1];
2168 const unsigned int N = rhs_shape[0];
2169 const unsigned int K = lhs_shape[0];
2170 GEMMKernelInfo kernel_info;
2171 kernel_info.m = M;
2172 kernel_info.n = N;
2173 kernel_info.k = K;
2174 kernel_info.depth_output_gemm3d = 0;
2175 kernel_info.reinterpret_input_as_3d = false;
2176 kernel_info.broadcast_bias = broadcast_bias;
2177 kernel_info.activation_info = act_info;
2178 kernel_info.post_ops = populated_post_ops;
2179
2180 // Create and configure function
2181 GEMMOperatorType gemm;
2182 gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);
2183
2184 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2185 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2186 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
2187 for(const auto &tensor : post_op_tensors_holder)
2188 {
2189 ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
2190 }
2191
2192 add_padding_x({ &lhs, &rhs, &bias, &dst });
2193 for(auto &tensor : post_op_tensors_holder)
2194 {
2195 add_padding_x({ &tensor });
2196 }
2197
2198 // Allocate tensors
2199 lhs.allocator()->allocate();
2200 rhs.allocator()->allocate();
2201 bias.allocator()->allocate();
2202 dst.allocator()->allocate();
2203 for(auto &tensor : post_op_tensors_holder)
2204 {
2205 tensor.allocator()->allocate();
2206 }
2207
2208 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2209 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2210 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
2211 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2212 for(const auto &tensor : post_op_tensors_holder)
2213 {
2214 ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
2215 }
2216
2217 // Fill tensors
2218 fill(AccessorType(lhs), 0);
2219 fill(AccessorType(rhs), 1);
2220 fill(AccessorType(bias), 2);
2221 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
2222 {
2223 fill(AccessorType(post_op_tensors_holder.at(i)), 3 + i);
2224 }
2225
2226 // Compute GEMM
2227 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
2228 { ACL_SRC_1, &rhs },
2229 { ACL_SRC_2, &bias },
2230 { ACL_DST, &dst }
2231 });
2232 for(size_t i = 0; i < post_op_tensors_holder.size(); ++i)
2233 {
2234 gemm_pack.add_tensor(experimental::get_post_op_arg_type(i), &post_op_tensors_holder.at(i));
2235 }
2236 gemm.run(gemm_pack);
2237
2238 return dst;
2239 }
2240
2241 SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
2242 const ActivationLayerInfo &act_info, const experimental::PostOpList<TensorShape> &post_ops)
2243 {
2244 TensorShape dst_shape = lhs_shape;
2245 dst_shape[0] = rhs_shape[0];
2246 dst_shape[1] = lhs_shape[1];
2247
2248 // Create reference
2249 SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
2250 SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
2251 SimpleTensor<T> bias{ dst_shape, data_type, 1 };
2252        // Create the post op argument tensors and populate the post op list with them
2253 auto populated_post_ops = experimental::transform_post_op_list_arguments<TensorShape, SimpleTensor<T>>(post_ops, [&data_type](auto shape)
2254 {
2255 return SimpleTensor<T> { shape, data_type, 1 };
2256 });
2257
2258 const int n = rhs_shape[0];
2259 const int m = lhs_shape[1];
2260 const int batch_size = lhs_shape[2];
2261
2262 // Fill reference
2263 int tensor_idx = 0;
2264 fill(lhs, tensor_idx++);
2265 fill(rhs, tensor_idx++);
2266 fill(bias, tensor_idx++);
2267 for(auto &op : populated_post_ops.get_list())
2268 {
2269 for(auto tensor : op->arguments())
2270 {
2271 fill(*tensor, tensor_idx++);
2272 }
2273 }
2274
2275 if(broadcast_bias)
2276 {
Gunes Bayir4bfc70e2021-12-10 16:17:56 +00002277            // In case of broadcast, simply copy the first bias row into the remaining (m * batch_size - 1) rows
SiCongLiafa19722021-10-24 19:12:33 +01002278 for(int i = 1; i < m * batch_size; i++)
2279 {
2280 memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
2281 }
2282 }
2283
2284 SimpleTensor<T> out;
2285 out = reference::gemm<T>(lhs, rhs, bias, alpha, beta);
2286 // Ignore activation info if post ops are used instead
2287 if(populated_post_ops.size() > 0)
2288 {
2289 out = reference::post_ops<T>(out, populated_post_ops);
2290 }
2291 else
2292 {
2293 out = reference::activation_layer(out, act_info);
2294 }
2295 return out;
2296 }
2297
2298 TensorType _target{};
2299 SimpleTensor<T> _reference{};
2300};
2301
2302template <typename TensorType, typename AccessorType, typename T, typename GEMMOperatorType>
giuros01b3204e72019-04-01 13:50:22 +01002303class GEMMMatrixMultiplyNative3DValidationFixture : public framework::Fixture
2304{
2305public:
2306 template <typename...>
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01002307 void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, DataType data_type, float alpha, float beta,
2308 const ActivationLayerInfo &act_info)
giuros01b3204e72019-04-01 13:50:22 +01002309 {
2310 GEMMLHSMatrixInfo lhs_info;
2311 lhs_info.m0 = m0;
2312 lhs_info.k0 = k0;
2313
2314 GEMMRHSMatrixInfo rhs_info;
2315 rhs_info.n0 = n0;
2316 rhs_info.k0 = k0;
2317
2318        // In case of GEMM3D, m is the product of m_w and m_h
2319 const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n, 1, 1);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, m_h, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, m_h, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, unsigned int m_h, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m                       = M;
        kernel_info.n                       = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = m_h;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = true;
        kernel_info.activation_info         = act_info;
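        // depth_output_gemm3d = m_h requests that the kernel write its output back as a 3D tensor (m_h slices of
        // m_w rows), while broadcast_bias is always true here because the bias is a single row of N elements.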

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMOperatorType gemm;
        gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &bias, &dst });
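        // add_padding_x() adds some right padding on the innermost dimension so the kernel is also exercised
        // with non-dense strides.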

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);
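        // The operator itself holds no tensor state: the tensors are handed over at run time through the
        // ITensorPack, keyed by the ACL_SRC_* / ACL_DST identifiers.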

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, unsigned int m_h,
                                      const ActivationLayerInfo &act_info)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);
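        // i.e. the reference output is laid out as (N, m_w, m_h, batch_size), matching the 3D reinterpretation
        // requested through kernel_info.depth_output_gemm3d on the target side.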

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        // The bias is broadcast: copy its first row into the remaining M * batch_size - 1 rows
        for(int i = 1; i < m * batch_size; i++)
        {
            memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename T, typename ReshapeRHSOperatorType, typename GEMMOperatorType>
class GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, bool export_to_cl_image, DataType data_type, float alpha,
               float beta, bool broadcast_bias,
               const ActivationLayerInfo &act_info)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0                 = n0;
        rhs_info.k0                 = k0;
        rhs_info.interleave         = true;
        rhs_info.transpose          = false;
        rhs_info.h0                 = 4;
        rhs_info.export_to_cl_image = export_to_cl_image;
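        // These settings describe how ReshapeRHSOperatorType rearranges the RHS matrix before the MMUL kernel
        // consumes it; export_to_cl_image additionally requests that the reshaped RHS be exported to an OpenCL
        // image object when the device supports it.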

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target    = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, alpha, beta, broadcast_bias, act_info);
        _reference = compute_reference(lhs_shape, rhs_shape, data_type, alpha, beta, broadcast_bias, act_info);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        static_assert(std::is_floating_point<T>::value || std::is_same<T, half>::value, "Only floating point data types supported.");
        using DistributionType = typename std::conditional<std::is_same<T, half>::value, arm_compute::utils::uniform_real_distribution_16bit<T>, std::uniform_real_distribution<T>>::type;

        DistributionType distribution{ T(-1.0f), T(1.0f) };
        library->fill(tensor, distribution, i);

        // Fill border with infinity in order to check the presence of NaN values (i.e. inf * 0)
        DistributionType distribution_inf{ T(std::numeric_limits<float>::infinity()), T(std::numeric_limits<float>::infinity()) };
        library->fill_borders_with_garbage(tensor, distribution_inf, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
                              DataType data_type, float alpha, float beta, bool broadcast_bias, const ActivationLayerInfo &act_info)
    {
        // Create tensors
        TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType bias = create_tensor<TensorType>(bias_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];
        GEMMKernelInfo kernel_info;
        kernel_info.m                       = M;
        kernel_info.n                       = N;
        kernel_info.k                       = K;
        kernel_info.depth_output_gemm3d     = 0;
        kernel_info.reinterpret_input_as_3d = false;
        kernel_info.broadcast_bias          = broadcast_bias;
        kernel_info.activation_info         = act_info;

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMOperatorType       gemm;

        validate_result = bool(reshape_rhs.validate(rhs.info(), rhs_reshaped.info(), rhs_info));
        if(!validate_result)
        {
            return nullptr;
        }

        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);

        validate_result = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info));
        if(!validate_result)
        {
            return nullptr;
        }
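        // If either validate() call rejects the configuration (for example, export_to_cl_image on a device
        // without image support), validate_result stays false: the target path bails out here and
        // compute_reference() below also returns an empty tensor, so the configuration is effectively skipped.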

        gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        bias.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);
        fill(AccessorType(bias), 2);

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs },
                                { ACL_SRC_1, &rhs_reshaped },
                                { ACL_SRC_2, &bias },
                                { ACL_DST, &dst }
                              });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type, float alpha, float beta, bool broadcast_bias,
                                      const ActivationLayerInfo &act_info)
    {
        if(!validate_result)
            return SimpleTensor<T>();

        TensorShape dst_shape = lhs_shape;
        dst_shape[0]          = rhs_shape[0];
        dst_shape[1]          = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1 };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1 };
        SimpleTensor<T> bias{ dst_shape, data_type, 1 };

        const int n          = rhs_shape[0];
        const int m          = lhs_shape[1];
        const int batch_size = lhs_shape[2];

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

        if(broadcast_bias)
        {
            // In case of broadcast, simply copy the first row of the bias into the remaining M * batch_size - 1 rows
            for(int i = 1; i < m * batch_size; i++)
            {
                memcpy(bias.data() + i * n, bias.data(), n * sizeof(T));
            }
        }

        return reference::activation_layer(reference::gemm<T>(lhs, rhs, bias, alpha, beta), act_info);
    }

    bool            validate_result = true;
    TensorType      _target{};
    SimpleTensor<T> _reference{};
};

} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* ARM_COMPUTE_TEST_GEMM_FIXTURE */