/*
 * Copyright (c) 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef SRC_CPU_OPERATORS_CPUMATMUL
25#define SRC_CPU_OPERATORS_CPUMATMUL
26
27#include "arm_compute/core/TensorInfo.h"
28#include "src/core/common/Macros.h"
29#include "src/cpu/ICpuOperator.h"
30#include "src/cpu/kernels/CpuTransposeKernel.h"
31#include "src/cpu/operators/internal/CpuGemmAssemblyDispatch.h"
32
33namespace arm_compute
34{
35// Forward Declarations
36class MatMulInfo;
37class CpuMatMulSettings;
38
39namespace cpu
40{
41/** Function to execute MatMul Operation. This function calls the following functions/kernels:
42 *
43 * If adjoint/adj flag is enabled for either input lhs or rhs (or both) :
44 * -# @ref cpu::kernels::CpuTransposeKernel
45 * Then :
46 * -# @ref cpu::CpuGemmAssemblyDispatch
47 */
48class CpuMatMul : public ICpuOperator
49{
50public:
51 /* Constructor */
52 CpuMatMul();
53 /* Destructor */
54 ~CpuMatMul() = default;
55
56 ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuMatMul);
57 /** Configure operator for a given list of arguments
58 *
59 * Note: Check documentation of @ref NEMatMul for a list of supported datatypes and layouts
60 *
61 *
62 * @param[in] lhs Source tensor info.
63 * @param[in] rhs Source tensor info.
64 * @param[out] dst Destination tensor info. Data types supported: same as @p lhs / @p rhs.
65 * @param[in] info Contains MatMul operation information described in @ref MatMulInfo.
66 * @param[in] settings The settings for matmul operation (i.e fast math)
67 */
68 void configure(ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
69 /** Static function to check if given info will lead to a valid configuration
70 *
71 * Similar to CpuMatMul::configure()
72 *
73 * @return a status
74 */
75 static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
76
77 // Inherited methods overridden:
78 void run(ITensorPack &tensors) override;
79 experimental::MemoryRequirements workspace() const override;
80
81private:
82 enum InternalTensorIdx
83 {
84 AsmGemmWorkspace = 0, // Pre-allocate workspace tensors for CpuGemmAssemblyDispatch
85 PretransposeRHS, // Pre-allocate workspace tensors for CpuGemmAssemblyDispatch
86 TransposeLHS,
87 TransposeRHS,
88 Count
89 };
90
91 // Define unique pointers to kernels/operators used by matmul
92 std::unique_ptr<kernels::CpuTransposeKernel> _transpose_kernel_lhs{ nullptr };
93 std::unique_ptr<kernels::CpuTransposeKernel> _transpose_kernel_rhs{ nullptr };
94 std::unique_ptr<CpuGemmAssemblyDispatch> _asm_glue{ nullptr };
95
96 // TensorInfo for tensors stored in auxillary memory
97 TensorInfo _lhs_transposed{};
98 TensorInfo _rhs_transposed{};
99
100 // Original tensor shapes prior to reshaping tensors and collapsing dimensions
101 TensorShape _original_lhs_shape{};
102 TensorShape _original_rhs_shape{};
103 TensorShape _original_dst_shape{};
104
105 // Note : adj_lhs means the same as transposing lhs
106 bool _adj_lhs{ false };
107 bool _adj_rhs{ false };
108 bool _fast_math{ false };
109 AsmGemmInfo _gemm_info{};
110 experimental::MemoryRequirements _aux_mem{ Count };
111};
112}
113}
114
115#endif /* SRC_CPU_OPERATORS_CPUMATMUL */