Blame - src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.h - ml/ComputeLibrary

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

1

/*

Matthew Bentham

f1aeab9

2023-05-30 13:35:34 +0000

[diff] [blame]

2

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

SiCong Li

c5ab4df

2023-10-17 17:38:57 +0100

[diff] [blame]

24

#ifndef ACL_SRC_CPU_OPERATORS_CPUGEMMLOWPMATRIXMULTIPLYCORE_H

25

#define ACL_SRC_CPU_OPERATORS_CPUGEMMLOWPMATRIXMULTIPLYCORE_H

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

26

27

#include "arm_compute/core/TensorInfo.h"

SiCong Li

9129549

2023-07-21 18:16:13 +0100

[diff] [blame]

28

#include "arm_compute/function_info/GEMMInfo.h"

Felix Thomasmathibalan

afd38f0

2023-09-27 17:46:17 +0100

[diff] [blame]

29

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

30

#include "src/core/common/Macros.h"

Georgios Pinitas

7891a73

2021-08-20 21:39:25 +0100

[diff] [blame]

31

#include "src/cpu/ICpuOperator.h"

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

#include <memory>

namespace arm_compute

{

namespace cpu

{

// Forward declarations only: the operator below refers to these types through
// std::unique_ptr members, so their full definitions are not required here.
namespace kernels
{
class CpuGemmInterleave4x4Kernel;
class CpuGemmLowpMatrixMultiplyKernel;
class CpuGemmLowpOffsetContributionKernel;
class CpuGemmLowpOffsetContributionOutputStageKernel;
class CpuGemmLowpMatrixAReductionKernel;
class CpuGemmLowpMatrixBReductionKernel;
class CpuGemmTranspose1xWKernel;
class CpuConvertQuantizedSignednessKernel;
} // namespace kernels
class CpuGemmAssemblyDispatch;
class CpuActivation;

52

53

/** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:

54

*

55

* -# @ref kernels::CpuGemmInterleave4x4Kernel

56

* -# @ref kernels::CpuGemmTranspose1xWKernel

57

* -# @ref kernels::CpuGemmLowpMatrixMultiplyKernel

58

* -# @ref kernels::CpuGemmLowpOffsetContributionKernel

59

* -# @ref CpuActivation

60

*

61

* otherwise if the DOT product instruction is available:

62

*

63

* -# @ref kernels::CpuGemmLowpOffsetContributionKernel

64

*

65

*/

66

class CpuGemmLowpMatrixMultiplyCore : public ICpuOperator

{

public:

/** Constructor */

CpuGemmLowpMatrixMultiplyCore();

71

ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmLowpMatrixMultiplyCore);

72

/** Destructor */

73

~CpuGemmLowpMatrixMultiplyCore();

74

/** Initialise the kernel's inputs, output

75

*

76

* Valid data layouts:

* - NHWC

* - NCHW

*

* Valid data type configurations:

81

* |src0 |src1 |src2 |dst |

82

* |:--------------|:------------------|:--------|:--------------|

* |QASYMM8 |QASYMM8 |S32 |S32 |

87

* |QASYMM8 |QSYMM8_PER_CHANNEL |S32 |S32 |

88

* |QASYMM8 |QSYMM8 |S32 |S32 |

* |QASYMM8_SIGNED |QASYMM8_SIGNED |S32 |S32 |

93

* |QASYMM8_SIGNED |QSYMM8_PER_CHANNEL |S32 |S32 |

94

* |QASYMM8_SIGNED |QSYMM8 |S32 |S32 |

95

*

96

* @note GEMM_LOWP: low precision GEMM kernel

97

* This kernel performs the following computations:

98

*

99

* -# Convert a values from QASYMM8 to int32 and add a_offset to each of them.

100

* -# Convert b values from QASYMM8 to int32 add b_offset to each of them.

101

* -# Compute the matrix product of the resulting a * b in int32.

102

*

103

* @note The @p output type is S32 if @p gemm_info.type == GEMMLowpOutputStageType::NONE. It is QASYMM8/QASYMM8_SIGNED otherwise

104

*

105

* @param[in] a First input tensor info (Matrix A). Data type supported: QASYMM8/QASYMM8_SIGNED.

106

* @param[in] b Second input tensor info (Matrix B). Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.

107

* @param[in] c Third input tensor info (Matrix C). It can be a nullptr. Data type supported: S32

108

* @param[out] dst Output tensor info. Data type supported: Data type supported: S32/QASYMM8/QASYMM8_SIGNED

109

* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and

110

* if the reshape of matrix B should be executed only for the first run

111

*/

Felix Thomasmathibalan

afd38f0

2023-09-27 17:46:17 +0100

[diff] [blame]

112

void configure(const ITensorInfo *a,

113

const ITensorInfo *b,

114

const ITensorInfo *c,

115

ITensorInfo *dst,

116

const GEMMInfo &gemm_info = GEMMInfo());

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

117

/** Static function to check if given info will lead to a valid configuration

118

*

119

* Similar to CpuGemmLowpMatrixMultiplyCore::configure()

120

*

121

* @return a status

122

*/

Felix Thomasmathibalan

afd38f0

2023-09-27 17:46:17 +0100

[diff] [blame]

123

static Status validate(const ITensorInfo *a,

124

const ITensorInfo *b,

125

const ITensorInfo *c,

126

const ITensorInfo *dst,

127

const GEMMInfo &gemm_info = GEMMInfo());

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

128

129

// Inherited methods overridden:

Felix Thomasmathibalan

afd38f0

2023-09-27 17:46:17 +0100

[diff] [blame]

130

void run(ITensorPack &tensors) override;

131

void prepare(ITensorPack &tensors) override;

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

132

experimental::MemoryRequirements workspace() const override;

private:

enum AuxTensorIdx

{

SiCong Li

c5ab4df

2023-10-17 17:38:57 +0100

[diff] [blame]

137

/* Slots 0 - 2 reserved for CpuGemmAssemblyDispatch */

138

VectorSumCol = 3,

Manuel Bottini

cfac51c

2021-06-18 15:47:28 +0100

[diff] [blame]

VectorSumRow,

TmpA,

TmpB,

MMResultS32,

SignedA,

SignedOutput,

Count

};

std::unique_ptr<CpuGemmAssemblyDispatch> _asm_glue;

149

std::unique_ptr<kernels::CpuGemmLowpMatrixMultiplyKernel> _mm_kernel;

150

std::unique_ptr<kernels::CpuGemmInterleave4x4Kernel> _mtx_a_reshape_kernel;

151

std::unique_ptr<kernels::CpuGemmTranspose1xWKernel> _mtx_b_reshape_kernel;

152

std::unique_ptr<kernels::CpuGemmLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;

153

std::unique_ptr<kernels::CpuGemmLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;

154

std::unique_ptr<kernels::CpuGemmLowpOffsetContributionKernel> _offset_contribution_kernel;

155

std::unique_ptr<kernels::CpuGemmLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;

156

std::unique_ptr<CpuActivation> _activation_func;

157

std::unique_ptr<kernels::CpuConvertQuantizedSignednessKernel> _convert_to_signed_asymm;

158

std::unique_ptr<kernels::CpuConvertQuantizedSignednessKernel> _convert_from_signed_asymm;

159

160

TensorInfo _vector_sum_col;

161

TensorInfo _vector_sum_row;

162

TensorInfo _tmp_a;

163

TensorInfo _tmp_b;

164

TensorInfo _mm_result_s32;

165

TensorInfo _signed_a;

166

TensorInfo _signed_output;

int32_t _a_offset;

int32_t _b_offset;

bool _run_vector_matrix_multiplication;

171

bool _assembly_path;

172

bool _fused_assembly_path;

173

bool _reshape_b_only_on_first_run;

174

bool _is_prepared;

175

bool _fuse_output_stage;

176

bool _run_activation;

177

bool _flip_signedness;

178

GEMMInfo _gemm_info;

179

experimental::MemoryRequirements _aux_mem{};

180

};

181

} // namespace cpu

182

} // namespace arm_compute

SiCong Li

c5ab4df

2023-10-17 17:38:57 +0100

[diff] [blame]

183

#endif // ACL_SRC_CPU_OPERATORS_CPUGEMMLOWPMATRIXMULTIPLYCORE_H