Blame - arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h - ml/ComputeLibrary

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

1

/*

Georgios Pinitas

7cd26d4

2019-01-09 18:35:17 +0000

[diff] [blame]

2

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

3

*

4

* SPDX-License-Identifier: MIT

5

*

6

* Permission is hereby granted, free of charge, to any person obtaining a copy

7

* of this software and associated documentation files (the "Software"), to

8

* deal in the Software without restriction, including without limitation the

9

* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or

10

* sell copies of the Software, and to permit persons to whom the Software is

11

* furnished to do so, subject to the following conditions:

12

*

13

* The above copyright notice and this permission notice shall be included in all

14

* copies or substantial portions of the Software.

15

*

16

* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

17

* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

18

* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE

19

* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

20

* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,

21

* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22

* SOFTWARE.

23

*/

24

#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__

25

#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__

26

27

#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"

28

#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"

Georgios Pinitas

7cd26d4

2019-01-09 18:35:17 +0000

[diff] [blame]

29

#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h"

30

#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h"

31

#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h"

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

32

#include "arm_compute/runtime/IFunction.h"

33

#include "arm_compute/runtime/IMemoryManager.h"

34

#include "arm_compute/runtime/IScheduler.h"

35

#include "arm_compute/runtime/MemoryGroup.h"

36

#include "arm_compute/runtime/Tensor.h"

#include <memory>

namespace arm_compute

41

{

Georgios Pinitas

7cd26d4

2019-01-09 18:35:17 +0000

[diff] [blame]

42

// Forward declarations

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

43

class ITensor;

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

44

Anthony Barbier

ff0bccf

2018-11-30 10:42:40 +0000

[diff] [blame]

45

/** Buffer manager used when reshaping B on the fly
 *
 * The typical workflow for a given buffer index is:
 * - lock_to_reshape_if_needed()
 * - If the previous lock was successful: mark_as_reshaped()
 * - wait_for_reshaping(): wait for the reshaping to be complete
 * - mark_as_unused() once the thread is done using this given buffer.
 *
 * Calls for different indices might be interleaved, however the calls for a given index must always be in that order.
 */
class IBufferManager
{
public:
    /** Lock a buffer for the given index if it's available, else return immediately
     *
     * @param[in] index Index of the buffer to lock
     *
     * @return True if the buffer has been successfully locked, false if it's already reshaped / being reshaped.
     */
    virtual bool lock_to_reshape_if_needed(unsigned int index) = 0;
    /** Mark a buffer previously locked as reshaped
     *
     * @pre The thread calling this function must have locked the given buffer through lock_to_reshape_if_needed()
     *
     * @param[in] index Index of the buffer to mark as reshaped
     */
    virtual void mark_as_reshaped(unsigned int index) = 0;
    /** Block until the given buffer is marked as reshaped
     *
     * @param[in] index Index of the buffer
     */
    virtual void wait_for_reshaping(unsigned int index) = 0;
    /** Mark a reshaped buffer as unused
     *
     * Once all the users have marked a buffer as unused then it goes back to being free
     *
     * @param[in] index Index of the buffer to release
     */
    virtual void mark_as_unused(unsigned int index) = 0;

    /** Number of buffers used internally
     *
     * @return The number of buffers used by the manager.
     */
    virtual unsigned int num_buffers() const = 0;
    /** Default destructor (virtual so implementations can be destroyed through this interface) */
    virtual ~IBufferManager() = default;
};

91

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

92

/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types.

93

*/

94

class NEGEMMInterleavedWrapper : public IFunction

95

{

96

public:

97

NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr);

Georgios Pinitas

7cd26d4

2019-01-09 18:35:17 +0000

[diff] [blame]

98

~NEGEMMInterleavedWrapper() = default;

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

99

100

NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete;

101

NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete;

102

103

/** Initialise the kernel's input and output.

104

*

105

* @note The input and output tensor must have the same dimensions

106

*

107

* @param[in] a Input tensor (Matrix A)

108

* @param[in] b Input tensor (Matrix B)

109

* @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0.

110

* @param[in] alpha Scalar multiplier to apply to AB matrix product.

111

* @param[in] beta Scalar multiplier to apply to input C matrix before adding product.

112

* @param[in] pretranspose_b If true, pretranspose B once during the prepare() stage instead of on the fly every time.

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

113

*/

Georgios Pinitas

7cd26d4

2019-01-09 18:35:17 +0000

[diff] [blame]

114

void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b);

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

115

116

// Inherited methods overridden:

117

void run() override;

118

void prepare() override;

119

120

private:

121

MemoryGroup _memory_group;

122

bool _is_prepared{ false };

123

bool _pretranspose_b{ false };

124

Window _block_walker{};

125

Window _batch_window{};

126

const ITensor *_a{ nullptr };

127

const ITensor *_b{ nullptr };

128

ITensor *_c{ nullptr };

129

Tensor _transformed_b{};

130

Tensor _transformed_a{};

131

Tensor _tmp_c{};

132

INEGEMMWrapperKernel::Params _params{};

133

BlockSizes _block_sizes{};

134

std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> _prepare_b{ nullptr };

135

std::unique_ptr<NEGEMMInterleavedTransformAWrapper> _transform_a{ nullptr };

136

std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> _matrix_multiply{ nullptr };

Anthony Barbier

ff0bccf

2018-11-30 10:42:40 +0000

[diff] [blame]

137

std::unique_ptr<IBufferManager> _buffer_manager{ nullptr };

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

138

std::vector<TransformAWorkload> _a_workloads{};

139

std::vector<PrepareBWorkload> _b_workloads{};

140

std::vector<MatrixMultiplyWorkload> _mm_workloads{};

141

std::vector<IScheduler::Workload> _workloads{};

Anthony Barbier

ac314c2

2018-09-11 17:49:10 +0100

[diff] [blame]

142

std::string _tag{};

Georgios Pinitas

1509e4b

2019-01-28 10:01:50 +0000

[diff] [blame]

143

unsigned int _num_windows{ 1 };

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

144

};

Anthony Barbier

3d677cc

2018-07-23 16:42:59 +0100

[diff] [blame]

145

} // namespace arm_compute

146

#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */