Blame - src/cpu/operators/CpuGemmConv2d.h - ml/ComputeLibrary

* @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.

98

* Data types supported: Same as @p src.

99

* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.

100

* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights

101

* tensor has also been transposed with cpu::kernels::CpuGemmTranspose1xWKernel. Data type supported: Same as @p src.

102

* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).

103

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.

104

* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation

105

* available which may introduce a drop of accuracy as well. Default is false

106

* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

107

*/

108

void configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

109

const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

110

/** Static function to check if given info will lead to a valid configuration

111

*

112

* Similar to CpuGemmConv2d::configure()

*

* @return a status

*/

static Status validate(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

117

const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(),

118

bool enable_fast_math = false, unsigned int num_groups = 1);

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

119

120

// Inherited methods overridden:

121

void run(ITensorPack &tensors) override;

122

void prepare(ITensorPack &tensors) override;

123

experimental::MemoryRequirements workspace() const override;

124

125

private:

126

/** Configures the appropriate matrix multiply routine

127

*

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

128

* @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.

129

* @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.

130

* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].

131

* Data type supported: Should match @p src data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.

132

* @param[out] output Output tensor info. Data types supported: Same as @p src,

133

* except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.

134

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.

135

* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation

136

* available which may introduce a drop of accuracy as well. Default is false

137

* @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

138

*/

139

void configure_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo(),

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

140

bool enable_fast_math = false, int gemm_3d_depth = 1);

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

141

/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines

142

*

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

143

* @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.

144

* @param[in] weights Weights tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/BFLOAT16/F16/F32.

145

* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM].

146

* Data type supported: Should match @p src data type, except for input of QASYMM8/QASYMM8_SIGNED type where biases should be of S32 type.

147

* @param[in] dst Output tensor info. Data types supported: Same as @p src,

148

* except for input of QASYMM8/QASYMM8_SIGNED type where output should be of S32 type.

149

* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.

150

* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation

151

* available which may introduce a drop of accuracy as well. Default is false

152

* @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)

153

* @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped, i.e. 1x1 convolution with NHWC data layout. (Defaults to false)

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

*

* @return a status

*/

static Status validate_mm(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *dst, const ActivationLayerInfo &act_info = ActivationLayerInfo(),

Georgios Pinitas

69a9ac4

2021-07-22 13:30:13 +0100

[diff] [blame]

158

bool enable_fast_math = false, int gemm_3d_depth = 1, bool skip_im2col = false);

Manuel Bottini

29599d0

2021-07-06 15:01:35 +0100

[diff] [blame]

159

/** Static function to check if GEMM3D is supported in @ref NEGEMM or in @ref CpuGemmLowpMatrixMultiplyCore

160

*

161

* @param[in] src Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.

162

* @param[in] weights Weights tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32.

163

* @param[in] act_info Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.

164

* @param[in] gemm_3d_depth Depth of GEMM 3D

165

* @param[in] skip_im2col Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout

*

* @return a status

*/

static Status validate_gemm3d(const ITensorInfo *src, const ITensorInfo *weights, const ActivationLayerInfo &act_info, int gemm_3d_depth, bool skip_im2col);

enum AuxTensorIdx

{

// CpuGemmLowpMatrixMultiplyCore has up to 8 internal tensors

174

Im2ColOutput = 9,

175

WeightsReshaped,

176