Blame - src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp - ml/ComputeLibrary

2017-11-22 20:42:13 +0700

[diff] [blame]

63

_vector_sum_col(), _vector_sum_row(), _tmp_a(), _tmp_b(), _a_offset(0), _b_offset(0), _is_interleaved_transposed(true), _is_first_run(true), _reshape_b_only_on_first_run(false)

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

{

}

Chunosov

2017-11-22 20:42:13 +0700

[diff] [blame]

67

/** Configure the function and all the kernels it enqueues in run().
 *
 * Validates the arguments through validate(), caches the quantization offsets of @p a and @p b,
 * decides whether A and B are reshaped before the multiplication, then configures the reshape,
 * matrix-multiply, reduction and offset-contribution kernels and allocates the intermediate tensors.
 *
 * @param[in]  a         Matrix A. validate() checks it against DataType::QASYMM8.
 * @param[in]  b         Matrix B. validate() requires the same data type as @p a.
 * @param[out] output    Result tensor. validate() checks it against DataType::S32.
 * @param[in]  gemm_info GEMM metadata. reshape_b_only_on_first_run() is read here; validate() rejects
 *                       is_a_reshaped() and is_b_reshaped().
 */
void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, ICLTensor *output, const GEMMInfo &gemm_info)

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

68

{

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

69

ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);

70

// NOTE(review): gemm_info is used below (validate() call and reshape_b_only_on_first_run());
// presumably this only silences an unused-parameter warning when the check macro compiles out - confirm.
ARM_COMPUTE_UNUSED(gemm_info);

71

ARM_COMPUTE_ERROR_THROW_ON(CLGEMMLowpMatrixMultiplyCore::validate(a->info(), b->info(), output->info(), gemm_info));

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

72

Chunosov

2017-11-22 20:42:13 +0700

[diff] [blame]

73

// Cache the settings that run() and the kernel configuration below depend on
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();

74

_a_offset = a->info()->quantization_info().offset;

75

_b_offset = b->info()->quantization_info().offset;

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

76

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

77

// Get the GPU target

78

const GPUTarget gpu_target = CLScheduler::get().target();

Gian Marco

7b4d547

2018-01-10 15:56:30 +0000

[diff] [blame]

79

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

80

// Set the target for the kernels

81

_mtx_a_reshape_kernel.set_target(gpu_target);

82

_mm_kernel.set_target(gpu_target);

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

83

84

// Operands handed to the matrix multiply kernel; redirected to the reshaped temporaries below when reshaping is enabled
const ICLTensor *matrix_a = a;

85

const ICLTensor *matrix_b = b;

86

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

87

// Arguments used by GEMMReshapeInfo

88

// If we pass the matrix A and matrix B reshaped to CLGEMMMatrixMultiplyKernel, we need to pass m, n, k, mult_transpose1xW_width and mult_interleave4x4_height to CLGEMMReshapeInfo

89

// in order to know how the matrices have been reshaped

90

const int m = a->info()->dimension(1);

91

const int n = b->info()->dimension(0);

92

const int k = a->info()->dimension(0);

93

constexpr int mult_transpose1xW_width = 1;

94

constexpr int mult_interleave4x4_height = 1;

95

96

// Check if we need to reshape the matrix A and matrix B

97

_is_interleaved_transposed = is_interleaved_transposed(m, n, k, _reshape_b_only_on_first_run, gpu_target);

98

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

99

if(_is_interleaved_transposed)

{

matrix_a = &_tmp_a;

matrix_b = &_tmp_b;

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

104

_memory_group.manage(&_tmp_a);

Giorgio Arena

bb54e4e

2018-04-05 17:20:34 +0100

[diff] [blame^]

105

// _tmp_b is registered with the memory group only when B is reshaped on every run.
// NOTE(review): presumably so that a B reshaped once keeps its contents after the first run() - confirm.
if(!_reshape_b_only_on_first_run)

106

{

107

_memory_group.manage(&_tmp_b);

108

}

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

109

110

// Configure interleave kernel

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

111

_mtx_a_reshape_kernel.configure(a, &_tmp_a, mult_interleave4x4_height);

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

112

113

// Configure transpose kernel

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

114

_mtx_b_reshape_kernel.configure(b, &_tmp_b, mult_transpose1xW_width);

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

115

}

116

117

// Configure matrix multiply kernel

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

118

_mm_kernel.configure(matrix_a, matrix_b, output, _is_interleaved_transposed, GEMMReshapeInfo(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height));

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

119

120

// Initialize matrix B reduction kernel only if _a_offset is not equal to 0

121

if(_a_offset != 0)

122

{

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

123

TensorInfo info_vector_sum_col(compute_reductionA_shape(*b->info()), 1, DataType::S32);

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

124

_vector_sum_col.allocator()->init(info_vector_sum_col);

Giorgio Arena

bb54e4e

2018-04-05 17:20:34 +0100

[diff] [blame^]

125

// Same rule as for _tmp_b: run() executes the matrix B reduction only on the first run when
// _reshape_b_only_on_first_run is set, so _vector_sum_col is not registered with the memory group in that case.
if(!_reshape_b_only_on_first_run)

126

{

127

_memory_group.manage(&_vector_sum_col);

128

}

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

129

130

// Configure Matrix B reduction kernel

131

_mtx_b_reduction_kernel.configure(b, &_vector_sum_col);

132

}

133

134

// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0

135

if(_b_offset != 0)

136

{

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

137

TensorInfo info_vector_sum_row(compute_reductionB_shape(*a->info()), 1, DataType::S32);

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

138

_vector_sum_row.allocator()->init(info_vector_sum_row);

139

_memory_group.manage(&_vector_sum_row);

140

141

// Configure matrix A reduction kernel

142

_mtx_a_reduction_kernel.configure(a, &_vector_sum_row);

143

}

144

145

// Configure offset contribution kernel

146

// A reduction vector is passed as nullptr when its corresponding offset is zero (it was never initialised above)
_offset_contribution_kernel.configure(output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, a->info()->dimension(0), _a_offset, _b_offset);

147

148

// Allocate tensors

149

if(_is_interleaved_transposed)

150

{

151

_tmp_a.allocator()->allocate();

152

_tmp_b.allocator()->allocate();

}

if(_a_offset != 0)

{

_vector_sum_col.allocator()->allocate();

}

if(_b_offset != 0)

{

_vector_sum_row.allocator()->allocate();

}

}

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

166

/** Check whether the given tensor infos and GEMM metadata form a valid configuration.
 *
 * Works on tensor metadata only and follows the same sequence as configure(): argument checks,
 * optional reshape kernels, matrix multiply kernel, optional reduction kernels, offset contribution kernel.
 *
 * @param[in] a         Info of matrix A. Checked against 1 channel and DataType::QASYMM8.
 * @param[in] b         Info of matrix B. Must have the same data type as @p a, and b->dimension(1) must equal a->dimension(0).
 * @param[in] output    Info of the output. Checked against 1 channel and DataType::S32; dimension(0) must equal
 *                      b->dimension(0) and dimension(1) must equal a->dimension(1).
 * @param[in] gemm_info GEMM metadata. is_a_reshaped() and is_b_reshaped() must both be false.
 *
 * @return An empty Status when every check passes; otherwise the error of the first failing check.
 */
Status CLGEMMLowpMatrixMultiplyCore::validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *output, const GEMMInfo &gemm_info)

167

{

168

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::QASYMM8);

169

ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::S32);

170

ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(a, b);

171

ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(0) != (b)->dimension(1),

172

"The product AB is defined only if the number of columns in A is equal to the number of rows in B");

173

ARM_COMPUTE_RETURN_ERROR_ON_MSG((a)->dimension(1) != (output)->dimension(1),

174

"The output matrix must have the same number of rows as the matrix A");

175

ARM_COMPUTE_RETURN_ERROR_ON_MSG((b)->dimension(0) != (output)->dimension(0),

176

"The output matrix must have the same number of columns as the matrix B");

177

ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_a_reshaped(), "Matrix A already reshaped is not supported");

178

ARM_COMPUTE_RETURN_ERROR_ON_MSG(gemm_info.is_b_reshaped(), "Matrix B already reshaped is not supported");

179

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

180

// Quantization offsets decide which reduction kernels need to be validated below
int32_t a_offset = a->quantization_info().offset;

181

int32_t b_offset = b->quantization_info().offset;

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

182

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

183

// Same m, n, k and reshape multipliers as configure() uses
const int m = a->dimension(1);

184

const int n = b->dimension(0);

185

const int k = a->dimension(0);

186

constexpr int mult_transpose1xW_width = 1;

187

constexpr int mult_interleave4x4_height = 1;

188

const GEMMReshapeInfo reshape_info(m, n, k, mult_transpose1xW_width, mult_interleave4x4_height);

189

190

// Same reshape decision as configure(), taken from the scheduler's current GPU target
bool reshape_matrices = is_interleaved_transposed(m, n, k, gemm_info.reshape_b_only_on_first_run(), CLScheduler::get().target());

191

192

if(reshape_matrices)

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

193

{

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

194

// Infos of the reshaped A and B that the matrix multiply kernel would receive
TensorInfo info_a(compute_interleaved_shape(*a, mult_interleave4x4_height), 1, a->data_type());

195

TensorInfo info_b(compute_transpose1xW_with_element_size_shape(*b, mult_transpose1xW_width), 1, b->data_type());

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

196

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

197

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMInterleave4x4Kernel::validate(a, &info_a, mult_interleave4x4_height));

198

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMTranspose1xWKernel::validate(b, &info_b, mult_transpose1xW_width));

199

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernel::validate(&info_a, &info_b, output, reshape_matrices, reshape_info));

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

200

}

201

else

202

{

Gian Marco

2018-01-30 13:35:54 +0000

[diff] [blame]

203

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyKernel::validate(a, b, output, reshape_matrices, reshape_info));

Georgios Pinitas

2017-12-07 16:47:52 +0000

[diff] [blame]

204

}

205

206

// Default-constructed here; each is only filled in when its offset is non-zero
TensorInfo info_vector_sum_col, info_vector_sum_row;

207

208

// Validate matrix B reduction kernel only if a_offset is not equal to 0

209

if(a_offset != 0)

210

{

211

info_vector_sum_col = TensorInfo(compute_reductionA_shape(*b), 1, DataType::S32);

212

213

// Validate Matrix B reduction kernel

214

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixBReductionKernel::validate(b, &info_vector_sum_col));

215

}

216

217

// Validate Matrix A reduction kernel only if b_offset is not equal to 0

218

if(b_offset != 0)

219

{

220

info_vector_sum_row = TensorInfo(compute_reductionB_shape(*a), 1, DataType::S32);

221

222

// Validate matrix A reduction kernel

223

ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(a, &info_vector_sum_row));

224

}

225

226

// Validate offset contribution kernel

227

// A reduction vector info is passed as nullptr when its corresponding offset is zero
ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOffsetContributionKernel::validate(output,

228

a_offset == 0 ? nullptr : &info_vector_sum_col,

229

b_offset == 0 ? nullptr : &info_vector_sum_row,

230

a_offset, b_offset));

return Status{};

}

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

235

void CLGEMMLowpMatrixMultiplyCore::run()

236

{

237

_memory_group.acquire();

238

239

if(_is_interleaved_transposed)

240

{

241

// Run reshape matrix A

242

CLScheduler::get().enqueue(_mtx_a_reshape_kernel, false);

243

Chunosov

2017-11-22 20:42:13 +0700

[diff] [blame]

244

if(_is_first_run || !_reshape_b_only_on_first_run)

245

{

246

// Run reshape matrix B

247

CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);

}

}

// Note: if _reshape_b_only_on_first_run = true, the reduction kernel can be executed only once

252

if(_is_first_run || !_reshape_b_only_on_first_run)

253

{

254

// Run matrix B reduction kernel only if _a_offset is not equal to 0

255

if(_a_offset != 0)

256

{

257

CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);

258

}

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

259

}

260

261

// Run matrix multiply

262

CLScheduler::get().enqueue(_mm_kernel, false);

263

264

// Run matrix A reduction kernel only if _b_offset is not equal to 0

265

if(_b_offset != 0)

266

{

267

CLScheduler::get().enqueue(_mtx_a_reduction_kernel, false);

268

}

269

Gian Marco

2017-11-21 10:57:50 +0000

[diff] [blame]

270

// Run offset contribution kernel

271

CLScheduler::get().enqueue(_offset_contribution_kernel, true);

272

273

_memory_group.release();

Chunosov

2017-11-22 20:42:13 +0700

[diff] [blame]

274

275

_is_first_run = false;

Gian Marco