/*
 * Copyright (c) 2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H

#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ICLTensor;

/** Basic function to run @ref CLQLSTMLayer
 *
 * This function calls the following CL functions/kernels:
 *
 * -# @ref CLActivationLayer                                    Activation functions (tanh and logistic)
 * -# @ref CLCopyKernel                                         Copy kernel for copying output_state_out to output
 * -# @ref CLArithmeticAddition                                 Elementwise addition and subtraction
 * -# @ref CLGEMMLowpMatrixMultiplyCore                         Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint  Convert 32-bit integers into QSYMM16
 * -# @ref CLGEMMLowpMatrixAReductionKernel                     For precomputing effective biases to use
 * -# @ref CLPixelWiseMultiplication                            Elementwise multiplication
 * -# @ref CLTranspose                                          Transpose function for reshaping the weights
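 *
 * A minimal usage sketch (tensor creation, shapes and quantization settings are
 * illustrative assumptions; the only requirements are those documented on
 * configure() below):
 *
 * @code
 * CLQLSTMLayer qlstm;
 * // Optionally check the configuration first via CLQLSTMLayer::validate(...)
 * // with the matching ITensorInfo pointers.
 * qlstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
 *                 &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
 *                 &forget_gate_bias, &cell_bias, &output_gate_bias,
 *                 &cell_state_in, &output_state_in,
 *                 &cell_state_out, &output_state_out, &output, lstm_params);
 * // ... allocate the tensors and fill the inputs ...
 * qlstm.run();
 * @endcode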
 */
class CLQLSTMLayer : public IFunction
{
public:
    /** Default constructor */
    CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer(const CLQLSTMLayer &) = delete;
    /** Default move constructor */
    CLQLSTMLayer(CLQLSTMLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
    /** Default move assignment operator */
    CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);
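
    /* A sketch of populating the optional features of LSTMParams for the
     * configure()/validate() methods of this class. The setter names follow
     * LSTMParams<T> from arm_compute/runtime/common/LSTMParams.h; the variables
     * stand in for user-provided tensors and values, so treat this as
     * illustrative only:
     *
     *   LSTMParams<ICLTensor> lstm_params;
     *   lstm_params.set_matmul_scale_params(input_intermediate_scale, forget_intermediate_scale,
     *                                       cell_intermediate_scale, output_intermediate_scale)
     *              .set_hidden_state_params(hidden_state_zero, hidden_state_scale)
     *              .set_projection_params(&projection_weights, &projection_bias)
     *              .set_layer_normalization_params(&input_layer_norm_weights, &forget_layer_norm_weights,
     *                                              &cell_layer_norm_weights, &output_layer_norm_weights)
     *              .set_cell_clip_params(cell_threshold)
     *              .set_projection_clip_params(projection_threshold);
     */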

    /** Initialize function's tensors.
     *
     * @param[in]  compile_context             The compile context to be used.
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);

    /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_state_in               2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] output                      Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors info used in peephole, CIFG and layer normalization optimizations:
     *                                        input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                        forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                        cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                        output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                        hidden_state_zero          The zero point of the hidden state.
     *                                        hidden_state_scale         The scale of the hidden state.
     *                                        input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                        recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                        cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                        projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                        input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                   If set to 0.0 then clipping is disabled.
     *                                        projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                   [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    enum class LayerNormGate : uint8_t
    {
        Forget,
        Cell,
        Input,
        Output,
        Count
    };
    static constexpr uint8_t  _layer_norm_count                    = static_cast<uint8_t>(LayerNormGate::Count);
    static constexpr uint32_t _out_state_output_size_dimension_idx = 0;

    /** Internal method to configure matrix multiplication plus output stage of each gate.
     *
     * @param[in] compile_context       The compile context to be used.
     * @param[in] mm                    Matrix multiplication function to use.
     * @param[in] outstage              Output stage function to use.
     * @param[in] gemmlowp_info         GEMMLowp metadata to be used by the output stage.
     * @param[in] mm_input              Input tensor to matrix multiplication function.
     * @param[in] mm_weights            Weights tensor to matrix multiplication function.
     * @param[in] bias                  Bias tensor to matrix multiplication function.
     * @param[in] mm_res                Tensor to be used for storing the result of the matrix multiplication.
     * @param[in] outstage_res          Tensor to be used for storing the result of the output stage.
     * @param[in] gemmlowp_scale        Real multiplier to be used computing multiplier and shift for requantization.
     * @param[in] mm_res_info           Tensor info to be used to initialize the matrix multiplication result tensor.
     * @param[in] outstage_tensor_info  Tensor info to be used to initialize the output stage result tensor.
     */
    void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                      const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
                      CLTensor *outstage_res, float gemmlowp_scale,
                      const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
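
    /* gemmlowp_scale above is a real-valued requantization multiplier; the output
     * stage consumes it as an integer multiplier/shift pair. A sketch of that
     * decomposition, assuming the helper declared in
     * arm_compute/core/utils/quantization/AsymmHelpers.h:
     *
     *   int32_t multiplier = 0;
     *   int32_t shift      = 0;
     *   quantization::calculate_quantized_multiplier(gemmlowp_scale, &multiplier, &shift);
     *   gemmlowp_info.gemmlowp_multiplier = multiplier;
     *   gemmlowp_info.gemmlowp_shift      = shift;
     */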

    MemoryGroup _memory_group{};

    /** A small internal kernel to perform a copy between two tensors */
    class TensorCopyKernel
    {
        static constexpr uint32_t max_dimension_supported = 2;

        ICLTensor *_src{ nullptr };
        ICLTensor *_dst{ nullptr };
        size_t     _row_size{};
        Window     _window{};

    public:
        /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer::TensorCopyKernel
         *
         * @param[in] src Source tensor info.
         * @param[in] dst Destination tensor info.
         *
         * @return a status
         */
        static Status validate(const ITensorInfo &src, const ITensorInfo &dst);
        /** Set the input and output tensors.
         *
         * @param[in]  src Source tensor
         * @param[out] dst Destination tensor
         */
        void configure(ICLTensor &src, ICLTensor &dst);
        /** Run the kernel */
        void run();
    };
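
    /* TensorCopyKernel backs the projection/hidden state copies declared further
     * below (e.g. _hidden_to_output_copy). A minimal usage sketch, assuming src
     * and dst satisfy validate():
     *
     *   TensorCopyKernel copy;
     *   ARM_COMPUTE_ERROR_THROW_ON(TensorCopyKernel::validate(*src.info(), *dst.info()));
     *   copy.configure(src, dst);
     *   copy.run();
     */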

    // Functions used
    CLTranspose                      _transpose_input_to_forget_weights{};
    CLTranspose                      _transpose_input_to_cell_weights{};
    CLTranspose                      _transpose_input_to_output_weights{};
    CLTranspose                      _transpose_input_to_input_weights{};
    CLTranspose                      _transpose_recurrent_to_forget_weights{};
    CLTranspose                      _transpose_recurrent_to_cell_weights{};
    CLTranspose                      _transpose_recurrent_to_output_weights{};
    CLTranspose                      _transpose_recurrent_to_input_weights{};
    CLTranspose                      _transpose_projection_weights{};
    CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
    CLGEMMLowpMatrixAReductionKernel _projection_reduction{};
    CLArithmeticAddition             _projection_bias_add{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_forget{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_forget{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_forget{};
    CLGEMMLowpOutputStage            _input_to_forget_outstage{};
    CLGEMMLowpOutputStage            _recurrent_to_forget_outstage{};
    CLGEMMLowpOutputStage            _cell_to_forget_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_forget{};
    CLArithmeticAddition             _accumulate_cell_forget{};
    CLActivationLayer                _forget_gate_sigmoid{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_cell{};
    CLGEMMLowpOutputStage            _input_to_cell_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_cell{};
    CLGEMMLowpOutputStage            _recurrent_to_cell_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_modulation{};
    CLActivationLayer                _cell_gate_tanh{};
    CLArithmeticSubtraction          _input_gate_sub{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_input{};
    CLGEMMLowpOutputStage            _input_to_input_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_input{};
    CLGEMMLowpOutputStage            _recurrent_to_input_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_input{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_input{};
    CLGEMMLowpOutputStage            _cell_to_input_outstage{};
    CLArithmeticAddition             _accumulate_cell_input{};
    CLActivationLayer                _input_gate_sigmoid{};
    CLPixelWiseMultiplication        _pixelwise_mul_forget_cell{};
    CLPixelWiseMultiplication        _pixelwise_mul_input_cell{};
    CLArithmeticAddition             _add_forget_cell{};
    CLActivationLayer                _cell_clip{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_output{};
    CLGEMMLowpOutputStage            _input_to_output_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_output{};
    CLGEMMLowpOutputStage            _recurrent_to_output_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_output{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_output{};
    CLGEMMLowpOutputStage            _cell_to_output_outstage{};
    CLArithmeticAddition             _accumulate_cell_to_output{};
    CLActivationLayer                _output_gate_sigmoid{};
    CLActivationLayer                _hidden_tanh{};
    CLPixelWiseMultiplication        _pixelwise_mul_hidden{};
    CLGEMMLowpOutputStage            _hidden_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_projection{};
    CLGEMMLowpOutputStage            _projection_outstage{};
    CLArithmeticAddition             _accumulate_projection{};
    CLActivationLayer                _projection_clip{};
    std::array<CLQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
    CLCopyKernel                     _copy_output{};

    TensorCopyKernel _projection_bias_copy{};
    TensorCopyKernel _projection_output_to_accumulate_copy{};
    TensorCopyKernel _projection_accumulate_to_output_copy{};
    TensorCopyKernel _hidden_to_output_copy{};

    // Tensor pointers
    const ICLTensor *_input_to_input_weights{ nullptr };
    const ICLTensor *_recurrent_to_input_weights{ nullptr };
    const ICLTensor *_projection_bias{ nullptr };
    const ICLTensor *_input_to_forget_weights{ nullptr };
    const ICLTensor *_input_to_cell_weights{ nullptr };
    const ICLTensor *_input_to_output_weights{ nullptr };
    const ICLTensor *_recurrent_to_forget_weights{ nullptr };
    const ICLTensor *_recurrent_to_cell_weights{ nullptr };
    const ICLTensor *_recurrent_to_output_weights{ nullptr };
    const ICLTensor *_projection_weights{ nullptr };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} };

    using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
    inline LayerNormIndexType getGateIndex(LayerNormGate g)
    {
        return static_cast<LayerNormIndexType>(g);
    }

    inline void set_layer_norm_weight(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_weights[getGateIndex(g)] = t;
    }

    inline void set_layer_norm_bias(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_bias[getGateIndex(g)] = t;
    }

    inline const ICLTensor *get_layer_norm_weight(LayerNormGate g)
    {
        return _layer_norm_weights[getGateIndex(g)];
    }

    inline const ICLTensor *get_layer_norm_bias(LayerNormGate g)
    {
        return _layer_norm_bias[getGateIndex(g)];
    }

    inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
    {
        return _layer_norms[getGateIndex(g)];
    }

    inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in)
    {
        ARM_COMPUTE_ERROR_ON(!_has_layer_norm);

        CLTensor *out = &get_layer_norm_output(g);
        _memory_group.manage(out);
        out->allocator()->init(*(in->info()));

        get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
    }

    inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
    {
        // Output quantization scale will be different, but ignored here
        // since it will be configured at configure() stage.
        const TensorInfo out{ in };
        return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
    }

    // Temporary tensors
    CLTensor _input_to_forget_weights_transposed{ nullptr };
    CLTensor _input_to_cell_weights_transposed{ nullptr };
    CLTensor _input_to_output_weights_transposed{ nullptr };
    CLTensor _input_to_input_weights_transposed{ nullptr };
    CLTensor _recurrent_to_forget_weights_transposed{ nullptr };
    CLTensor _recurrent_to_cell_weights_transposed{ nullptr };
    CLTensor _recurrent_to_output_weights_transposed{ nullptr };
    CLTensor _recurrent_to_input_weights_transposed{ nullptr };
    CLTensor _projection_weights_transposed{ nullptr };
    CLTensor _input_to_input_eff_bias{ nullptr };
    CLTensor _recurrent_to_input_eff_bias{ nullptr };
    CLTensor _input_to_forget_eff_bias{ nullptr };
    CLTensor _recurrent_to_forget_eff_bias{ nullptr };
    CLTensor _input_to_cell_eff_bias{ nullptr };
    CLTensor _recurrent_to_cell_eff_bias{ nullptr };
    CLTensor _input_to_output_eff_bias{ nullptr };
    CLTensor _recurrent_to_output_eff_bias{ nullptr };
    CLTensor _projection_reduction_res{ nullptr };
    CLTensor _projection_eff_bias{ nullptr };
    CLTensor _mm_input_to_forget_res{ nullptr };
    CLTensor _mm_recurrent_to_forget_res{ nullptr };
    CLTensor _mul_cell_to_forget_res{ nullptr };
    CLTensor _input_to_forget_outstage_res{ nullptr };
    CLTensor _cell_to_forget_outstage_res{ nullptr };
    CLTensor _recurrent_to_forget_outstage_res{ nullptr };
    CLTensor _forget_gate{ nullptr };
    CLTensor _mm_input_to_cell_res{ nullptr };
    CLTensor _input_to_cell_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_cell_res{ nullptr };
    CLTensor _recurrent_to_cell_outstage_res{ nullptr };
    CLTensor _cell_gate{ nullptr };
    CLTensor _mul_input_cell_res{ nullptr };
    CLTensor _mm_input_to_input_res{ nullptr };
    CLTensor _input_to_input_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_input_res{ nullptr };
    CLTensor _mul_cell_to_input_res{ nullptr };
    CLTensor _cell_to_input_outstage_res{ nullptr };
    CLTensor _recurrent_to_input_outstage_res{ nullptr };
    CLTensor _input_gate{ nullptr };
    CLTensor _mm_input_to_output_res{ nullptr };
    CLTensor _input_to_output_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_output_res{ nullptr };
    CLTensor _mul_cell_to_output_res{ nullptr };
    CLTensor _cell_to_output_outstage_res{ nullptr };
    CLTensor _recurrent_to_output_outstage_res{ nullptr };
    CLTensor _output_gate{ nullptr };
    CLTensor _hidden_mul_res{ nullptr };
    CLTensor _hidden_gate{ nullptr };
    CLTensor _mm_projection_res{ nullptr };
    CLTensor _projection_outstage_res{ nullptr };
    CLTensor _projection_out_res{ nullptr };
    CLTensor _projection_accumulate_res{ nullptr };
    CLTensor _ones{ nullptr };
    std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} };

    inline CLTensor &get_layer_norm_output(LayerNormGate g)
    {
        return _layer_norm_output[getGateIndex(g)];
    }

    bool _is_prepared{ false };
    bool _has_cifg{ false };
    bool _has_cell_clipping{ false };
    bool _has_projection{ false };
    bool _has_projection_clipping{ false };
    bool _has_peephole{ false };
    bool _has_layer_norm{ false };
    bool _projection_tensor_copy_required{ false };
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLQLSTMLAYER_H */