/*
 * Copyright (c) 2020-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLCopy.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class CLCompileContext;
class ICLTensor;
class CLGEMMLowpMatrixAReductionKernel;
class CLQLSTMLayerNormalizationKernel;
class ITensorInfo;
/** Basic function to run @ref CLQLSTMLayer
 *
 * This function calls the following CL functions/kernels:
 *
 * -# @ref CLActivationLayer                                    Activation functions (tanh and logistic)
 * -# @ref CLCopy                                               Copy function for copying output_state_out to output
 * -# @ref CLArithmeticAddition                                 Elementwise addition and subtraction
 * -# @ref CLGEMMLowpMatrixMultiplyCore                         Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint  Convert 32-bit integers into QSYMM16
 * -# @ref CLGEMMLowpMatrixAReductionKernel                     For precomputing effective biases to use
 * -# @ref CLPixelWiseMultiplication                            Elementwise multiplication
 * -# @ref CLTranspose                                          Transpose function for reshaping the weights
 * */
class CLQLSTMLayer : public IFunction
{
public:
    /** Default constructor */
    CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer(const CLQLSTMLayer &) = delete;
    /** Default move constructor */
    CLQLSTMLayer(CLQLSTMLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
    /** Default move assignment operator */
    CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
    /** Default destructor */
    ~CLQLSTMLayer();
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);

    /** Initialize function's tensors.
     *
     * @param[in]  compile_context             The compile context to be used.
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);

    /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_state_in               2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] output                      Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors info used in peephole, CIFG and layer normalization optimizations:
     *                                        input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                        forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                        cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                        output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                        hidden_state_zero          The zero point of the hidden state.
     *                                        hidden_state_scale         The scale of the hidden state.
     *                                        input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                        recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                        cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                        projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                        input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                   If set to 0.0 then clipping is disabled.
     *                                        projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                   [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    /** Identifies which gate a layer-normalization kernel/weight/bias/output belongs to. */
    enum class LayerNormGate : uint8_t
    {
        Forget,
        Cell,
        Input,
        Output,
        Count
    };
    // Number of layer-normalization gates, used to size the per-gate arrays below.
    static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count);
    // Index of the dimension holding output_size in the output state tensor shape.
    static constexpr uint32_t _out_state_output_size_dimension_idx = 0;

    /** Internal method to configure matrix multiplication plus output stage of each gate.
     *
     * @param[in] compile_context      The compile context to be used.
     * @param[in] mm                   Matrix multiplication function to use.
     * @param[in] outstage             Output stage function to use.
     * @param[in] gemmlowp_info        GEMMLowp metadata to be used by the output stage.
     * @param[in] mm_input             Input tensor to matrix multiplication function.
     * @param[in] mm_weights           Weights tensor to matrix multiplication function.
     * @param[in] bias                 Bias tensor to matrix multiplication function.
     * @param[in] mm_res               Tensor to be used for storing the result of the matrix multiplication.
     * @param[in] outstage_res         Tensor to be used for storing the result of the output stage.
     * @param[in] gemmlowp_scale       Real multiplier to be used computing multiplier and shift for requantization.
     * @param[in] mm_res_info          Tensor info to be used to initialize matrix multiplication result tensor.
     * @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.
     *
     */
    void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                      const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
                      CLTensor *outstage_res, float gemmlowp_scale,
                      const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);

    MemoryGroup _memory_group{};

    /** A small internal kernel that performs the copy between two tensors */
    class TensorCopyKernel
    {
        static constexpr uint32_t max_dimension_supported = 2;

        ICLTensor *_src{ nullptr };
        ICLTensor *_dst{ nullptr };
        size_t _row_size{};
        Window _window{};

    public:
        /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer::TensorCopyKernel
         *
         * @param[in] src Source tensor info.
         * @param[in] dst Destination tensor info.
         *
         * @return a status
         */
        static Status validate(const ITensorInfo &src, const ITensorInfo &dst);
        /** Set the input and output tensors.
         *
         * @param[in]  src Source tensor
         * @param[out] dst Destination tensor
         */
        void configure(ICLTensor &src, ICLTensor &dst);
        /** Run the kernel. */
        void run();
    };

    // Functions used
    CLTranspose _transpose_input_to_forget_weights{};
    CLTranspose _transpose_input_to_cell_weights{};
    CLTranspose _transpose_input_to_output_weights{};
    CLTranspose _transpose_input_to_input_weights{};
    CLTranspose _transpose_recurrent_to_forget_weights{};
    CLTranspose _transpose_recurrent_to_cell_weights{};
    CLTranspose _transpose_recurrent_to_output_weights{};
    CLTranspose _transpose_recurrent_to_input_weights{};
    CLTranspose _transpose_projection_weights{};
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
    std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
    CLArithmeticAddition _projection_bias_add{};
    CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
    CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
    CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
    CLGEMMLowpOutputStage _input_to_forget_outstage{};
    CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
    CLGEMMLowpOutputStage _cell_to_forget_outstage{};
    CLArithmeticAddition _accumulate_input_recurrent_forget{};
    CLArithmeticAddition _accumulate_cell_forget{};
    CLActivationLayer _forget_gate_sigmoid{};
    CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
    CLGEMMLowpOutputStage _input_to_cell_outstage{};
    CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
    CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
    CLArithmeticAddition _accumulate_input_recurrent_modulation{};
    CLActivationLayer _cell_gate_tanh{};
    CLArithmeticSubtraction _input_gate_sub{};
    CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
    CLGEMMLowpOutputStage _input_to_input_outstage{};
    CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
    CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
    CLArithmeticAddition _accumulate_input_recurrent_input{};
    CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
    CLGEMMLowpOutputStage _cell_to_input_outstage{};
    CLArithmeticAddition _accumulate_cell_input{};
    CLActivationLayer _input_gate_sigmoid{};
    CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
    CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
    CLArithmeticAddition _add_forget_cell{};
    CLActivationLayer _cell_clip{};
    CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
    CLGEMMLowpOutputStage _input_to_output_outstage{};
    CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
    CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
    CLArithmeticAddition _accumulate_input_recurrent_output{};
    CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
    CLGEMMLowpOutputStage _cell_to_output_outstage{};
    CLArithmeticAddition _accumulate_cell_to_output{};
    CLActivationLayer _output_gate_sigmoid{};
    CLActivationLayer _hidden_tanh{};
    CLPixelWiseMultiplication _pixelwise_mul_hidden{};
    CLGEMMLowpOutputStage _hidden_outstage{};
    CLGEMMLowpMatrixMultiplyCore _mm_projection{};
    CLGEMMLowpOutputStage _projection_outstage{};
    CLArithmeticAddition _accumulate_projection{};
    CLActivationLayer _projection_clip{};
    std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
    CLCopy _copy_output;

    TensorCopyKernel _projection_bias_copy{};
    TensorCopyKernel _projection_output_to_accumulate_copy{};
    TensorCopyKernel _projection_accumulate_to_output_copy{};
    TensorCopyKernel _hidden_to_output_copy{};

    // Tensor pointers
    const ICLTensor *_input_to_input_weights
    {
        nullptr
    };
    const ICLTensor *_recurrent_to_input_weights{ nullptr };
    const ICLTensor *_projection_bias{ nullptr };
    const ICLTensor *_input_to_forget_weights{ nullptr };
    const ICLTensor *_input_to_cell_weights{ nullptr };
    const ICLTensor *_input_to_output_weights{ nullptr };
    const ICLTensor *_recurrent_to_forget_weights{ nullptr };
    const ICLTensor *_recurrent_to_cell_weights{ nullptr };
    const ICLTensor *_recurrent_to_output_weights{ nullptr };
    const ICLTensor *_projection_weights{ nullptr };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} };

    // Helpers to index the per-gate layer-normalization arrays by LayerNormGate.
    using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
    inline LayerNormIndexType getGateIndex(LayerNormGate g)
    {
        return static_cast<LayerNormIndexType>(g);
    }

    inline void set_layer_norm_weight(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_weights[getGateIndex(g)] = t;
    }

    inline void set_layer_norm_bias(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_bias[getGateIndex(g)] = t;
    }

    inline const ICLTensor *get_layer_norm_weight(LayerNormGate g)
    {
        return _layer_norm_weights[getGateIndex(g)];
    }

    inline const ICLTensor *get_layer_norm_bias(LayerNormGate g)
    {
        return _layer_norm_bias[getGateIndex(g)];
    }

    inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
    {
        return *_layer_norms[getGateIndex(g)];
    }

    inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
    inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);

    // Temporary tensors
    CLTensor _input_to_forget_weights_transposed{ nullptr };
    CLTensor _input_to_cell_weights_transposed{ nullptr };
    CLTensor _input_to_output_weights_transposed{ nullptr };
    CLTensor _input_to_input_weights_transposed{ nullptr };
    CLTensor _recurrent_to_forget_weights_transposed{ nullptr };
    CLTensor _recurrent_to_cell_weights_transposed{ nullptr };
    CLTensor _recurrent_to_output_weights_transposed{ nullptr };
    CLTensor _recurrent_to_input_weights_transposed{ nullptr };
    CLTensor _projection_weights_transposed{ nullptr };
    CLTensor _input_to_input_eff_bias{ nullptr };
    CLTensor _recurrent_to_input_eff_bias{ nullptr };
    CLTensor _input_to_forget_eff_bias{ nullptr };
    CLTensor _recurrent_to_forget_eff_bias{ nullptr };
    CLTensor _input_to_cell_eff_bias{ nullptr };
    CLTensor _recurrent_to_cell_eff_bias{ nullptr };
    CLTensor _input_to_output_eff_bias{ nullptr };
    CLTensor _recurrent_to_output_eff_bias{ nullptr };
    CLTensor _projection_reduction_res{ nullptr };
    CLTensor _projection_eff_bias{ nullptr };
    CLTensor _mm_input_to_forget_res{ nullptr };
    CLTensor _mm_recurrent_to_forget_res{ nullptr };
    CLTensor _mul_cell_to_forget_res{ nullptr };
    CLTensor _input_to_forget_outstage_res{ nullptr };
    CLTensor _cell_to_forget_outstage_res{ nullptr };
    CLTensor _recurrent_to_forget_outstage_res{ nullptr };
    CLTensor _forget_gate{ nullptr };
    CLTensor _mm_input_to_cell_res{ nullptr };
    CLTensor _input_to_cell_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_cell_res{ nullptr };
    CLTensor _recurrent_to_cell_outstage_res{ nullptr };
    CLTensor _cell_gate{ nullptr };
    CLTensor _mul_input_cell_res{ nullptr };
    CLTensor _mm_input_to_input_res{ nullptr };
    CLTensor _input_to_input_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_input_res{ nullptr };
    CLTensor _mul_cell_to_input_res{ nullptr };
    CLTensor _cell_to_input_outstage_res{ nullptr };
    CLTensor _recurrent_to_input_outstage_res{ nullptr };
    CLTensor _input_gate{ nullptr };
    CLTensor _mm_input_to_output_res{ nullptr };
    CLTensor _input_to_output_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_output_res{ nullptr };
    CLTensor _mul_cell_to_output_res{ nullptr };
    CLTensor _cell_to_output_outstage_res{ nullptr };
    CLTensor _recurrent_to_output_outstage_res{ nullptr };
    CLTensor _output_gate{ nullptr };
    CLTensor _hidden_mul_res{ nullptr };
    CLTensor _hidden_gate{ nullptr };
    CLTensor _mm_projection_res{ nullptr };
    CLTensor _projection_outstage_res{ nullptr };
    CLTensor _projection_out_res{ nullptr };
    CLTensor _projection_accumulate_res{ nullptr };
    CLTensor _ones{ nullptr };
    std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} };

    inline CLTensor &get_layer_norm_output(LayerNormGate g)
    {
        return _layer_norm_output[getGateIndex(g)];
    }

    bool _is_prepared{ false };
    bool _has_cifg{ false };
    bool _has_cell_clipping{ false };
    bool _has_projection{ false };
    bool _has_projection_clipping{ false };
    bool _has_peephole{ false };
    bool _has_layer_norm{ false };
    bool _projection_tensor_copy_required{ false };
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLQLSTMLAYER_H */