Blame - arm_compute/runtime/CL/functions/CLQLSTMLayer.h - ml/ComputeLibrary

2020-04-02 17:35:42 +0100

[diff] [blame]

105

* @param[in] lstm_params Weights tensors used in peephole, CIFG and layer normalization optimizations:

106

* input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.

107

* forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.

108

* cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.

109

* output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.

110

* hidden_state_zero The zero point of the hidden state.

111

* hidden_state_scale The scale of the hidden state.

112

* input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

113

* recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

114

* cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.

115

* cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

116

* cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

117

* input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.

118

* projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

119

* projection_bias (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.

120

* input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

121

* forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

122

* cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

123

* output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

124

* cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].

125

* If set to 0.0 then clipping is disabled.

126

* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within

127

* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.

128

*/

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

129

void configure(const ICLTensor *input,

130

const ICLTensor *input_to_forget_weights,

131

const ICLTensor *input_to_cell_weights,

132

const ICLTensor *input_to_output_weights,

133

const ICLTensor *recurrent_to_forget_weights,

134

const ICLTensor *recurrent_to_cell_weights,

135

const ICLTensor *recurrent_to_output_weights,

136

const ICLTensor *forget_gate_bias,

137

const ICLTensor *cell_bias,

138

const ICLTensor *output_gate_bias,

139

ICLTensor *cell_state_in,

140

ICLTensor *output_state_in,

141

ICLTensor *cell_state_out,

142

ICLTensor *output_state_out,

143

ICLTensor *output,

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

144

const LSTMParams<ICLTensor> &lstm_params);

145

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

146

/** Initialize function's tensors.

147

*

148

* @param[in] compile_context The compile context to be used.

149

* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.

150

* @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

151

* @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

152

* @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

153

* @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

154

* @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

155

* @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

156

* @param[in] forget_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.

157

* @param[in] cell_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.

158

* @param[in] output_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.

Michele Di Giorgio

beb2d45

2020-05-11 16:17:51 +0100

[diff] [blame]

159

* @param[in] cell_state_in 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.

160

* @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.

161

* @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.

162

* @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.

163

* @param[out] output Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

164

* @param[in] lstm_params Weights tensors used in peephole, CIFG and layer normalization optimizations:

165

* input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.

166

* forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.

167

* cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.

168

* output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.

169

* hidden_state_zero The zero point of the hidden state.

170

* hidden_state_scale The scale of the hidden state.

171

* input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

172

* recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

173

* cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.

174

* cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

175

* cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

176

* input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.

177

* projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

178

* projection_bias (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.

179

* input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

180

* forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

181

* cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

182

* output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

183

* cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].

184

* If set to 0.0 then clipping is disabled.

185

* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within

186

* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.

187

*/

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

188

void configure(const CLCompileContext &compile_context,

189

const ICLTensor *input,

190

const ICLTensor *input_to_forget_weights,

191

const ICLTensor *input_to_cell_weights,

192

const ICLTensor *input_to_output_weights,

193

const ICLTensor *recurrent_to_forget_weights,

194

const ICLTensor *recurrent_to_cell_weights,

195

const ICLTensor *recurrent_to_output_weights,

196

const ICLTensor *forget_gate_bias,

197

const ICLTensor *cell_bias,

198

const ICLTensor *output_gate_bias,

199

ICLTensor *cell_state_in,

200

ICLTensor *output_state_in,

201

ICLTensor *cell_state_out,

202

ICLTensor *output_state_out,

203

ICLTensor *output,

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

204

const LSTMParams<ICLTensor> &lstm_params);

205

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

206

/** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer

207

*

Michele Di Giorgio

beb2d45

2020-05-11 16:17:51 +0100

[diff] [blame]

208

* @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.

209

* @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.

210

* @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.

211

* @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.

212

* @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.

213

* @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.

214

* @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.

215

* @param[in] forget_gate_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.

216

* @param[in] cell_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.

217

* @param[in] output_gate_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.

218

* @param[in] cell_state_in 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.

219

* @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.

220

* @param[in] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.

221

* @param[in] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.

222

* @param[in] output Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.

223

* @param[in] lstm_params Weights tensors info used in peephole, CIFG and layer normalization optimizations:

224

* input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.

225

* forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.

226

* cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.

227

* output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.

228

* hidden_state_zero The zero point of the hidden state.

229

* hidden_state_scale The scale of the hidden state.

230

* input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.

231

* recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

232

* cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.

233

* cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

234

* cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

235

* input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.

236

* projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.

237

* projection_bias (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.

238

* input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

239

* forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

240

* cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

241

* output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.

242

* cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].

243

* If set to 0.0 then clipping is disabled.

244

* projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within

245

* [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

246

* @return a status

247

*/

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

248

static Status validate(const ITensorInfo *input,

249

const ITensorInfo *input_to_forget_weights,

250

const ITensorInfo *input_to_cell_weights,

251

const ITensorInfo *input_to_output_weights,

252

const ITensorInfo *recurrent_to_forget_weights,

253

const ITensorInfo *recurrent_to_cell_weights,

254

const ITensorInfo *recurrent_to_output_weights,

255

const ITensorInfo *forget_gate_bias,

256

const ITensorInfo *cell_bias,

257

const ITensorInfo *output_gate_bias,

258

const ITensorInfo *cell_state_in,

259

const ITensorInfo *output_state_in,

260

const ITensorInfo *cell_state_out,

261

const ITensorInfo *output_state_out,

262

const ITensorInfo *output,

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

263

const LSTMParams<ITensorInfo> &lstm_params);

264

265

// Inherited methods overridden:

266

void run() override;

267

void prepare() override;

268

269

private:

Sheri Zhang

2020-04-21 13:10:24 +0100

[diff] [blame]

270

/** Identifiers of the gates that have a layer normalization stage.
 *
 * Each enumerator's value is used as an index into the per-gate layer normalization
 * containers of this class, and Count is the number of gates used to size them.
 */
enum class LayerNormGate : uint8_t
{
    Forget = 0, /**< Forget gate */
    Cell   = 1, /**< Cell gate */
    Input  = 2, /**< Input gate */
    Output = 3, /**< Output gate */
    Count  = 4  /**< Number of gates above; not a gate itself */
};

Sang-Hoon Park

2020-05-12 11:13:30 +0100

[diff] [blame]

278

static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count);

279

static constexpr uint32_t _out_state_output_size_dimension_idx = 0;

Sheri Zhang

2020-04-21 13:10:24 +0100

[diff] [blame]

280

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

281

/** Internal method to configure matrix multiplication plus output stage of each gate.

282

*

Manuel Bottini

2020-04-08 10:15:51 +0100

[diff] [blame]

283

* @param[in] compile_context The compile context to be used.

284

* @param[in] mm Matrix multiplication function to use.

285

* @param[in] outstage Output stage function to use.

286

* @param[in] gemmlowp_info GEMMLowp metadata to be used by the output stage.

287

* @param[in] mm_input Input tensor to matrix multiplication function.

288

* @param[in] mm_weights Weights tensor to matrix multiplication function.

289

* @param[in] bias Bias tensor to matrix multiplication function.

290

* @param[in] outstage_res Tensor to be used for storing the result of the output stage.

291

* @param[in] gemmlowp_scale Real multiplier to be used computing multiplier and shift for requantization.

292

* @param[in] mm_res_info Tensor info to be used to initialize matrix multiplication result tensor.

293

* @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

294

*

295

*/

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

296

void configure_mm(const CLCompileContext &compile_context,

297

CLGEMMLowpMatrixMultiplyCore &mm,

298

CLGEMMLowpOutputStage &outstage,

299

GEMMLowpOutputStageInfo &gemmlowp_info,

300

const ICLTensor *mm_input,

301

const ICLTensor *mm_weights,

302

const ICLTensor *bias,

303

CLTensor *mm_res,

304

CLTensor *outstage_res,

305

float gemmlowp_scale,

306

const TensorInfo &mm_res_info,

307

const TensorInfo &outstage_tensor_info);

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

308

309

MemoryGroup _memory_group{};

310

Sang-Hoon Park

2020-05-12 11:13:30 +0100

[diff] [blame]

311

/** A small internal kernel that performs the copy between two tensors */

312

class TensorCopyKernel

313

{

314

static constexpr uint32_t max_dimension_supported = 2;

315

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

316

ICLTensor *_src{nullptr};

317

ICLTensor *_dst{nullptr};

Sang-Hoon Park

2020-05-12 11:13:30 +0100

[diff] [blame]

size_t _row_size{};

Window _window{};

public:

/** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer::TensorCopyKernel

323

*

324

* @param[in] src Source tensor info.

325

* @param[in] dst Destination tensor info

*

* @return a status

*/

static Status validate(const ITensorInfo &src, const ITensorInfo &dst);

330

/** Set the input and output tensors.

331

*

332

* @param[in] src Source tensor

333

* @param[out] dst Destination tensor

334

*/

335

void configure(ICLTensor &src, ICLTensor &dst);

336

/** run the kernel */

void run();

};

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

340

// Functions used

Georgios Pinitas

4a578b9

2021-06-25 12:13:49 +0100

[diff] [blame]

341

CLTranspose _transpose_input_to_forget_weights{};

342

CLTranspose _transpose_input_to_cell_weights{};

343

CLTranspose _transpose_input_to_output_weights{};

344

CLTranspose _transpose_input_to_input_weights{};

345

CLTranspose _transpose_recurrent_to_forget_weights{};

346

CLTranspose _transpose_recurrent_to_cell_weights{};

347

CLTranspose _transpose_recurrent_to_output_weights{};

348

CLTranspose _transpose_recurrent_to_input_weights{};

349

CLTranspose _transpose_projection_weights{};

350

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_input_reduction;

351

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_input_reduction;

352

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_forget_reduction;

353

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;

354

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_cell_reduction;

355

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;

356

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _input_to_output_reduction;

357

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _recurrent_to_output_reduction;

358

std::unique_ptr<opencl::kernels::ClGemmLowpMatrixAReductionKernel> _projection_reduction;

359

CLArithmeticAddition _projection_bias_add{};

360

CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};

361

CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};

362

CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};

363

CLGEMMLowpOutputStage _input_to_forget_outstage{};

364

CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};

365

CLGEMMLowpOutputStage _cell_to_forget_outstage{};

366

CLArithmeticAddition _accumulate_input_recurrent_forget{};

367

CLArithmeticAddition _accumulate_cell_forget{};

368

CLActivationLayer _forget_gate_sigmoid{};

369

CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};

370

CLGEMMLowpOutputStage _input_to_cell_outstage{};

371

CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};

372

CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};

373

CLArithmeticAddition _accumulate_input_recurrent_modulation{};

374

CLActivationLayer _cell_gate_tanh{};

375

CLArithmeticSubtraction _input_gate_sub{};

376

CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};

377

CLGEMMLowpOutputStage _input_to_input_outstage{};

378

CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};

379

CLGEMMLowpOutputStage _recurrent_to_input_outstage{};

380

CLArithmeticAddition _accumulate_input_recurrent_input{};

381

CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};

382

CLGEMMLowpOutputStage _cell_to_input_outstage{};

383

CLArithmeticAddition _accumulate_cell_input{};

384

CLActivationLayer _input_gate_sigmoid{};

385

CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};

386

CLPixelWiseMultiplication _pixelwise_mul_input_cell{};

387

CLArithmeticAddition _add_forget_cell{};

388

CLActivationLayer _cell_clip{};

389

CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};

390

CLGEMMLowpOutputStage _input_to_output_outstage{};

391

CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};

392

CLGEMMLowpOutputStage _recurrent_to_output_outstage{};

393

CLArithmeticAddition _accumulate_input_recurrent_output{};

394

CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};

395

CLGEMMLowpOutputStage _cell_to_output_outstage{};

396

CLArithmeticAddition _accumulate_cell_to_output{};

397

CLActivationLayer _output_gate_sigmoid{};

398

CLActivationLayer _hidden_tanh{};

399

CLPixelWiseMultiplication _pixelwise_mul_hidden{};

400

CLGEMMLowpOutputStage _hidden_outstage{};

401

CLGEMMLowpMatrixMultiplyCore _mm_projection{};

402

CLGEMMLowpOutputStage _projection_outstage{};

403

CLArithmeticAddition _accumulate_projection{};

404

CLActivationLayer _projection_clip{};

Sang-Hoon Park

bef7fa2

2020-10-21 15:58:54 +0100

[diff] [blame]

405

std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

406

CLCopy _copy_output;

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

407

Sang-Hoon Park

2020-05-12 11:13:30 +0100

[diff] [blame]

408

TensorCopyKernel _projection_bias_copy{};

409

TensorCopyKernel _projection_output_to_accumulate_copy{};

410

TensorCopyKernel _projection_accumulate_to_output_copy{};

411

TensorCopyKernel _hidden_to_output_copy{};

412

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

413

// Tensor pointers

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

414

const ICLTensor *_input_to_input_weights{nullptr};

415

const ICLTensor *_recurrent_to_input_weights{nullptr};

416

const ICLTensor *_projection_bias{nullptr};

417

const ICLTensor *_input_to_forget_weights{nullptr};

418

const ICLTensor *_input_to_cell_weights{nullptr};

419

const ICLTensor *_input_to_output_weights{nullptr};

420

const ICLTensor *_recurrent_to_forget_weights{nullptr};

421

const ICLTensor *_recurrent_to_cell_weights{nullptr};

422

const ICLTensor *_recurrent_to_output_weights{nullptr};

423

const ICLTensor *_projection_weights{nullptr};

424

std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{{}};

425

std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{{}};

Sheri Zhang

2020-04-21 13:10:24 +0100

[diff] [blame]

426

427

using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;

428

/** Convert a gate identifier to the integer used to index the per-gate layer normalization containers.
*
* @param[in] g Gate identifier.
*
* @return The underlying integer value of @p g.
*/
inline LayerNormIndexType getGateIndex(LayerNormGate g)

429

{

430

return static_cast<LayerNormIndexType>(g);

431

}

432

433

/** Store the layer normalization weights pointer for a gate.
*
* Only the pointer is recorded in the slot of @p g; the tensor itself is not copied.
*
* @param[in] t Weights tensor pointer to record.
* @param[in] g Gate whose slot is written.
*/
inline void set_layer_norm_weight(const ICLTensor *t, LayerNormGate g)

434

{

435

_layer_norm_weights[getGateIndex(g)] = t;

436

}

437

438

/** Store the layer normalization bias pointer for a gate.
*
* Only the pointer is recorded in the slot of @p g; the tensor itself is not copied.
*
* @param[in] t Bias tensor pointer to record.
* @param[in] g Gate whose slot is written.
*/
inline void set_layer_norm_bias(const ICLTensor *t, LayerNormGate g)

439

{

440

_layer_norm_bias[getGateIndex(g)] = t;

441

}

442

443

/** Retrieve the layer normalization weights pointer recorded for a gate.
*
* @param[in] g Gate whose slot is read.
*
* @return The pointer currently held in the slot of @p g.
*/
inline const ICLTensor *get_layer_norm_weight(LayerNormGate g)

444

{

445

return _layer_norm_weights[getGateIndex(g)];

446

}

447

448

/** Retrieve the layer normalization bias pointer recorded for a gate.
*
* @param[in] g Gate whose slot is read.
*
* @return The pointer currently held in the slot of @p g.
*/
inline const ICLTensor *get_layer_norm_bias(LayerNormGate g)

449

{

450

return _layer_norm_bias[getGateIndex(g)];

451

}

452

453

/** Access the layer normalization kernel of a gate.
*
* @note The stored pointer for @p g is dereferenced without a null check.
*
* @param[in] g Gate whose kernel is requested.
*
* @return Reference to the kernel held in the slot of @p g.
*/
inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)

454

{

Sang-Hoon Park

bef7fa2

2020-10-21 15:58:54 +0100

[diff] [blame]

455

return *_layer_norms[getGateIndex(g)];

Sheri Zhang

2020-04-21 13:10:24 +0100

[diff] [blame]

456

}

457

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

458

inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);

Sang-Hoon Park

bef7fa2

2020-10-21 15:58:54 +0100

[diff] [blame]

459

inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

460

461

// Temporary tensors

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

462

CLTensor _input_to_forget_weights_transposed{nullptr};

463

CLTensor _input_to_cell_weights_transposed{nullptr};

464

CLTensor _input_to_output_weights_transposed{nullptr};

465

CLTensor _input_to_input_weights_transposed{nullptr};

466

CLTensor _recurrent_to_forget_weights_transposed{nullptr};

467

CLTensor _recurrent_to_cell_weights_transposed{nullptr};

468

CLTensor _recurrent_to_output_weights_transposed{nullptr};

469

CLTensor _recurrent_to_input_weights_transposed{nullptr};

470

CLTensor _projection_weights_transposed{nullptr};

471

CLTensor _input_to_input_eff_bias{nullptr};

472

CLTensor _recurrent_to_input_eff_bias{nullptr};

473

CLTensor _input_to_forget_eff_bias{nullptr};

474

CLTensor _recurrent_to_forget_eff_bias{nullptr};

475

CLTensor _input_to_cell_eff_bias{nullptr};

476

CLTensor _recurrent_to_cell_eff_bias{nullptr};

477

CLTensor _input_to_output_eff_bias{nullptr};

478

CLTensor _recurrent_to_output_eff_bias{nullptr};

479

CLTensor _projection_reduction_res{nullptr};

480

CLTensor _projection_eff_bias{nullptr};

481

CLTensor _mm_input_to_forget_res{nullptr};

482

CLTensor _mm_recurrent_to_forget_res{nullptr};

483

CLTensor _mul_cell_to_forget_res{nullptr};

484

CLTensor _input_to_forget_outstage_res{nullptr};

485

CLTensor _cell_to_forget_outstage_res{nullptr};

486

CLTensor _recurrent_to_forget_outstage_res{nullptr};

487

CLTensor _forget_gate{nullptr};

488

CLTensor _mm_input_to_cell_res{nullptr};

489

CLTensor _input_to_cell_outstage_res{nullptr};

490

CLTensor _mm_recurrent_to_cell_res{nullptr};

491

CLTensor _recurrent_to_cell_outstage_res{nullptr};

492

CLTensor _cell_gate{nullptr};

493

CLTensor _mul_input_cell_res{nullptr};

494

CLTensor _mm_input_to_input_res{nullptr};

495

CLTensor _input_to_input_outstage_res{nullptr};

496

CLTensor _mm_recurrent_to_input_res{nullptr};

497

CLTensor _mul_cell_to_input_res{nullptr};

498

CLTensor _cell_to_input_outstage_res{nullptr};

499

CLTensor _recurrent_to_input_outstage_res{nullptr};

500

CLTensor _input_gate{nullptr};

501

CLTensor _mm_input_to_output_res{nullptr};

502

CLTensor _input_to_output_outstage_res{nullptr};

503

CLTensor _mm_recurrent_to_output_res{nullptr};

504

CLTensor _mul_cell_to_output_res{nullptr};

505

CLTensor _cell_to_output_outstage_res{nullptr};

506

CLTensor _recurrent_to_output_outstage_res{nullptr};

507

CLTensor _output_gate{nullptr};

508

CLTensor _hidden_mul_res{nullptr};

509

CLTensor _hidden_gate{nullptr};

510

CLTensor _mm_projection_res{nullptr};

511

CLTensor _projection_outstage_res{nullptr};

512

CLTensor _projection_out_res{nullptr};

513

CLTensor _projection_accumulate_res{nullptr};

514

CLTensor _ones{nullptr};

515

std::array<CLTensor, _layer_norm_count> _layer_norm_output{{}};

Sheri Zhang

2020-04-21 13:10:24 +0100

[diff] [blame]

516

517

/** Access the layer normalization output tensor of a gate.
*
* @param[in] g Gate whose output tensor is requested.
*
* @return Reference to the tensor held in the slot of @p g.
*/
inline CLTensor &get_layer_norm_output(LayerNormGate g)

518

{

519

return _layer_norm_output[getGateIndex(g)];

520

}

Michele Di Giorgio

2020-04-02 17:35:42 +0100

[diff] [blame]

521

Felix Thomasmathibalan

2023-09-27 17:46:17 +0100

[diff] [blame]

522

bool _is_prepared{false};

523

bool _has_cifg{false};

524

bool _has_cell_clipping{false};

525

bool _has_projection{false};

526

bool _has_projection_clipping{false};

527

bool _has_peephole{false};

528

bool _has_layer_norm{false};

529

bool _projection_tensor_copy_required{false};

Michele Di Giorgio