blob: 9754bdcb82bd0f4bf1b67befc6823f306ab73dc0 [file] [log] [blame]
/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"
25
Michalis Spyroubcedf512018-03-22 14:55:08 +000026#include "arm_compute/core/Utils.h"
27#include "arm_compute/core/Validate.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000028#include "arm_compute/core/utils/misc/InfoHelpers.h"
Michalis Spyroubcedf512018-03-22 14:55:08 +000029#include "arm_compute/core/utils/misc/ShapeCalculator.h"
30#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
31#include "arm_compute/runtime/CL/CLScheduler.h"
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010032#include "src/core/CL/kernels/CLFillBorderKernel.h"
Teresa Charlin27886092021-02-25 20:15:01 +000033#include "src/core/gpu/cl/kernels/ClTransposeKernel.h"
Michalis Spyroubcedf512018-03-22 14:55:08 +000034
Michele Di Giorgio47a89902020-03-09 19:32:33 +000035namespace arm_compute
36{
Michalis Spyroubcedf512018-03-22 14:55:08 +000037using namespace arm_compute::misc::shape_calculator;
Michele Di Giorgio47a89902020-03-09 19:32:33 +000038using namespace arm_compute::utils::info_helpers;
Michalis Spyroubcedf512018-03-22 14:55:08 +000039
40CLLSTMLayer::CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
Michele Di Giorgio39438b42019-06-04 12:41:45 +010041 : _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010042 _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(),
Teresa Charlin27886092021-02-25 20:15:01 +000043 _transpose_cell_state(std::make_unique<opencl::kernels::ClTransposeKernel>()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(),
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010044 _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(),
Sheri Zhang7e20e292021-02-02 11:49:34 +000045 _fully_connected_output_state(), _projection_clip(), _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(),
46 _concat_weights_input_gate(), _concat_weights_output(), _ones_fill(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(),
47 _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(),
48 _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(),
49 _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(),
50 _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(),
51 _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false),
52 _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
Michalis Spyroubcedf512018-03-22 14:55:08 +000053{
54}
55
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010056CLLSTMLayer::~CLLSTMLayer() = default;
57
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010058void CLLSTMLayer::configure(const ICLTensor *input,
59 const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
Michalis Spyroubcedf512018-03-22 14:55:08 +000060 const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
61 const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
Michalis Spyrou1009e872020-07-27 12:48:34 +010062 const ICLTensor *output_state_in, ICLTensor *cell_state_in,
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010063 ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
64 const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
Michalis Spyroubcedf512018-03-22 14:55:08 +000065{
Manuel Bottini2b84be52020-04-08 10:15:51 +010066 configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
67 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in, scratch_buffer, output_state_out, cell_state_out, output, lstm_params, activation_info,
68 cell_threshold, projection_threshold);
69}
70
71void CLLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input,
72 const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
73 const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
74 const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
Michalis Spyrou1009e872020-07-27 12:48:34 +010075 const ICLTensor *output_state_in, ICLTensor *cell_state_in,
Manuel Bottini2b84be52020-04-08 10:15:51 +010076 ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
77 const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
78{
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010079 ARM_COMPUTE_ERROR_ON_NULLPTR(input,
80 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
81 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
82 forget_gate_bias, cell_bias, output_gate_bias,
83 output_state_in, cell_state_in,
84 scratch_buffer, output_state_out, cell_state_out, output);
85
Michele Di Giorgio39438b42019-06-04 12:41:45 +010086 _is_layer_norm_lstm = lstm_params.use_layer_norm();
87
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010088 // Set lstm parameters
Michele Di Giorgio47a89902020-03-09 19:32:33 +000089 LSTMParams<ITensorInfo> lstm_params_info{};
90 build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010091
92 // Validate
Michalis Spyroubcedf512018-03-22 14:55:08 +000093 ARM_COMPUTE_ERROR_THROW_ON(CLLSTMLayer::validate(input->info(), input_to_forget_weights->info(),
94 input_to_cell_weights->info(), input_to_output_weights->info(),
95 recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
96 forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
Georgios Pinitas8bc745d2018-07-18 19:51:24 +010097 output_state_in->info(), cell_state_in->info(),
98 scratch_buffer->info(), output_state_out->info(), cell_state_out->info(), output->info(),
99 lstm_params_info, activation_info, cell_threshold, projection_threshold));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000100
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100101 const TensorShape cell_state_shape = cell_state_in->info()->tensor_shape();
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100102 // Configure block that calculates the forget gate
103 // forget_gate = Activation(input * input_to_forget_weights + output_state_in * recurrent_to_forget_weights + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias)
John Kesapidescafec8f2019-02-19 15:53:59 +0000104 // We optimize this as follows:
105 // forget_gate = Activation( (input,output_state_in) * (input_to_forget_weights,recurrent_to_forget_weights) + PixelWiseMul(cell_state, cell_to_forget_weights) + forget_gate_bias
Michalis Spyroubcedf512018-03-22 14:55:08 +0000106 _forget_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000107 _forget_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Georgios Pinitas42a31722018-07-09 14:35:32 +0100108 _forget_gate_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000109
John Kesapidescafec8f2019-02-19 15:53:59 +0000110 std::vector<const ICLTensor *> inputs_vector;
111 inputs_vector.emplace_back(input);
112 inputs_vector.emplace_back(output_state_in);
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100113 const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000114 _forget_gate_out2.allocator()->init(TensorInfo(concat_shape, 1, input->info()->data_type()));
115
Michalis Spyroubcedf512018-03-22 14:55:08 +0000116 _memory_group.manage(&_forget_gate_out2);
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100117 _concat_inputs_forget_gate.configure(compile_context, inputs_vector, &_forget_gate_out2, Window::DimX);
John Kesapidescafec8f2019-02-19 15:53:59 +0000118
119 std::vector<const ICLTensor *> weights_vector;
120
121 weights_vector.emplace_back(input_to_forget_weights);
122 weights_vector.emplace_back(recurrent_to_forget_weights);
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100123 const TensorShape weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(weights_vector, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000124 _forget_gate_out6.allocator()->init(TensorInfo(weights_concat_shape, 1, input->info()->data_type()));
125
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100126 _concat_weights_forget_gate.configure(compile_context, weights_vector, &_forget_gate_out6, Window::DimX);
John Kesapidescafec8f2019-02-19 15:53:59 +0000127
Georgios Pinitas42a31722018-07-09 14:35:32 +0100128 _memory_group.manage(&_forget_gate_out5);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100129 _fully_connected_forget_gate.configure(compile_context, &_forget_gate_out2, &_forget_gate_out6, (_is_layer_norm_lstm) ? nullptr : forget_gate_bias, &_forget_gate_out5);
John Kesapidescafec8f2019-02-19 15:53:59 +0000130 _memory_group.manage(&_forget_gate_out1);
131 _memory_group.manage(&_forget_gate_out3);
132 _forget_gate_out6.allocator()->allocate();
133
Georgios Pinitas42a31722018-07-09 14:35:32 +0100134 CLTensor *forget_gate_out = &_forget_gate_out5;
Michalis Spyroubcedf512018-03-22 14:55:08 +0000135 if(lstm_params.has_peephole_opt())
136 {
Georgios Pinitas42a31722018-07-09 14:35:32 +0100137 _forget_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000138
139 _run_peephole_opt = true;
140 _memory_group.manage(&_forget_gate_out4);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100141 _pixelwise_mul_forget_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(), &_forget_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
142 _accum_forget_gate1.configure(compile_context, &_forget_gate_out5, &_forget_gate_out4, &_forget_gate_out3, ConvertPolicy::SATURATE);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000143 _forget_gate_out4.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000144 _forget_gate_out5.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000145 forget_gate_out = &_forget_gate_out3;
146 }
147 else
148 {
149 _forget_gate_out3.allocator()->allocate();
150 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100151 if(_is_layer_norm_lstm)
152 {
153 _forget_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
154 _forget_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
155 _memory_group.manage(&_forget_layer_norm_out1);
156 _memory_group.manage(&_forget_layer_norm_out2);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100157 _mean_std_norm_forget_gate.configure(compile_context, forget_gate_out);
158 _pixelwise_mul_forget_gate_coeff.configure(compile_context, forget_gate_out, lstm_params.forget_layer_norm_weights(), &_forget_layer_norm_out1, 1, ConvertPolicy::SATURATE,
159 RoundingPolicy::TO_NEAREST_EVEN);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100160 // forget_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
161 forget_gate_out->allocator()->allocate();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100162 _accum_forget_gate_bias.configure(compile_context, &_forget_layer_norm_out1, forget_gate_bias, &_forget_layer_norm_out2, ConvertPolicy::SATURATE);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100163 _forget_layer_norm_out1.allocator()->allocate();
164 forget_gate_out = &_forget_layer_norm_out2;
165 }
Manuel Bottini2b84be52020-04-08 10:15:51 +0100166 _activation_forget_gate.configure(compile_context, forget_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000167
Michalis Spyroubcedf512018-03-22 14:55:08 +0000168 // Configure block that calculates the input gate
Georgios Pinitas42a31722018-07-09 14:35:32 +0100169 // input_gate = Activation(input * input_to_input_weights + output_state * recurrent_to_input_weights + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
Michalis Spyroubcedf512018-03-22 14:55:08 +0000170 // input_gate = 1 - forget_gate, with CIFG
John Kesapidescafec8f2019-02-19 15:53:59 +0000171 // We optimize this as follows:
172 // input_gate = Activation((input,output_state) * (input_to_input_weights,recurrent_to_input_weights) + PixelWiseMul(cell_state, cell_to_input_weights) + input_gate_bias), without CIFG
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100173 _input_gate_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Georgios Pinitas4f859822019-02-06 18:08:04 +0000174 CLTensor *input_gate_out = &_input_gate_out1;
Michalis Spyroubcedf512018-03-22 14:55:08 +0000175 if(lstm_params.has_cifg_opt())
176 {
177 _memory_group.manage(&_input_gate_out1);
178 _ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Sheri Zhang7e20e292021-02-02 11:49:34 +0000179 _ones_fill.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100180 _subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000181 _ones.allocator()->allocate();
182 _run_cifg_opt = true;
183 }
184 else
185 {
Michalis Spyroubcedf512018-03-22 14:55:08 +0000186 _input_gate_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
Georgios Pinitas42a31722018-07-09 14:35:32 +0100187 _input_gate_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
John Kesapidescafec8f2019-02-19 15:53:59 +0000188
189 std::vector<const ICLTensor *> lstm_weights;
190 lstm_weights.emplace_back(lstm_params.input_to_input_weights());
191 lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100192 TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000193 _input_gate_out2.allocator()->init(TensorInfo(lstm_weights_concat_shape, 1, input->info()->data_type()));
194
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100195 _concat_weights_input_gate.configure(compile_context, lstm_weights, &_input_gate_out2, Window::DimX);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000196
197 _memory_group.manage(&_input_gate_out1);
John Kesapidescafec8f2019-02-19 15:53:59 +0000198
Michalis Spyroubcedf512018-03-22 14:55:08 +0000199 _memory_group.manage(&_input_gate_out3);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100200 _fully_connected_input_gate.configure(compile_context, &_forget_gate_out2, &_input_gate_out2, (_is_layer_norm_lstm) ? nullptr : lstm_params.input_gate_bias(), &_input_gate_out3);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000201 _input_gate_out2.allocator()->allocate();
John Kesapidescafec8f2019-02-19 15:53:59 +0000202
203 input_gate_out = &_input_gate_out3;
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100204 if(_run_peephole_opt)
205 {
John Kesapidescafec8f2019-02-19 15:53:59 +0000206 _memory_group.manage(&_input_gate_out4);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100207 _pixelwise_mul_input_gate.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(), &_input_gate_out4, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
208 _accum_input_gate1.configure(compile_context, &_input_gate_out3, &_input_gate_out4, &_input_gate_out1, ConvertPolicy::SATURATE);
John Kesapidescafec8f2019-02-19 15:53:59 +0000209 _input_gate_out3.allocator()->allocate();
Georgios Pinitas4f859822019-02-06 18:08:04 +0000210 _input_gate_out4.allocator()->allocate();
Georgios Pinitas4f859822019-02-06 18:08:04 +0000211 input_gate_out = &_input_gate_out1;
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100212 }
Georgios Pinitas4f859822019-02-06 18:08:04 +0000213 else
214 {
215 _input_gate_out1.allocator()->allocate();
216 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100217
218 if(_is_layer_norm_lstm)
219 {
220 _input_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
221 _input_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
222 _memory_group.manage(&_input_layer_norm_out1);
223 _memory_group.manage(&_input_layer_norm_out2);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100224 _mean_std_norm_input_gate.configure(compile_context, input_gate_out);
225 _pixelwise_mul_input_gate_coeff.configure(compile_context, input_gate_out, lstm_params.input_layer_norm_weights(), &_input_layer_norm_out1, 1, ConvertPolicy::SATURATE,
226 RoundingPolicy::TO_NEAREST_EVEN);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100227 // input_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
228 input_gate_out->allocator()->allocate();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100229 _accum_input_gate_bias.configure(compile_context, &_input_layer_norm_out1, lstm_params.input_gate_bias(), &_input_layer_norm_out2, ConvertPolicy::SATURATE);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100230 _input_layer_norm_out1.allocator()->allocate();
231 input_gate_out = &_input_layer_norm_out2;
232 }
Manuel Bottini2b84be52020-04-08 10:15:51 +0100233 _activation_input_gate.configure(compile_context, input_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000234 }
235
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100236 // Configure block that calculates the cell state
237 // cell_state = Clip((PixelwiseMul(input_gate, Activation(input * input_to_cell_weights + output_state_in * recurrent_to_cell_weights + cell_bias)) + PixelwiseMul(forget_gate, cell_state)), cell_threshold)
Michalis Spyroubcedf512018-03-22 14:55:08 +0000238 TensorShape cell_state1_shape = compute_transposed_shape(*recurrent_to_output_weights->info());
239 _cell_state_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
240 _cell_state_out2.allocator()->init(TensorInfo(cell_state1_shape, 1, input->info()->data_type()));
241 _cell_state_out3.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
242 _cell_state_out4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
243 _cell_state_out5.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
244
Michalis Spyroubcedf512018-03-22 14:55:08 +0000245 _memory_group.manage(&_cell_state_out1);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100246 _fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000247 _memory_group.manage(&_cell_state_out2);
Teresa Charlin27886092021-02-25 20:15:01 +0000248 _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights->info(), _cell_state_out2.info());
249 _recurrent_to_cell_weights = recurrent_to_cell_weights;
Michalis Spyroubcedf512018-03-22 14:55:08 +0000250 _memory_group.manage(&_cell_state_out3);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100251 _gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000252 _cell_state_out2.allocator()->allocate();
253 _memory_group.manage(&_cell_state_out4);
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100254 _accum_cell_state1.configure(compile_context, &_cell_state_out1, &_cell_state_out3, &_cell_state_out4, ConvertPolicy::SATURATE);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100255 CLTensor *cell_state_out_ptr = &_cell_state_out4;
256 if(_is_layer_norm_lstm)
257 {
258 _cell_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
259 _cell_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
260 _memory_group.manage(&_cell_layer_norm_out1);
261 _memory_group.manage(&_cell_layer_norm_out2);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100262 _mean_std_norm_cell_gate.configure(compile_context, cell_state_out_ptr);
263 _pixelwise_mul_cell_gate_coeff.configure(compile_context, cell_state_out_ptr, lstm_params.cell_layer_norm_weights(), &_cell_layer_norm_out1, 1, ConvertPolicy::SATURATE,
264 RoundingPolicy::TO_NEAREST_EVEN);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100265 // cell_state_out_ptr is going to be reassigned, so allocate the tensor that it was assigned to before
266 cell_state_out_ptr->allocator()->allocate();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100267 _accum_cell_gate_bias.configure(compile_context, &_cell_layer_norm_out1, cell_bias, &_cell_layer_norm_out2, ConvertPolicy::SATURATE);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100268 _cell_layer_norm_out1.allocator()->allocate();
269 cell_state_out_ptr = &_cell_layer_norm_out2;
270 }
Manuel Bottini2b84be52020-04-08 10:15:51 +0100271 _activation_cell_state.configure(compile_context, cell_state_out_ptr, nullptr, activation_info);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000272 _memory_group.manage(&_cell_state_out5);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100273 _pixelwise_mul_cell_state1.configure(compile_context, cell_state_out_ptr, input_gate_out, &_cell_state_out5, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100274 cell_state_out_ptr->allocator()->allocate();
Manuel Bottini2b84be52020-04-08 10:15:51 +0100275 _pixelwise_mul_cell_state2.configure(compile_context, forget_gate_out, cell_state_in, &_cell_state_out3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100276 _accum_cell_state2.configure(compile_context, &_cell_state_out5, &_cell_state_out3, &_cell_state_out1, ConvertPolicy::SATURATE);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000277 _cell_state_out3.allocator()->allocate();
278 _cell_state_out5.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000279 // Perform clipping
280 if(cell_threshold != 0.f)
281 {
282 _perform_cell_clipping = true;
Manuel Bottini2b84be52020-04-08 10:15:51 +0100283 _cell_clip.configure(compile_context, &_cell_state_out1, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold, cell_threshold));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000284 }
285
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100286 // Configure block that calculates the output
287 // output_state_out = Activation(input * input_to_output_weights + output_state_in * recurrent_to_output_weights + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
John Kesapidescafec8f2019-02-19 15:53:59 +0000288 // We optimize this as follows:
289 // output_state_out = Activation( (input,output_state_in) * (input_to_output_weights, recurrent_to_output_weights) + PixelWiseMul(cell_state, cell_to_output_weights) + output_gate_bias)
Michalis Spyroubcedf512018-03-22 14:55:08 +0000290 _output1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
John Kesapidescafec8f2019-02-19 15:53:59 +0000291 _output4.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
292 std::vector<const ICLTensor *> in_out_weights;
293 in_out_weights.emplace_back(input_to_output_weights);
294 in_out_weights.emplace_back(recurrent_to_output_weights);
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100295 TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000296 _output2.allocator()->init(TensorInfo(in_out_weights_concat_shape, 1, input->info()->data_type()));
297
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100298 _concat_weights_output.configure(compile_context, in_out_weights, &_output2, Window::DimX);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000299
Michalis Spyroubcedf512018-03-22 14:55:08 +0000300 _memory_group.manage(&_output1);
John Kesapidescafec8f2019-02-19 15:53:59 +0000301 _memory_group.manage(&_output4);
302
Manuel Bottini2b84be52020-04-08 10:15:51 +0100303 _fully_connected_output.configure(compile_context, &_forget_gate_out2, &_output2, (_is_layer_norm_lstm) ? nullptr : output_gate_bias, &_output4);
John Kesapidescafec8f2019-02-19 15:53:59 +0000304
Michalis Spyroubcedf512018-03-22 14:55:08 +0000305 _output2.allocator()->allocate();
John Kesapidescafec8f2019-02-19 15:53:59 +0000306 _forget_gate_out2.allocator()->allocate();
307
308 CLTensor *output_gate_out = &_output4;
Michalis Spyroubcedf512018-03-22 14:55:08 +0000309 if(lstm_params.has_peephole_opt())
310 {
John Kesapidescafec8f2019-02-19 15:53:59 +0000311 _output3.allocator()->init(TensorInfo(_cell_state_out1.info()->tensor_shape(), 1, input->info()->data_type()));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000312
John Kesapidescafec8f2019-02-19 15:53:59 +0000313 _memory_group.manage(&_output3);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100314 _pixelwise_mul_output_state1.configure(compile_context, &_cell_state_out1, lstm_params.cell_to_output_weights(), &_output3, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
315 _accum_output1.configure(compile_context, &_output4, &_output3, &_output1, ConvertPolicy::SATURATE);
John Kesapidescafec8f2019-02-19 15:53:59 +0000316 _output4.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000317 output_gate_out = &_output1;
318
319 // Allocate intermediate buffers
John Kesapidescafec8f2019-02-19 15:53:59 +0000320 _output3.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000321 }
322 else
323 {
324 _output1.allocator()->allocate();
325 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100326 if(_is_layer_norm_lstm)
327 {
328 _output_layer_norm_out1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
329 _output_layer_norm_out2.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
330 _memory_group.manage(&_output_layer_norm_out1);
331 _memory_group.manage(&_output_layer_norm_out2);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100332 _mean_std_norm_output_gate.configure(compile_context, output_gate_out);
333 _pixelwise_mul_output_gate_coeff.configure(compile_context, output_gate_out, lstm_params.output_layer_norm_weights(), &_output_layer_norm_out1, 1, ConvertPolicy::SATURATE,
334 RoundingPolicy::TO_NEAREST_EVEN);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100335 // output_gate_out is going to be reassigned, so allocate the tensor that it was assigned to before
336 output_gate_out->allocator()->allocate();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100337 _accum_output_gate_bias.configure(compile_context, &_output_layer_norm_out1, output_gate_bias, &_output_layer_norm_out2, ConvertPolicy::SATURATE);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100338 _output_layer_norm_out1.allocator()->allocate();
339 output_gate_out = &_output_layer_norm_out2;
340 }
Manuel Bottini2b84be52020-04-08 10:15:51 +0100341 _activation_output.configure(compile_context, output_gate_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000342
Michalis Spyroubcedf512018-03-22 14:55:08 +0000343 // Configure block that calculates the output state
344 /** lstm_res = PixelwiseMul(output, Activation(cell_state))
345 *
346 * -- Clip(lstm_res * projection_weights + projection_bias, projection_threshold) , if there is a projection
347 * /
348 * output_state = --
349 * \
350 * -- lstm_res , otherwise
351 */
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100352 ICLTensor *output_state_out_tmp = lstm_params.has_projection() ? &_output_state1 : output_state_out;
353 _cell_state_activation.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
354 _output_state1.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
355
Michalis Spyroubcedf512018-03-22 14:55:08 +0000356 _memory_group.manage(&_cell_state_activation);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100357 _activation_output_state.configure(compile_context, &_cell_state_out1, &_cell_state_activation, activation_info);
358 _pixelwise_mul_output_state2.configure(compile_context, &_cell_state_activation, output_gate_out, output_state_out_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000359 _cell_state_activation.allocator()->allocate();
360
361 if(lstm_params.has_projection())
362 {
363 _has_projection_weights = true;
Manuel Bottini2b84be52020-04-08 10:15:51 +0100364 _fully_connected_output_state.configure(compile_context, output_state_out_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100365 _output_state1.allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000366 // Perform clipping
367 if(projection_threshold != 0.f)
368 {
369 _perform_projection_clipping = true;
Manuel Bottini2b84be52020-04-08 10:15:51 +0100370 _projection_clip.configure(compile_context, output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000371 }
Michalis Spyroubcedf512018-03-22 14:55:08 +0000372 }
373
374 // Copy cell state and output
Sheri Zhang7e20e292021-02-02 11:49:34 +0000375 _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
376 _copy_output.configure(compile_context, output_state_out, output);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000377
378 // Vector for holding the tensors to store in scratch buffer
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100379 std::vector<const ICLTensor *> scratch_inputs;
Georgios Pinitas0cc37c32018-11-14 15:54:26 +0000380 if(!lstm_params.has_cifg_opt())
Michalis Spyroubcedf512018-03-22 14:55:08 +0000381 {
Georgios Pinitas4f859822019-02-06 18:08:04 +0000382 scratch_inputs.emplace_back(input_gate_out);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000383 }
384 scratch_inputs.emplace_back(&_cell_state_out1);
385 scratch_inputs.emplace_back(forget_gate_out);
386 scratch_inputs.emplace_back(output_gate_out);
Manuel Bottini2b84be52020-04-08 10:15:51 +0100387 _concat_scratch_buffer.configure(compile_context, scratch_inputs, scratch_buffer, Window::DimX);
Georgios Pinitas4f859822019-02-06 18:08:04 +0000388 input_gate_out->allocator()->allocate();
Michele Di Giorgiodd2619a2018-11-05 16:46:09 +0000389 _cell_state_out1.allocator()->allocate();
390 forget_gate_out->allocator()->allocate();
391 output_gate_out->allocator()->allocate();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000392}
393
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100394Status CLLSTMLayer::validate(const ITensorInfo *input,
395 const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
Michalis Spyroubcedf512018-03-22 14:55:08 +0000396 const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
397 const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100398 const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
399 const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
Michalis Spyroubcedf512018-03-22 14:55:08 +0000400 const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold, float projection_threshold)
401{
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100402 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input,
403 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
404 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
405 forget_gate_bias, cell_bias, output_gate_bias,
406 output_state_in, cell_state_in,
407 scratch_buffer, output_state_out, cell_state_out, output);
408
409 // Check data types
Michalis Spyroubcedf512018-03-22 14:55:08 +0000410 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100411 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input,
412 input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
413 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
414 forget_gate_bias, cell_bias, output_gate_bias,
415 output_state_in, cell_state_in,
416 scratch_buffer, output_state_out, cell_state_out, output);
417
418 // Check dimensions
Georgios Pinitas42447c12018-07-16 17:01:20 +0100419 ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2);
420 ARM_COMPUTE_RETURN_ERROR_ON(input_to_forget_weights->num_dimensions() > 2);
421 ARM_COMPUTE_RETURN_ERROR_ON(input_to_cell_weights->num_dimensions() > 2);
422 ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() > 2);
423 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_forget_weights->num_dimensions() > 2);
424 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_cell_weights->num_dimensions() > 2);
425 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() > 2);
426 ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() > 1);
427 ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->num_dimensions() > 1);
428 ARM_COMPUTE_RETURN_ERROR_ON(output_gate_bias->num_dimensions() > 1);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100429 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() > 2);
430 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() > 2);
Georgios Pinitas42447c12018-07-16 17:01:20 +0100431 ARM_COMPUTE_RETURN_ERROR_ON(scratch_buffer->num_dimensions() > 2);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100432 ARM_COMPUTE_RETURN_ERROR_ON(output_state_out->num_dimensions() > 2);
433 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_out->num_dimensions() > 2);
Georgios Pinitas42447c12018-07-16 17:01:20 +0100434 ARM_COMPUTE_RETURN_ERROR_ON(output->num_dimensions() > 2);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100435 ARM_COMPUTE_RETURN_ERROR_ON(cell_bias->dimension(0) * 4 != scratch_buffer->dimension(0)
436 && cell_bias->dimension(0) * 3 != scratch_buffer->dimension(0));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000437
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100438 const unsigned int num_batches = input->dimension(1);
439 const unsigned int num_cells = input_to_output_weights->dimension(1);
440
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100441 if(lstm_params.use_layer_norm())
442 {
443 // If CIFG is used, input layer normalization weights tensor is omitted
444 if(lstm_params.has_cifg_opt())
445 {
446 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights() != nullptr);
447 }
448 else
449 {
450 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_layer_norm_weights());
451 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->num_dimensions() > 1);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100452 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_layer_norm_weights()->dimension(0) != num_cells);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100453 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, lstm_params.input_layer_norm_weights());
454 }
455
456 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.forget_layer_norm_weights(), lstm_params.cell_layer_norm_weights(), lstm_params.output_layer_norm_weights());
457 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, lstm_params.forget_layer_norm_weights(), lstm_params.cell_layer_norm_weights(), lstm_params.output_layer_norm_weights());
458 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->num_dimensions() > 1);
459 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->num_dimensions() > 1);
460 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->num_dimensions() > 1);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100461 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_layer_norm_weights()->dimension(0) != num_cells);
462 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_layer_norm_weights()->dimension(0) != num_cells);
463 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_layer_norm_weights()->dimension(0) != num_cells);
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100464 }
465
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100466 // Check peephole optimization
Michalis Spyroubcedf512018-03-22 14:55:08 +0000467 if(lstm_params.has_peephole_opt())
468 {
Michalis Spyrou09daf4d2018-06-28 17:07:22 +0100469 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_output_weights(), lstm_params.cell_to_forget_weights());
Georgios Pinitas42447c12018-07-16 17:01:20 +0100470 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() > 1);
471 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_output_weights()->num_dimensions() > 1);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000472 }
473
474 TensorShape units_out_transposed_shape = compute_transposed_shape(*recurrent_to_output_weights);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000475 TensorShape num_units_transposed_shape = compute_transposed_shape(*forget_gate_bias);
476 const TensorInfo units_out_transposed_info = TensorInfo(units_out_transposed_shape, 1, input->data_type());
Michalis Spyroubcedf512018-03-22 14:55:08 +0000477 const TensorInfo num_units_transposed_info = TensorInfo(num_units_transposed_shape, 1, input->data_type());
478
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100479 TensorInfo input_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
480 TensorInfo forget_gate = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
481 TensorInfo output_gate_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
482 TensorInfo cell_state_tmp = TensorInfo(TensorShape(num_cells, num_batches), 1, input->data_type());
483
Michalis Spyroubcedf512018-03-22 14:55:08 +0000484 // Validate forget gate
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100485 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_forget_weights, (lstm_params.use_layer_norm()) ? nullptr : forget_gate_bias, &forget_gate));
John Kesapidescafec8f2019-02-19 15:53:59 +0000486
487 std::vector<const ITensorInfo *> inputs_vector;
488 inputs_vector.emplace_back(input);
489 inputs_vector.emplace_back(output_state_in);
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100490 const TensorShape concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000491 TensorInfo forget_gate_concat = TensorInfo(concat_shape, 1, input->data_type());
492
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100493 ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector, &forget_gate_concat, Window::DimX));
John Kesapidescafec8f2019-02-19 15:53:59 +0000494
Michalis Spyroubcedf512018-03-22 14:55:08 +0000495 if(lstm_params.has_peephole_opt())
496 {
Michalis Spyrou1009e872020-07-27 12:48:34 +0100497 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &forget_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100498 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000499 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100500 if(lstm_params.use_layer_norm())
501 {
502 ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&forget_gate));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100503 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&forget_gate, lstm_params.forget_layer_norm_weights(), &forget_gate, 1, ConvertPolicy::SATURATE,
504 RoundingPolicy::TO_NEAREST_EVEN));
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100505 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate, forget_gate_bias, &forget_gate, ConvertPolicy::SATURATE));
506 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100507 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&forget_gate, &forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000508
509 // Validate input gate
510 if(!lstm_params.has_cifg_opt())
511 {
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100512 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(),
513 lstm_params.recurrent_to_input_weights(),
514 lstm_params.input_gate_bias());
Georgios Pinitas42447c12018-07-16 17:01:20 +0100515 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_to_input_weights()->num_dimensions() > 2);
516 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.recurrent_to_input_weights()->num_dimensions() > 2);
Georgios Pinitas42447c12018-07-16 17:01:20 +0100517 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_gate_bias()->num_dimensions() > 1);
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100518
John Kesapidescafec8f2019-02-19 15:53:59 +0000519 std::vector<const ITensorInfo *> lstm_weights;
520 lstm_weights.emplace_back(lstm_params.input_to_input_weights());
521 lstm_weights.emplace_back(lstm_params.recurrent_to_input_weights());
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100522 TensorShape lstm_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(lstm_weights, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000523 TensorInfo lstm_gate_concat = TensorInfo(lstm_weights_concat_shape, 1, input->data_type());
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100524 ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(lstm_weights, &lstm_gate_concat, Window::DimX));
John Kesapidescafec8f2019-02-19 15:53:59 +0000525
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100526 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, lstm_params.input_to_input_weights(), (lstm_params.use_layer_norm()) ? nullptr : lstm_params.input_gate_bias(), &input_gate));
John Kesapidescafec8f2019-02-19 15:53:59 +0000527
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100528 if(lstm_params.has_peephole_opt())
529 {
530 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_input_weights());
531 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_input_weights()->num_dimensions() > 1);
Michalis Spyrou1009e872020-07-27 12:48:34 +0100532 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_input_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100533 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, &input_gate, &input_gate, ConvertPolicy::SATURATE));
534 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100535
536 if(lstm_params.use_layer_norm())
537 {
538 ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&input_gate));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100539 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&input_gate, lstm_params.input_layer_norm_weights(), &input_gate, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100540 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_gate, lstm_params.input_gate_bias(), &input_gate, ConvertPolicy::SATURATE));
541 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100542 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&input_gate, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000543 }
544 else
545 {
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100546 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticSubtraction::validate(&forget_gate, &forget_gate, &forget_gate, ConvertPolicy::SATURATE));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000547 }
548
549 // Validate cell state
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100550 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_cell_weights, (lstm_params.use_layer_norm()) ? nullptr : cell_bias, &cell_state_tmp));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100551 ARM_COMPUTE_RETURN_ON_ERROR(CLGEMM::validate(output_state_in, &units_out_transposed_info, nullptr, &cell_state_tmp, 1.f, 0.f, GEMMInfo()));
552 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100553 if(lstm_params.use_layer_norm())
554 {
555 ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&cell_state_tmp));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100556 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, lstm_params.cell_layer_norm_weights(), &cell_state_tmp, 1, ConvertPolicy::SATURATE,
557 RoundingPolicy::TO_NEAREST_EVEN));
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100558 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, cell_bias, &cell_state_tmp, ConvertPolicy::SATURATE));
559 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100560 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, activation_info));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100561 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, &input_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
562 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, &forget_gate, &cell_state_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100563 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_state_tmp, &cell_state_tmp, &cell_state_tmp, ConvertPolicy::SATURATE));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000564 if(cell_threshold != 0.f)
565 {
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100566 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -cell_threshold,
567 cell_threshold)));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000568 }
569
John Kesapidescafec8f2019-02-19 15:53:59 +0000570 std::vector<const ITensorInfo *> in_out_weights;
571 in_out_weights.emplace_back(input_to_output_weights);
572 in_out_weights.emplace_back(recurrent_to_output_weights);
Georgios Pinitasdbfc2dc2019-04-02 12:51:21 +0100573 TensorShape in_out_weights_concat_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(in_out_weights, 0);
John Kesapidescafec8f2019-02-19 15:53:59 +0000574 TensorInfo in_out_gate_concat = TensorInfo(in_out_weights_concat_shape, 1, input->data_type());
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100575 ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(in_out_weights, &in_out_gate_concat, Window::DimX));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100576 // Validate output gate tmp
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100577 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(input, input_to_output_weights, (lstm_params.use_layer_norm()) ? nullptr : output_gate_bias, &output_gate_tmp));
John Kesapidescafec8f2019-02-19 15:53:59 +0000578
Michalis Spyroubcedf512018-03-22 14:55:08 +0000579 if(lstm_params.has_peephole_opt())
580 {
Michalis Spyrou1009e872020-07-27 12:48:34 +0100581 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, lstm_params.cell_to_output_weights(), &output_gate_tmp, 1, ConvertPolicy::SATURATE,
582 RoundingPolicy::TO_NEAREST_EVEN));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100583 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, &output_gate_tmp, &output_gate_tmp, ConvertPolicy::SATURATE));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000584 }
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100585 if(lstm_params.use_layer_norm())
586 {
587 ARM_COMPUTE_RETURN_ON_ERROR(CLMeanStdDevNormalizationLayer::validate(&output_gate_tmp));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100588 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&output_gate_tmp, lstm_params.output_layer_norm_weights(), &output_gate_tmp, 1, ConvertPolicy::SATURATE,
589 RoundingPolicy::TO_NEAREST_EVEN));
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100590 ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_gate_tmp, output_gate_bias, &output_gate_tmp, ConvertPolicy::SATURATE));
591 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100592 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&output_gate_tmp, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000593
594 // Validate output state
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100595 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_state_tmp, &cell_state_tmp, activation_info));
Michalis Spyrou1009e872020-07-27 12:48:34 +0100596 ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&cell_state_tmp, &output_gate_tmp, &output_gate_tmp, 1, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_EVEN));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000597 if(lstm_params.has_projection())
598 {
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100599 ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayer::validate(&output_gate_tmp, lstm_params.projection_weights(), lstm_params.projection_bias(), output_state_out));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000600 if(projection_threshold != 0.f)
601 {
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100602 ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output_state_out, output_state_out,
603 ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -projection_threshold, projection_threshold)));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000604 }
605 }
606
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100607 // Validate copy kernel
Sheri Zhang7e20e292021-02-02 11:49:34 +0000608 ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(&cell_state_tmp, cell_state_out));
609 ARM_COMPUTE_RETURN_ON_ERROR(CLCopy::validate(output_state_out, output));
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100610
611 // Validate scratch concatenation
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100612 std::vector<const ITensorInfo *> inputs_vector_info_raw;
Georgios Pinitas0cc37c32018-11-14 15:54:26 +0000613 if(!lstm_params.has_cifg_opt())
Michalis Spyroubcedf512018-03-22 14:55:08 +0000614 {
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100615 inputs_vector_info_raw.push_back(&input_gate);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000616 }
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100617 inputs_vector_info_raw.push_back(&cell_state_tmp);
618 inputs_vector_info_raw.push_back(&forget_gate);
619 inputs_vector_info_raw.push_back(&output_gate_tmp);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000620
Georgios Pinitas09f24972019-05-17 18:14:40 +0100621 ARM_COMPUTE_RETURN_ON_ERROR(CLConcatenateLayer::validate(inputs_vector_info_raw, scratch_buffer, Window::DimX));
Michalis Spyroubcedf512018-03-22 14:55:08 +0000622 return Status{};
623}
624
625void CLLSTMLayer::run()
626{
John Kesapidescafec8f2019-02-19 15:53:59 +0000627 prepare();
628
Georgios Pinitasda953f22019-04-02 17:27:03 +0100629 MemoryGroupResourceScope scope_mg(_memory_group);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000630
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100631 _concat_inputs_forget_gate.run();
John Kesapidescafec8f2019-02-19 15:53:59 +0000632
Michalis Spyroubcedf512018-03-22 14:55:08 +0000633 _fully_connected_forget_gate.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000634
635 if(_run_peephole_opt)
636 {
Michalis Spyrou1009e872020-07-27 12:48:34 +0100637 _pixelwise_mul_forget_gate.run();
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100638 _accum_forget_gate1.run();
639 }
640 if(_is_layer_norm_lstm)
641 {
642 _mean_std_norm_forget_gate.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100643 _pixelwise_mul_forget_gate_coeff.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100644 _accum_forget_gate_bias.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000645 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100646 _activation_forget_gate.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000647
648 if(_run_cifg_opt)
649 {
Sheri Zhang7e20e292021-02-02 11:49:34 +0000650 _ones_fill.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100651 _subtract_input_gate.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000652 }
653 else
654 {
655 _fully_connected_input_gate.run();
John Kesapidescafec8f2019-02-19 15:53:59 +0000656
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100657 if(_run_peephole_opt)
658 {
Michalis Spyrou1009e872020-07-27 12:48:34 +0100659 _pixelwise_mul_input_gate.run();
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100660 _accum_input_gate1.run();
661 }
662
663 if(_is_layer_norm_lstm)
664 {
665 _mean_std_norm_input_gate.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100666 _pixelwise_mul_input_gate_coeff.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100667 _accum_input_gate_bias.run();
Georgios Pinitas8bc745d2018-07-18 19:51:24 +0100668 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100669 _activation_input_gate.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000670 }
671
672 _fully_connected_cell_state.run();
Teresa Charlin27886092021-02-25 20:15:01 +0000673 ITensorPack pack;
674 pack.add_tensor(TensorType::ACL_SRC, _recurrent_to_cell_weights);
675 pack.add_tensor(TensorType::ACL_DST, &_cell_state_out2);
676 CLScheduler::get().enqueue_op(*_transpose_cell_state,
677 pack,
678 false);
Michalis Spyroubcedf512018-03-22 14:55:08 +0000679 _gemm_cell_state1.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100680 _accum_cell_state1.run();
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100681 if(_is_layer_norm_lstm)
682 {
683 _mean_std_norm_cell_gate.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100684 _pixelwise_mul_cell_gate_coeff.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100685 _accum_cell_gate_bias.run();
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100686 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100687 _activation_cell_state.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100688 _pixelwise_mul_cell_state1.run();
689 _pixelwise_mul_cell_state2.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100690 _accum_cell_state2.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000691
692 if(_perform_cell_clipping)
693 {
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100694 _cell_clip.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000695 }
696
697 _fully_connected_output.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000698
699 if(_run_peephole_opt)
700 {
Michalis Spyrou1009e872020-07-27 12:48:34 +0100701 _pixelwise_mul_output_state1.run();
Michele Di Giorgio39438b42019-06-04 12:41:45 +0100702 _accum_output1.run();
703 }
704 if(_is_layer_norm_lstm)
705 {
706 _mean_std_norm_output_gate.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100707 _pixelwise_mul_output_gate_coeff.run();
Michalis Spyrouad7515d2020-07-24 00:02:23 +0100708 _accum_output_gate_bias.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000709 }
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100710 _activation_output.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000711
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100712 _activation_output_state.run();
Michalis Spyrou1009e872020-07-27 12:48:34 +0100713 _pixelwise_mul_output_state2.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000714
715 if(_has_projection_weights)
716 {
717 _fully_connected_output_state.run();
718 if(_perform_projection_clipping)
719 {
Georgios Pinitasab23dd02020-07-06 14:57:36 +0100720 _projection_clip.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000721 }
722 }
723
Sheri Zhang7e20e292021-02-02 11:49:34 +0000724 _copy_cell_state.run();
725 _copy_output.run();
Michalis Spyroubcedf512018-03-22 14:55:08 +0000726
727 _concat_scratch_buffer.run();
giuros01164a2722018-11-20 18:34:46 +0000728}
John Kesapidescafec8f2019-02-19 15:53:59 +0000729
730void CLLSTMLayer::prepare()
731{
732 if(!_is_prepared)
733 {
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100734 _concat_weights_forget_gate.run();
John Kesapidescafec8f2019-02-19 15:53:59 +0000735 if(!_run_cifg_opt)
736 {
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100737 _concat_weights_input_gate.run();
John Kesapidescafec8f2019-02-19 15:53:59 +0000738 }
Michele Di Giorgiof932d2c2020-07-06 11:27:21 +0100739 _concat_weights_output.run();
John Kesapidescafec8f2019-02-19 15:53:59 +0000740 _is_prepared = true;
741 }
742}
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000743} // namespace arm_compute