blob: f3a3d2325679df5c2e23b63c2e7a931095923fe4 [file] [log] [blame]
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001/*
Michele Di Giorgio93b75e02021-06-21 12:00:43 +01002 * Copyright (c) 2020-2021 Arm Limited.
Michele Di Giorgio47a89902020-03-09 19:32:33 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"
25
26#include "arm_compute/core/KernelDescriptors.h"
27#include "arm_compute/core/QuantizationInfo.h"
28#include "arm_compute/core/Utils.h"
29#include "arm_compute/core/Validate.h"
30#include "arm_compute/core/utils/misc/InfoHelpers.h"
31#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
32#include "arm_compute/runtime/NEON/NEScheduler.h"
Michalis Spyrouebcebf12020-10-21 00:04:14 +010033#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
Michalis Spyrouebcebf12020-10-21 00:04:14 +010034#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
35#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
36#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
37#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
Michalis Spyrouebcebf12020-10-21 00:04:14 +010038#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010039#include "src/core/helpers/WindowHelpers.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000040
41namespace arm_compute
42{
43using namespace arm_compute::utils::info_helpers;
44namespace
45{
46Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo *mm_input, const ITensorInfo *mm_weights, const ITensorInfo *bias,
47 float gemmlowp_scale, const TensorInfo *mm_res_info, const TensorInfo *outstage_tensor_info)
48{
49 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(mm_input, mm_weights, nullptr, mm_res_info));
50 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
51 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(mm_res_info, bias, outstage_tensor_info, gemmlowp_info));
52 return Status{};
53}
54} // namespace
55
Michalis Spyrouebcebf12020-10-21 00:04:14 +010056Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
57{
58 // Output quantization scale will be different, but ignored here
59 // since it will be configured at configure() stage.
60 const TensorInfo out
61 {
62 in
63 };
64 return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
65}
66
67void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
68{
69 ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
70
71 Tensor &out = get_layer_norm_output(g);
72 _memory_group.manage(&out);
73 out.allocator()->init(*(in->info()));
74
Georgios Pinitas40f51a62020-11-21 03:04:18 +000075 get_layer_norm(g) = std::make_unique<NEQLSTMLayerNormalizationKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +010076 get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
77}
78
// Compiler-generated destructor, defined in this source file.
NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
80
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +010081Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
82{
83 ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
84 ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().num_dimensions() > max_dimension_supported);
85 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
86 ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().y() != src.tensor_shape().y());
87 return Status{};
88}
89
90void NEQLSTMLayer::TensorCopyKernel::configure(ITensor &src, ITensor &dst)
91{
92 ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayer::TensorCopyKernel::validate(*src.info(), *dst.info()));
93 _src = &src;
94 _dst = &dst;
95 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
96 _window = calculate_max_window(*_src->info(), Steps());
97}
98
99void NEQLSTMLayer::TensorCopyKernel::run()
100{
101 Iterator input_iter{ _src, _window };
102 Iterator output_iter{ _dst, _window };
103
104 execute_window_loop(_window, [&](const Coordinates &)
105 {
106 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
107 },
108 input_iter, output_iter);
109}
110
// Compiler-generated destructor, defined in this source file.
// NOTE(review): presumably kept out of the header so that the kernel types owned
// through std::unique_ptr are complete where they are destroyed - confirm against the header.
NEQLSTMLayer::~NEQLSTMLayer() = default;
112
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000113NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100114 : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
115 _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
116 _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
117 _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
118 _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
119 _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
120 _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
121 _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
122 _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
123 _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
124 _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
125 _layer_norm_output()
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000126{
127 _memory_group = MemoryGroup(std::move(memory_manager));
128}
129
130void NEQLSTMLayer::configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
131 const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias,
132 Tensor *mm_res, Tensor *outstage_res, float gemmlowp_scale,
133 const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info)
134{
135 _memory_group.manage(mm_res);
136 _memory_group.manage(outstage_res);
137
138 mm_res->allocator()->init(mm_res_info);
139 outstage_res->allocator()->init(outstage_tensor_info);
140
141 // Configure matrix-multiplication
142 mm.configure(mm_input, mm_weights, nullptr, mm_res);
143
144 // Configure output stage
145 quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
146 outstage.configure(mm_res, bias, outstage_res, gemmlowp_info);
147 mm_res->allocator()->allocate();
148}
149
150void NEQLSTMLayer::configure(const ITensor *input,
151 const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
152 const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
153 const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100154 const ITensor *cell_state_in, ITensor *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100155 ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000156 const LSTMParams<ITensor> &lstm_params)
157{
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000158 ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
159 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
160 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
161
162 // Set lstm parameters
163 LSTMParams<ITensorInfo> lstm_params_info{};
164 build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
165
166 // Validate
167 ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayer::validate(input->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
168 recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
169 forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100170 cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(),
171 lstm_params_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000172
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100173 const int batch_size = input->info()->dimension(1);
174 const int num_units = input_to_output_weights->info()->dimension(1);
175 const int output_size = output_state_out->info()->dimension(_out_state_output_size_dimension_idx);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000176
177 const UniformQuantizationInfo qinput = input->info()->quantization_info().uniform();
178 const UniformQuantizationInfo qcell_state_in = cell_state_in->info()->quantization_info().uniform();
179 const UniformQuantizationInfo qoutput_state_in = output_state_in->info()->quantization_info().uniform();
180
181 _projection_bias = lstm_params.projection_bias();
182 _input_to_forget_weights = input_to_forget_weights;
183 _input_to_cell_weights = input_to_cell_weights;
184 _input_to_output_weights = input_to_output_weights;
185 _recurrent_to_forget_weights = recurrent_to_forget_weights;
186 _recurrent_to_cell_weights = recurrent_to_cell_weights;
187 _recurrent_to_output_weights = recurrent_to_output_weights;
188 _projection_weights = lstm_params.projection_weights();
189
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100190 // Layer normalization
191 _has_layer_norm = lstm_params.use_layer_norm();
192 if(_has_layer_norm)
193 {
194 set_layer_norm_weight(lstm_params.forget_layer_norm_weights(), LayerNormGate::Forget);
195 set_layer_norm_weight(lstm_params.cell_layer_norm_weights(), LayerNormGate::Cell);
196 set_layer_norm_weight(lstm_params.input_layer_norm_weights(), LayerNormGate::Input);
197 set_layer_norm_weight(lstm_params.output_layer_norm_weights(), LayerNormGate::Output);
198
199 set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
200 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
201 set_layer_norm_bias(lstm_params.input_gate_bias(), LayerNormGate::Input);
202 set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
203 }
204
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000205 _has_cifg = lstm_params.has_cifg_opt();
206 _has_projection = lstm_params.has_projection();
207 _has_peephole = lstm_params.has_peephole_opt();
208
209 // Calculate and decompose effective scales for optimizing matmul calculation
210 const int32_t cell_shift = log2(qcell_state_in.scale);
211
212 // Calculate quantized parameters for clipping.
213 int16_t quantized_cell_clip = 0;
214 if(lstm_params.cell_clip() > 0.0f)
215 {
216 quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
217 }
218 _has_cell_clipping = quantized_cell_clip > 0;
219
220 // Precompute effective bias for optimizing the matmul computations.
221 if(!_has_cifg)
222 {
223 _input_to_input_weights = lstm_params.input_to_input_weights();
224 _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
225
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000226 _input_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
227 _recurrent_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100228 _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
229 _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000230 }
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100231
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000232 _input_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
233 _recurrent_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
234 _input_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
235 _recurrent_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
236 _input_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
237 _recurrent_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100238
239 _recurrent_to_cell_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
240 _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
241 _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
242 _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
243 _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
244 _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100245 if(_has_projection)
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000246 {
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000247 _projection_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100248 _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100249 if(_projection_bias != nullptr)
250 {
Michele Di Giorgio19023832020-06-17 16:08:10 +0000251 _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100252 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000253 }
254
255 // Pre-transpose weights to be used in GEMM.
256 _transpose_input_to_forget_weights.configure(input_to_forget_weights, &_input_to_forget_weights_transposed);
257 _transpose_input_to_cell_weights.configure(input_to_cell_weights, &_input_to_cell_weights_transposed);
258 _transpose_input_to_output_weights.configure(input_to_output_weights, &_input_to_output_weights_transposed);
259 _transpose_recurrent_to_forget_weights.configure(recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
260 _transpose_recurrent_to_cell_weights.configure(recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
261 _transpose_recurrent_to_output_weights.configure(recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
262 if(!_has_cifg)
263 {
264 _transpose_input_to_input_weights.configure(lstm_params.input_to_input_weights(), &_input_to_input_weights_transposed);
265 _transpose_recurrent_to_input_weights.configure(lstm_params.recurrent_to_input_weights(), &_recurrent_to_input_weights_transposed);
266 }
267 if(_has_projection)
268 {
269 _transpose_projection_weights.configure(_projection_weights, &_projection_weights_transposed);
270 }
271
272 GEMMLowpOutputStageInfo gemmlowp_info;
273 gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
274 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
275 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
276 gemmlowp_info.output_data_type = DataType::QSYMM16;
277
278 const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
279 // Forget gate.
280 const TensorInfo forget_gate_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
281 const float input_to_forget_scale = input_to_forget_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
282 configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
283 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
284 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
285 mm_out_info, forget_gate_outstage_info);
286
287 const float recurrent_to_forget_scale = recurrent_to_forget_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
288 configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
289 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
290 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
291 mm_out_info, forget_gate_outstage_info);
292
293 _accumulate_input_recurrent_forget.configure(&_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
294 _input_to_forget_outstage_res.allocator()->allocate();
295
296 if(_has_peephole)
297 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100298 _mul_cell_to_forget_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000299 _memory_group.manage(&_mul_cell_to_forget_res);
300 _pixelwise_mul_cell_to_forget.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
301 _cell_to_forget_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_forget_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0)));
302 _memory_group.manage(&_cell_to_forget_outstage_res);
303 const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->info()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
304 quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
305 _cell_to_forget_outstage.configure(&_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
306 _mul_cell_to_forget_res.allocator()->allocate();
307 _accumulate_cell_forget.configure(&_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
308 _cell_to_forget_outstage_res.allocator()->allocate();
309 }
310
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100311 Tensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
312
313 if(_has_layer_norm)
314 {
315 configure_layer_norm(LayerNormGate::Forget, forget_activation_input);
316 forget_activation_input->allocator()->allocate();
317 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
318 }
319
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000320 // Output quantization info of Sigmoid and Tanh activations
321 const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100322 const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000323
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000324 _memory_group.manage(&_forget_gate);
325 _forget_gate.allocator()->init(forget_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100326 _forget_gate_sigmoid.configure(forget_activation_input, &_forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
327 forget_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000328
329 // Modulation gate.
330 const TensorInfo cell_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
331 const float input_to_cell_scale = input_to_cell_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
332 configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
333 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
334 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
335 mm_out_info, cell_outstage_info);
336
337 const float recurrent_to_cell_scale = recurrent_to_cell_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
338 configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
339 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
340 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
341 mm_out_info, cell_outstage_info);
342
343 _accumulate_input_recurrent_modulation.configure(&_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, ConvertPolicy::SATURATE);
344 _input_to_cell_outstage_res.allocator()->allocate();
345
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100346 Tensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
347
348 if(_has_layer_norm)
349 {
350 configure_layer_norm(LayerNormGate::Cell, cell_activation_input);
351 cell_activation_input->allocator()->allocate();
352 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
353 }
354
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000355 const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100356
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000357 _memory_group.manage(&_cell_gate);
358 _cell_gate.allocator()->init(cell_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100359 _cell_gate_tanh.configure(cell_activation_input, &_cell_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
360 cell_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000361
362 // Input gate.
363 const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
364 _input_gate.allocator()->init(input_gate_info);
365 _memory_group.manage(&_input_gate);
366 if(_has_cifg)
367 {
368 _ones.allocator()->init(*_forget_gate.info());
369 _input_gate_sub.configure(&_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
370 _ones.allocator()->allocate();
371 }
372 else
373 {
374 const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
375 const float input_to_input_scale = _input_to_input_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
376 configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
377 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
378 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
379 mm_out_info, input_outstage_info);
380
381 const float recurrent_to_input_scale = _recurrent_to_input_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
382 configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100383 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000384 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
385 mm_out_info, input_outstage_info);
386 _accumulate_input_recurrent_input.configure(&_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
387 _input_to_input_outstage_res.allocator()->allocate();
388
389 if(_has_peephole)
390 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100391 _mul_cell_to_input_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000392 _memory_group.manage(&_mul_cell_to_input_res);
393 _pixelwise_mul_cell_to_input.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
394 const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->info()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
395 quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
396 _cell_to_input_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_input_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0)));
397 _memory_group.manage(&_cell_to_input_outstage_res);
398 _cell_to_input_outstage.configure(&_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
399 _mul_cell_to_input_res.allocator()->allocate();
400 _accumulate_cell_input.configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
401 _cell_to_input_outstage_res.allocator()->allocate();
402 }
403
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100404 Tensor *input_activation_input = &_recurrent_to_input_outstage_res;
405
406 if(_has_layer_norm)
407 {
408 configure_layer_norm(LayerNormGate::Input, input_activation_input);
409 input_activation_input->allocator()->allocate();
410 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
411 }
412
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100413 _input_gate_sigmoid.configure(input_activation_input, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100414 input_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000415 }
416 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100417 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000418 _pixelwise_mul_forget_cell.configure(&_forget_gate, cell_state_in, &_forget_gate, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
419 const float cell_gate_scale = _cell_gate.info()->quantization_info().uniform().scale;
420 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
421 const TensorInfo mul_input_cell_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(mul_input_cell_scale, 0));
422 _memory_group.manage(&_mul_input_cell_res);
423 _mul_input_cell_res.allocator()->init(mul_input_cell_info);
424 _pixelwise_mul_input_cell.configure(&_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
425 _cell_gate.allocator()->allocate();
426 _add_forget_cell.configure(&_forget_gate, &_mul_input_cell_res, cell_state_out, ConvertPolicy::SATURATE);
427 _mul_input_cell_res.allocator()->allocate();
428 _forget_gate.allocator()->allocate();
429 if(_has_cell_clipping)
430 {
431 _cell_clip.configure(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip, quantized_cell_clip));
432 }
433 // Output gate.
434 const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
435 const float input_to_output_scale = input_to_output_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
436 configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
437 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
438 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
439 mm_out_info, output_outstage_info);
440
441 const float recurrent_to_output_scale = recurrent_to_output_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
442 configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
443 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
444 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
445 mm_out_info, output_outstage_info);
446
447 _accumulate_input_recurrent_output.configure(&_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
448 _input_to_output_outstage_res.allocator()->allocate();
449
450 if(_has_peephole)
451 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100452 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000453 // Here we are not using the output stage because all operations are done in float
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100454 _mul_cell_to_output_res.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000455 _memory_group.manage(&_mul_cell_to_output_res);
456 _pixelwise_mul_cell_to_output.configure(cell_state_out, lstm_params.cell_to_output_weights(), &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100457
458 const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->info()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
459 quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
460 _cell_to_output_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_output_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0)));
461 _memory_group.manage(&_cell_to_output_outstage_res);
462 _cell_to_output_outstage.configure(&_mul_cell_to_output_res, nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000463 _mul_cell_to_output_res.allocator()->allocate();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100464
465 _accumulate_cell_to_output.configure(&_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
466 _cell_to_output_outstage_res.allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000467 }
468
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100469 Tensor *output_activation_input = &_recurrent_to_output_outstage_res;
470
471 if(_has_layer_norm)
472 {
473 configure_layer_norm(LayerNormGate::Output, output_activation_input);
474 output_activation_input->allocator()->allocate();
475 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
476 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000477 const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100478
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000479 _memory_group.manage(&_output_gate);
480 _output_gate.allocator()->init(output_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100481 _output_gate_sigmoid.configure(output_activation_input, &_output_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
482 output_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000483
484 // Hidden.
485 _hidden_tanh.configure(cell_state_out, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100486 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000487 _memory_group.manage(&_hidden_mul_res);
488 const TensorInfo hidden_mul_res(_input_gate.info()->tensor_shape(), 1, DataType::S32);
489 _hidden_mul_res.allocator()->init(hidden_mul_res);
490 _pixelwise_mul_hidden.configure(&_output_gate, &_input_gate, &_hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
491 _output_gate.allocator()->allocate();
492 _input_gate.allocator()->allocate();
493 const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
Sang-Hoon Park30b46a62020-04-18 01:40:57 +0100494 quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000495 gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
496 gemmlowp_info.output_data_type = output_state_in->info()->data_type();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100497
498 _projection_tensor_copy_required = (num_units != output_size);
499 ITensor *hidden_gate_result = output_state_out;
500
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100501 _memory_group.manage(&_hidden_gate);
502
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100503 if(_projection_tensor_copy_required)
504 {
505 _hidden_gate.allocator()->init(*output_state_out->info());
506 _hidden_gate.info()->set_tensor_shape(_hidden_mul_res.info()->tensor_shape());
507 hidden_gate_result = &_hidden_gate;
508 }
509
510 _hidden_outstage.configure(&_hidden_mul_res, nullptr, hidden_gate_result, gemmlowp_info);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000511 _hidden_mul_res.allocator()->allocate();
512
513 // Projection.
514 if(_has_projection)
515 {
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100516 const TensorInfo projection_outstage_info(*output_state_out->info());
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000517 const UniformQuantizationInfo qprojection = _projection_weights->info()->quantization_info().uniform();
518 const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
519 gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
520 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
521 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
522 gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
523
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100524 TensorInfo projection_mm_out_info{ mm_out_info };
525 projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100526
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000527 configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100528 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000529 &_mm_projection_res, &_projection_outstage_res, projection_scale,
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100530 projection_mm_out_info, projection_outstage_info);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100531
532 ITensor *accumulate_destination = output_state_out;
533
534 if(_projection_tensor_copy_required)
535 {
536 _hidden_gate.allocator()->allocate();
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100537 _projection_accumulate_res.allocator()->init(*output_state_in->info());
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100538 _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100539 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100540 accumulate_destination = &_projection_accumulate_res;
541 }
542
543 _accumulate_projection.configure(&_projection_outstage_res, accumulate_destination, accumulate_destination, ConvertPolicy::SATURATE);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000544 _projection_outstage_res.allocator()->allocate();
545
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100546 if(_projection_tensor_copy_required)
547 {
548 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
549 _projection_accumulate_res.allocator()->allocate();
550 }
551
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000552 int8_t quantized_projection_clip{ 0 };
553 if(lstm_params.projection_clip() > 0.0f)
554 {
555 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.projection_clip() / qprojection.scale, -128, 127);
556 }
557
558 if(quantized_projection_clip > 0)
559 {
560 _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip, quantized_projection_clip));
561 _has_projection_clipping = true;
562 }
563 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100564 else
565 {
566 if(_projection_tensor_copy_required)
567 {
568 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
569 _hidden_gate.allocator()->allocate();
570 }
571 }
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100572
573 // Copy output_state_out to output
574 _copy_output.configure(output_state_out, output);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000575}
576
577Status NEQLSTMLayer::validate(const ITensorInfo *input,
578 const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
579 const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
580 const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
581 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100582 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000583 const LSTMParams<ITensorInfo> &lstm_params)
584{
585 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100586 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
587 cell_state_out, output_state_out, output);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000588
589 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED);
590 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != 2, "Input must have exactly 2 dimensions");
591
592 const unsigned int input_size = input->dimension(0);
593 const unsigned int batch_size = input->dimension(1);
594 const unsigned int num_units = input_to_output_weights->dimension(1);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100595 const unsigned int output_size = output_state_out->dimension(_out_state_output_size_dimension_idx);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000596
597 ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() != 2);
598 ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->dimension(0) != input_size);
599 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_output_weights, input_to_forget_weights, input_to_cell_weights);
600 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() != 2);
601 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->dimension(1) != num_units);
602 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights);
603 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_to_forget_weights, 1, DataType::QSYMM8);
604 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
605 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
606
607 ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() != 1);
608 ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->dimension(0) != num_units);
609 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, cell_bias, output_gate_bias);
610 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(forget_gate_bias, 1, DataType::S32);
611 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, cell_bias, output_gate_bias);
612
613 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() != 2);
614 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(0) != num_units);
615 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(1) != batch_size);
616 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(cell_state_in, 1, DataType::QSYMM16);
617
618 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() != 2);
619 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(0) != output_size);
620 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(1) != batch_size);
621 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_in);
622
623 // Check whether peephole weights are all there or none
624 if(lstm_params.has_peephole_opt())
625 {
626 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
627 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
628 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() != 1);
629 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->dimension(0) != num_units);
630 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
631 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
632
633 if(!lstm_params.has_cifg_opt())
634 {
635 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_input_weights());
636 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
637 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
638 }
639 }
640
641 const UniformQuantizationInfo qinput = input->quantization_info().uniform();
642 const UniformQuantizationInfo qcell_state_in = cell_state_in->quantization_info().uniform();
643 const UniformQuantizationInfo qoutput_state_in = output_state_in->quantization_info().uniform();
644
645 // Calculate and decompose effective scales for optimizing matmul calculation
646 const int32_t cell_shift = log2(qcell_state_in.scale);
647 ARM_COMPUTE_RETURN_ERROR_ON(cell_shift > -9);
648
649 // Calculate quantized parameters for clipping.
650 int16_t quantized_cell_clip = 0;
651 if(lstm_params.cell_clip() > 0.0f)
652 {
653 quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
654 }
655
656 // Precompute effective bias for optimizing the matmul computations.
657 const TensorInfo eff_bias_info(TensorShape(num_units), 1, DataType::S32);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100658 const TensorInfo projection_eff_bias_info(TensorShape(output_size), 1, DataType::S32);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000659 if(!lstm_params.has_cifg_opt())
660 {
661 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.input_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
662 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.recurrent_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset,
663 true)));
664 }
665 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
666 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
667 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
668 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
669 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
670 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100671 if(lstm_params.has_projection())
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000672 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100673 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info, GEMMLowpReductionKernelInfo(output_size, false,
674 lstm_params.hidden_state_zero(),
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000675 true)));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100676 if(lstm_params.projection_bias() != nullptr)
677 {
678 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100679 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100680 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000681 }
682
683 const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
684 const TensorInfo recurrent_weights_transposed(TensorShape(num_units, output_size), 1, recurrent_to_forget_weights->data_type(), recurrent_to_forget_weights->quantization_info());
685
686 // Validate weights transpose
687 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_forget_weights, &input_weights_transposed));
688 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_cell_weights, &input_weights_transposed));
689 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_output_weights, &input_weights_transposed));
690 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_forget_weights, &recurrent_weights_transposed));
691 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_cell_weights, &recurrent_weights_transposed));
692 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_output_weights, &recurrent_weights_transposed));
693 if(!lstm_params.has_cifg_opt())
694 {
695 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.input_to_input_weights(), &input_weights_transposed));
696 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.recurrent_to_input_weights(), &recurrent_weights_transposed));
697 }
698 if(lstm_params.has_projection())
699 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100700 const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());
701 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.projection_weights(), &projection_weights_transposed));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000702 }
703
704 GEMMLowpOutputStageInfo gemmlowp_info;
705 gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
706 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
707 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
708 gemmlowp_info.output_data_type = DataType::QSYMM16;
709
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100710 const bool has_layer_norm = lstm_params.use_layer_norm();
711
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000712 // Forget gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100713 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000714 const TensorInfo forget_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
715 const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
716 const float input_to_forget_scale = input_to_forget_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100717 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000718
719 const float recurrent_to_forget_scale = recurrent_to_forget_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100720 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000721
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100722 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000723
724 if(lstm_params.has_peephole_opt())
725 {
726 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100727 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
728 RoundingPolicy::TO_ZERO));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000729 const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
730 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
731 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, nullptr, &forget_outstage_info, gemmlowp_info));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100732 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000733 }
734
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100735 if(has_layer_norm)
736 {
737 const ITensorInfo *w_info = lstm_params.forget_layer_norm_weights();
738 const ITensorInfo *b_info = forget_gate_bias;
739 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(forget_outstage_info, *w_info, *b_info));
740 }
741
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000742 // Output quantization info of Sigmoid and Tanh activations
743 const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100744 const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000745
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000746 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&forget_outstage_info, &forget_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
747
748 // Modulation gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100749 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000750 const TensorInfo cell_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
751 const float input_to_cell_scale = input_to_cell_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100752 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000753
754 const float recurrent_to_cell_scale = recurrent_to_cell_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100755 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000756
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100757 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_outstage_info, &cell_outstage_info, &cell_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000758
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100759 if(has_layer_norm)
760 {
761 const ITensorInfo *w_info = lstm_params.cell_layer_norm_weights();
762 const ITensorInfo *b_info = cell_bias;
763 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
764 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000765 const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100766
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000767 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_outstage_info, &cell_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
768
769 // Input gate.
770 const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
771 if(lstm_params.has_cifg_opt())
772 {
773 ARM_COMPUTE_RETURN_ERROR_ON_MSG(lstm_params.input_gate_bias() != nullptr, "Input gate bias must not be present when CIFG is used");
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100774 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtraction::validate(&input_gate_info, &forget_gate_info, &forget_gate_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000775 }
776 else
777 {
778 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
779 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights());
780 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_forget_weights, lstm_params.input_to_input_weights());
781 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_forget_weights, lstm_params.recurrent_to_input_weights());
782 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.input_gate_bias());
783 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, lstm_params.input_gate_bias());
784
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100785 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000786 const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
787 const float input_to_input_scale = lstm_params.input_to_input_weights()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100788 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000789
790 const float recurrent_to_input_scale = lstm_params.recurrent_to_input_weights()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100791 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000792
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100793 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000794
795 if(lstm_params.has_peephole_opt())
796 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100797 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_input_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
798 RoundingPolicy::TO_ZERO));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000799 const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
800 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100801 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, &eff_bias_info, &input_outstage_info, gemmlowp_info));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100802 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000803 }
804
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100805 if(has_layer_norm)
806 {
807 const ITensorInfo *w_info = lstm_params.input_layer_norm_weights();
808 const ITensorInfo *b_info = lstm_params.input_gate_bias();
809 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(input_outstage_info, *w_info, *b_info));
810 }
811
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100812 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&input_outstage_info, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000813 }
814 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100815 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&forget_gate_info, cell_state_in, &forget_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
816 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&input_gate_info, cell_state_in, &cell_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100817 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000818 if(quantized_cell_clip > 0)
819 {
820 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip,
821 quantized_cell_clip)));
822 }
823 // Output gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100824 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000825 const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
826 const float input_to_output_scale = input_to_output_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100827 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000828
829 const float recurrent_to_output_scale = recurrent_to_output_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100830 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000831
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100832 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000833 if(lstm_params.has_peephole_opt())
834 {
835 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_output_weights(), 1, DataType::QSYMM16);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100836 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000837 // Here we are not using the output stage because all operations are done in float
838 // const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
839 // ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100840 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_out, lstm_params.cell_to_output_weights(), &output_outstage_info, 1.f, ConvertPolicy::SATURATE,
841 RoundingPolicy::TO_ZERO));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100842 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000843 }
844
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100845 if(has_layer_norm)
846 {
847 const ITensorInfo *w_info = lstm_params.output_layer_norm_weights();
848 const ITensorInfo *b_info = output_gate_bias;
849 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(output_outstage_info, *w_info, *b_info));
850 }
851
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000852 const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
853 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&output_outstage_info, &output_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
854
855 // Hidden.
856 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(cell_state_out, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
857 const TensorInfo hidden_mul_res(TensorShape(num_units, batch_size), 1, DataType::S32);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100858 const TensorInfo hidden_out_info(TensorShape(num_units, batch_size), 1, DataType::QASYMM8_SIGNED);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100859 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&output_gate_info, &input_gate_info, &hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100860
861 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.hidden_state_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000862 const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
Sang-Hoon Park30b46a62020-04-18 01:40:57 +0100863 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true));
Sang-Hoon Park9f893752020-10-20 15:33:31 +0100864 gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
865 gemmlowp_info.output_data_type = hidden_out_info.data_type();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100866 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&hidden_mul_res, nullptr, &hidden_out_info, gemmlowp_info));
867
868 const bool projection_tensor_copy_required = num_units != output_size;
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000869
870 // Projection.
871 if(lstm_params.has_projection())
872 {
873 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100874 ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000875
876 const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
877 const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
878 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(projection_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
879 gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
880 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
881 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
882 gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
883
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100884 const TensorInfo projection_outstage_info(*output_state_out);
885 const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100886
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100887 TensorInfo projection_mm_out_info{ mm_out_info };
888 projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100889
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100890 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
891 &projection_outstage_info));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100892
893 if(projection_tensor_copy_required)
894 {
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100895 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100896 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000897
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100898 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000899
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100900 if(projection_tensor_copy_required)
901 {
902 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(projection_outstage_info, *output_state_out));
903 }
904
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000905 int8_t quantized_projection_clip{ 0 };
906 if(lstm_params.projection_clip() > 0.0f)
907 {
908 quantized_projection_clip = quantize_qasymm8_signed(lstm_params.projection_clip(), qprojection);
909 }
910
911 if(quantized_projection_clip > 0)
912 {
913 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip,
914 quantized_projection_clip)));
915 }
916 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100917 else
918 {
919 if(projection_tensor_copy_required)
920 {
921 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(hidden_out_info, *output_state_out));
922 }
923 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000924
925 if(cell_state_out->total_size() > 0)
926 {
927 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(cell_state_in, cell_state_out);
928 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(cell_state_in, cell_state_out);
929 }
930
931 if(output_state_out->total_size() > 0)
932 {
933 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_out);
934 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
935 }
936
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100937 ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000938 return Status{};
939}
940
941void NEQLSTMLayer::run()
942{
943 prepare();
944
945 // Acquire all the temporaries
946 MemoryGroupResourceScope scope_mg(_memory_group);
947
948 // Forget gate.
949 _mm_input_to_forget.run();
950 _input_to_forget_outstage.run();
951
952 _mm_recurrent_to_forget.run();
953 _recurrent_to_forget_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100954 _accumulate_input_recurrent_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000955
956 if(_has_peephole)
957 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100958 _pixelwise_mul_cell_to_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000959 _cell_to_forget_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100960 _accumulate_cell_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000961 }
962
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100963 if(_has_layer_norm)
964 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100965 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100966 }
967
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000968 _forget_gate_sigmoid.run();
969
970 // Modulation gate.
971 _mm_input_to_cell.run();
972 _input_to_cell_outstage.run();
973
974 _mm_recurrent_to_cell.run();
975 _recurrent_to_cell_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100976 _accumulate_input_recurrent_modulation.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000977
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100978 if(_has_layer_norm)
979 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100980 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100981 }
982
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000983 _cell_gate_tanh.run();
984
985 // Input gate
986 if(_has_cifg)
987 {
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100988 _input_gate_sub.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000989 }
990 else
991 {
992 _mm_input_to_input.run();
993 _input_to_input_outstage.run();
994 _mm_recurrent_to_input.run();
995 _recurrent_to_input_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100996 _accumulate_input_recurrent_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000997
998 if(_has_peephole)
999 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001000 _pixelwise_mul_cell_to_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001001 _cell_to_input_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001002 _accumulate_cell_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001003 }
1004
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001005 if(_has_layer_norm)
1006 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001007 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001008 }
1009
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001010 _input_gate_sigmoid.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001011 }
1012
1013 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001014 _pixelwise_mul_forget_cell.run();
1015 _pixelwise_mul_input_cell.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001016 _add_forget_cell.run();
1017
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001018 if(_has_cell_clipping)
1019 {
1020 _cell_clip.run();
1021 }
1022
1023 // Output gate.
1024 _mm_input_to_output.run();
1025 _input_to_output_outstage.run();
1026 _mm_recurrent_to_output.run();
1027 _recurrent_to_output_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001028 _accumulate_input_recurrent_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001029 if(_has_peephole)
1030 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001031 _pixelwise_mul_cell_to_output.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001032 _cell_to_output_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001033 _accumulate_cell_to_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001034 }
1035
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001036 if(_has_layer_norm)
1037 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001038 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001039 }
1040
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001041 _output_gate_sigmoid.run();
1042
1043 // Hidden.
1044 _hidden_tanh.run();
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001045 _pixelwise_mul_hidden.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001046 _hidden_outstage.run();
1047
1048 // Projection.
1049 if(_has_projection)
1050 {
1051 _mm_projection.run();
1052 _projection_outstage.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001053
1054 if(_projection_tensor_copy_required)
1055 {
1056 _projection_output_to_accumulate_copy.run();
1057 }
1058
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001059 _accumulate_projection.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001060
1061 if(_projection_tensor_copy_required)
1062 {
1063 _projection_accumulate_to_output_copy.run();
1064 }
1065
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001066 if(_has_projection_clipping)
1067 {
1068 _projection_clip.run();
1069 }
1070 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001071 else
1072 {
1073 if(_projection_tensor_copy_required)
1074 {
1075 _hidden_to_output_copy.run();
1076 }
1077 }
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +01001078
1079 // Copy output_state_out to output
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001080 _copy_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001081}
1082
1083void NEQLSTMLayer::prepare()
1084{
1085 if(!_is_prepared)
1086 {
1087 // Pre-transpose weights to be used in GEMM.
1088 _input_to_forget_weights_transposed.allocator()->allocate();
1089 _input_to_cell_weights_transposed.allocator()->allocate();
1090 _input_to_output_weights_transposed.allocator()->allocate();
1091 _recurrent_to_forget_weights_transposed.allocator()->allocate();
1092 _recurrent_to_cell_weights_transposed.allocator()->allocate();
1093 _recurrent_to_output_weights_transposed.allocator()->allocate();
1094 _transpose_input_to_forget_weights.run();
1095 _transpose_input_to_cell_weights.run();
1096 _transpose_input_to_output_weights.run();
1097 _transpose_recurrent_to_forget_weights.run();
1098 _transpose_recurrent_to_cell_weights.run();
1099 _transpose_recurrent_to_output_weights.run();
1100
1101 // Precompute effective biases
1102 if(_has_cifg)
1103 {
1104 std::fill_n(reinterpret_cast<int16_t *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 32767);
1105 }
1106 else
1107 {
1108 _input_to_input_eff_bias.allocator()->allocate();
1109 _recurrent_to_input_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001110 NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY);
1111 NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001112
1113 _input_to_input_weights_transposed.allocator()->allocate();
1114 _recurrent_to_input_weights_transposed.allocator()->allocate();
1115 _transpose_input_to_input_weights.run();
1116 _transpose_recurrent_to_input_weights.run();
1117 _input_to_input_weights->mark_as_unused();
1118 _recurrent_to_input_weights->mark_as_unused();
1119 }
1120 _input_to_forget_eff_bias.allocator()->allocate();
1121 _recurrent_to_forget_eff_bias.allocator()->allocate();
1122 _input_to_cell_eff_bias.allocator()->allocate();
1123 _recurrent_to_cell_eff_bias.allocator()->allocate();
1124 _input_to_output_eff_bias.allocator()->allocate();
1125 _recurrent_to_output_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001126 NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY);
1127 NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY);
1128 NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY);
1129 NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY);
1130 NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY);
1131 NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001132
1133 if(_has_projection)
1134 {
Michele Di Giorgio11c562c2020-06-10 16:34:50 +01001135 _projection_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001136 NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001137 if(_projection_bias != nullptr)
1138 {
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001139 _projection_bias_add.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001140 _projection_bias->mark_as_unused();
1141 }
1142
1143 _projection_weights_transposed.allocator()->allocate();
1144 _transpose_projection_weights.run();
1145 _projection_weights->mark_as_unused();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001146
1147 if(!_projection_tensor_copy_required)
1148 {
1149 _hidden_gate.mark_as_unused();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001150 _projection_accumulate_res.mark_as_unused();
1151 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001152 }
1153
1154 // Mark weights as unused
1155 _input_to_forget_weights->mark_as_unused();
1156 _input_to_cell_weights->mark_as_unused();
1157 _input_to_output_weights->mark_as_unused();
1158 _recurrent_to_forget_weights->mark_as_unused();
1159 _recurrent_to_cell_weights->mark_as_unused();
1160 _recurrent_to_output_weights->mark_as_unused();
1161
1162 _is_prepared = true;
1163 }
1164}
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001165} // namespace arm_compute