blob: 85d62ac058f46ada15b641b49b8624e251a61ef3 [file] [log] [blame]
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2020 Arm Limited.
Michele Di Giorgio47a89902020-03-09 19:32:33 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"
25
26#include "arm_compute/core/KernelDescriptors.h"
27#include "arm_compute/core/QuantizationInfo.h"
28#include "arm_compute/core/Utils.h"
29#include "arm_compute/core/Validate.h"
30#include "arm_compute/core/utils/misc/InfoHelpers.h"
31#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
32#include "arm_compute/runtime/NEON/NEScheduler.h"
Michalis Spyrouebcebf12020-10-21 00:04:14 +010033#include "src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h"
34#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
35#include "src/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h"
36#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
37#include "src/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
38#include "src/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
39#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
40#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010041#include "src/core/helpers/WindowHelpers.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000042
43namespace arm_compute
44{
45using namespace arm_compute::utils::info_helpers;
46namespace
47{
48Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo *mm_input, const ITensorInfo *mm_weights, const ITensorInfo *bias,
49 float gemmlowp_scale, const TensorInfo *mm_res_info, const TensorInfo *outstage_tensor_info)
50{
51 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(mm_input, mm_weights, nullptr, mm_res_info));
52 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
53 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(mm_res_info, bias, outstage_tensor_info, gemmlowp_info));
54 return Status{};
55}
56} // namespace
57
Michalis Spyrouebcebf12020-10-21 00:04:14 +010058Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
59{
60 // Output quantization scale will be different, but ignored here
61 // since it will be configured at configure() stage.
62 const TensorInfo out
63 {
64 in
65 };
66 return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
67}
68
69void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
70{
71 ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
72
73 Tensor &out = get_layer_norm_output(g);
74 _memory_group.manage(&out);
75 out.allocator()->init(*(in->info()));
76
Georgios Pinitas40f51a62020-11-21 03:04:18 +000077 get_layer_norm(g) = std::make_unique<NEQLSTMLayerNormalizationKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +010078 get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
79}
80
81NEQLSTMLayer::TensorCopyKernel::~TensorCopyKernel() = default;
82
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +010083Status NEQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
84{
85 ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
86 ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().num_dimensions() > max_dimension_supported);
87 ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
88 ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().y() != src.tensor_shape().y());
89 return Status{};
90}
91
92void NEQLSTMLayer::TensorCopyKernel::configure(ITensor &src, ITensor &dst)
93{
94 ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayer::TensorCopyKernel::validate(*src.info(), *dst.info()));
95 _src = &src;
96 _dst = &dst;
97 _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
98 _window = calculate_max_window(*_src->info(), Steps());
99}
100
101void NEQLSTMLayer::TensorCopyKernel::run()
102{
103 Iterator input_iter{ _src, _window };
104 Iterator output_iter{ _dst, _window };
105
106 execute_window_loop(_window, [&](const Coordinates &)
107 {
108 memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
109 },
110 input_iter, output_iter);
111}
112
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100113NEQLSTMLayer::~NEQLSTMLayer() = default;
114
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000115NEQLSTMLayer::NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100116 : _memory_group(), _transpose_input_to_forget_weights(), _transpose_input_to_cell_weights(), _transpose_input_to_output_weights(), _transpose_input_to_input_weights(),
117 _transpose_recurrent_to_forget_weights(), _transpose_recurrent_to_cell_weights(), _transpose_recurrent_to_output_weights(), _transpose_recurrent_to_input_weights(), _transpose_projection_weights(),
118 _input_to_input_reduction(), _recurrent_to_input_reduction(), _input_to_forget_reduction(), _recurrent_to_forget_reduction(), _input_to_cell_reduction(), _recurrent_to_cell_reduction(),
119 _input_to_output_reduction(), _recurrent_to_output_reduction(), _projection_reduction(), _projection_bias_add(), _mm_input_to_forget(), _mm_recurrent_to_forget(), _pixelwise_mul_cell_to_forget(),
120 _input_to_forget_outstage(), _recurrent_to_forget_outstage(), _cell_to_forget_outstage(), _accumulate_input_recurrent_forget(), _accumulate_cell_forget(), _forget_gate_sigmoid(), _mm_input_to_cell(),
121 _input_to_cell_outstage(), _mm_recurrent_to_cell(), _recurrent_to_cell_outstage(), _accumulate_input_recurrent_modulation(), _cell_gate_tanh(), _input_gate_sub(), _mm_input_to_input(),
122 _input_to_input_outstage(), _mm_recurrent_to_input(), _recurrent_to_input_outstage(), _accumulate_input_recurrent_input(), _pixelwise_mul_cell_to_input(), _cell_to_input_outstage(),
123 _accumulate_cell_input(), _input_gate_sigmoid(), _pixelwise_mul_forget_cell(), _pixelwise_mul_input_cell(), _add_forget_cell(), _cell_clip(), _mm_input_to_output(), _input_to_output_outstage(),
124 _mm_recurrent_to_output(), _recurrent_to_output_outstage(), _accumulate_input_recurrent_output(), _pixelwise_mul_cell_to_output(), _cell_to_output_outstage(), _accumulate_cell_to_output(),
125 _output_gate_sigmoid(), _hidden_tanh(), _pixelwise_mul_hidden(), _hidden_outstage(), _mm_projection(), _projection_outstage(), _accumulate_projection(), _projection_clip(), _projection_bias_copy(),
126 _projection_output_to_accumulate_copy(), _projection_accumulate_to_output_copy(), _hidden_to_output_copy(), _layer_norms(), _copy_output(), _layer_norm_weights(), _layer_norm_bias(),
127 _layer_norm_output()
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000128{
129 _memory_group = MemoryGroup(std::move(memory_manager));
130}
131
132void NEQLSTMLayer::configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
133 const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias,
134 Tensor *mm_res, Tensor *outstage_res, float gemmlowp_scale,
135 const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info)
136{
137 _memory_group.manage(mm_res);
138 _memory_group.manage(outstage_res);
139
140 mm_res->allocator()->init(mm_res_info);
141 outstage_res->allocator()->init(outstage_tensor_info);
142
143 // Configure matrix-multiplication
144 mm.configure(mm_input, mm_weights, nullptr, mm_res);
145
146 // Configure output stage
147 quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
148 outstage.configure(mm_res, bias, outstage_res, gemmlowp_info);
149 mm_res->allocator()->allocate();
150}
151
152void NEQLSTMLayer::configure(const ITensor *input,
153 const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
154 const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
155 const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100156 const ITensor *cell_state_in, ITensor *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100157 ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000158 const LSTMParams<ITensor> &lstm_params)
159{
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000160 ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
161 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
162 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in, cell_state_out, output_state_out);
163
164 // Set lstm parameters
165 LSTMParams<ITensorInfo> lstm_params_info{};
166 build_lstm_params_tensor_info(lstm_params, &lstm_params_info);
167
168 // Validate
169 ARM_COMPUTE_ERROR_THROW_ON(NEQLSTMLayer::validate(input->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
170 recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
171 forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100172 cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(),
173 lstm_params_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000174
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100175 const int batch_size = input->info()->dimension(1);
176 const int num_units = input_to_output_weights->info()->dimension(1);
177 const int output_size = output_state_out->info()->dimension(_out_state_output_size_dimension_idx);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000178
179 const UniformQuantizationInfo qinput = input->info()->quantization_info().uniform();
180 const UniformQuantizationInfo qcell_state_in = cell_state_in->info()->quantization_info().uniform();
181 const UniformQuantizationInfo qoutput_state_in = output_state_in->info()->quantization_info().uniform();
182
183 _projection_bias = lstm_params.projection_bias();
184 _input_to_forget_weights = input_to_forget_weights;
185 _input_to_cell_weights = input_to_cell_weights;
186 _input_to_output_weights = input_to_output_weights;
187 _recurrent_to_forget_weights = recurrent_to_forget_weights;
188 _recurrent_to_cell_weights = recurrent_to_cell_weights;
189 _recurrent_to_output_weights = recurrent_to_output_weights;
190 _projection_weights = lstm_params.projection_weights();
191
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100192 // Layer normalization
193 _has_layer_norm = lstm_params.use_layer_norm();
194 if(_has_layer_norm)
195 {
196 set_layer_norm_weight(lstm_params.forget_layer_norm_weights(), LayerNormGate::Forget);
197 set_layer_norm_weight(lstm_params.cell_layer_norm_weights(), LayerNormGate::Cell);
198 set_layer_norm_weight(lstm_params.input_layer_norm_weights(), LayerNormGate::Input);
199 set_layer_norm_weight(lstm_params.output_layer_norm_weights(), LayerNormGate::Output);
200
201 set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
202 set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
203 set_layer_norm_bias(lstm_params.input_gate_bias(), LayerNormGate::Input);
204 set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
205 }
206
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000207 _has_cifg = lstm_params.has_cifg_opt();
208 _has_projection = lstm_params.has_projection();
209 _has_peephole = lstm_params.has_peephole_opt();
210
211 // Calculate and decompose effective scales for optimizing matmul calculation
212 const int32_t cell_shift = log2(qcell_state_in.scale);
213
214 // Calculate quantized parameters for clipping.
215 int16_t quantized_cell_clip = 0;
216 if(lstm_params.cell_clip() > 0.0f)
217 {
218 quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
219 }
220 _has_cell_clipping = quantized_cell_clip > 0;
221
222 // Precompute effective bias for optimizing the matmul computations.
223 if(!_has_cifg)
224 {
225 _input_to_input_weights = lstm_params.input_to_input_weights();
226 _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
227
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000228 _input_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
229 _recurrent_to_input_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100230 _input_to_input_reduction->configure(_input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
231 _recurrent_to_input_reduction->configure(_recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000232 }
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100233
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000234 _input_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
235 _recurrent_to_forget_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
236 _input_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
237 _recurrent_to_cell_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
238 _input_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
239 _recurrent_to_output_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100240
241 _recurrent_to_cell_reduction->configure(input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
242 _recurrent_to_forget_reduction->configure(recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
243 _input_to_cell_reduction->configure(input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
244 _recurrent_to_cell_reduction->configure(recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
245 _input_to_output_reduction->configure(input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
246 _recurrent_to_output_reduction->configure(recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100247 if(_has_projection)
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000248 {
Georgios Pinitas40f51a62020-11-21 03:04:18 +0000249 _projection_reduction = std::make_unique<NEGEMMLowpMatrixAReductionKernel>();
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100250 _projection_reduction->configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100251 if(_projection_bias != nullptr)
252 {
Michele Di Giorgio19023832020-06-17 16:08:10 +0000253 _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100254 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000255 }
256
257 // Pre-transpose weights to be used in GEMM.
258 _transpose_input_to_forget_weights.configure(input_to_forget_weights, &_input_to_forget_weights_transposed);
259 _transpose_input_to_cell_weights.configure(input_to_cell_weights, &_input_to_cell_weights_transposed);
260 _transpose_input_to_output_weights.configure(input_to_output_weights, &_input_to_output_weights_transposed);
261 _transpose_recurrent_to_forget_weights.configure(recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
262 _transpose_recurrent_to_cell_weights.configure(recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
263 _transpose_recurrent_to_output_weights.configure(recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
264 if(!_has_cifg)
265 {
266 _transpose_input_to_input_weights.configure(lstm_params.input_to_input_weights(), &_input_to_input_weights_transposed);
267 _transpose_recurrent_to_input_weights.configure(lstm_params.recurrent_to_input_weights(), &_recurrent_to_input_weights_transposed);
268 }
269 if(_has_projection)
270 {
271 _transpose_projection_weights.configure(_projection_weights, &_projection_weights_transposed);
272 }
273
274 GEMMLowpOutputStageInfo gemmlowp_info;
275 gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
276 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
277 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
278 gemmlowp_info.output_data_type = DataType::QSYMM16;
279
280 const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
281 // Forget gate.
282 const TensorInfo forget_gate_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
283 const float input_to_forget_scale = input_to_forget_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
284 configure_mm(_mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
285 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
286 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
287 mm_out_info, forget_gate_outstage_info);
288
289 const float recurrent_to_forget_scale = recurrent_to_forget_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
290 configure_mm(_mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
291 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
292 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
293 mm_out_info, forget_gate_outstage_info);
294
295 _accumulate_input_recurrent_forget.configure(&_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
296 _input_to_forget_outstage_res.allocator()->allocate();
297
298 if(_has_peephole)
299 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100300 _mul_cell_to_forget_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000301 _memory_group.manage(&_mul_cell_to_forget_res);
302 _pixelwise_mul_cell_to_forget.configure(cell_state_in, lstm_params.cell_to_forget_weights(), &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
303 _cell_to_forget_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_forget_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0)));
304 _memory_group.manage(&_cell_to_forget_outstage_res);
305 const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->info()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
306 quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
307 _cell_to_forget_outstage.configure(&_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
308 _mul_cell_to_forget_res.allocator()->allocate();
309 _accumulate_cell_forget.configure(&_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, ConvertPolicy::SATURATE);
310 _cell_to_forget_outstage_res.allocator()->allocate();
311 }
312
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100313 Tensor *forget_activation_input = &_recurrent_to_forget_outstage_res;
314
315 if(_has_layer_norm)
316 {
317 configure_layer_norm(LayerNormGate::Forget, forget_activation_input);
318 forget_activation_input->allocator()->allocate();
319 forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
320 }
321
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000322 // Output quantization info of Sigmoid and Tanh activations
323 const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100324 const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000325
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000326 _memory_group.manage(&_forget_gate);
327 _forget_gate.allocator()->init(forget_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100328 _forget_gate_sigmoid.configure(forget_activation_input, &_forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
329 forget_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000330
331 // Modulation gate.
332 const TensorInfo cell_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
333 const float input_to_cell_scale = input_to_cell_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
334 configure_mm(_mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
335 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
336 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
337 mm_out_info, cell_outstage_info);
338
339 const float recurrent_to_cell_scale = recurrent_to_cell_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
340 configure_mm(_mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
341 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
342 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
343 mm_out_info, cell_outstage_info);
344
345 _accumulate_input_recurrent_modulation.configure(&_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, ConvertPolicy::SATURATE);
346 _input_to_cell_outstage_res.allocator()->allocate();
347
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100348 Tensor *cell_activation_input = &_recurrent_to_cell_outstage_res;
349
350 if(_has_layer_norm)
351 {
352 configure_layer_norm(LayerNormGate::Cell, cell_activation_input);
353 cell_activation_input->allocator()->allocate();
354 cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
355 }
356
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000357 const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100358
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000359 _memory_group.manage(&_cell_gate);
360 _cell_gate.allocator()->init(cell_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100361 _cell_gate_tanh.configure(cell_activation_input, &_cell_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
362 cell_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000363
364 // Input gate.
365 const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
366 _input_gate.allocator()->init(input_gate_info);
367 _memory_group.manage(&_input_gate);
368 if(_has_cifg)
369 {
370 _ones.allocator()->init(*_forget_gate.info());
371 _input_gate_sub.configure(&_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
372 _ones.allocator()->allocate();
373 }
374 else
375 {
376 const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
377 const float input_to_input_scale = _input_to_input_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
378 configure_mm(_mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
379 input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
380 &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
381 mm_out_info, input_outstage_info);
382
383 const float recurrent_to_input_scale = _recurrent_to_input_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
384 configure_mm(_mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100385 output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000386 &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
387 mm_out_info, input_outstage_info);
388 _accumulate_input_recurrent_input.configure(&_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
389 _input_to_input_outstage_res.allocator()->allocate();
390
391 if(_has_peephole)
392 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100393 _mul_cell_to_input_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000394 _memory_group.manage(&_mul_cell_to_input_res);
395 _pixelwise_mul_cell_to_input.configure(cell_state_in, lstm_params.cell_to_input_weights(), &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
396 const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->info()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
397 quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
398 _cell_to_input_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_input_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0)));
399 _memory_group.manage(&_cell_to_input_outstage_res);
400 _cell_to_input_outstage.configure(&_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
401 _mul_cell_to_input_res.allocator()->allocate();
402 _accumulate_cell_input.configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
403 _cell_to_input_outstage_res.allocator()->allocate();
404 }
405
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100406 Tensor *input_activation_input = &_recurrent_to_input_outstage_res;
407
408 if(_has_layer_norm)
409 {
410 configure_layer_norm(LayerNormGate::Input, input_activation_input);
411 input_activation_input->allocator()->allocate();
412 input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
413 }
414
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100415 _input_gate_sigmoid.configure(input_activation_input, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100416 input_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000417 }
418 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100419 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000420 _pixelwise_mul_forget_cell.configure(&_forget_gate, cell_state_in, &_forget_gate, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
421 const float cell_gate_scale = _cell_gate.info()->quantization_info().uniform().scale;
422 const float mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
423 const TensorInfo mul_input_cell_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(mul_input_cell_scale, 0));
424 _memory_group.manage(&_mul_input_cell_res);
425 _mul_input_cell_res.allocator()->init(mul_input_cell_info);
426 _pixelwise_mul_input_cell.configure(&_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
427 _cell_gate.allocator()->allocate();
428 _add_forget_cell.configure(&_forget_gate, &_mul_input_cell_res, cell_state_out, ConvertPolicy::SATURATE);
429 _mul_input_cell_res.allocator()->allocate();
430 _forget_gate.allocator()->allocate();
431 if(_has_cell_clipping)
432 {
433 _cell_clip.configure(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip, quantized_cell_clip));
434 }
435 // Output gate.
436 const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
437 const float input_to_output_scale = input_to_output_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
438 configure_mm(_mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
439 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
440 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
441 mm_out_info, output_outstage_info);
442
443 const float recurrent_to_output_scale = recurrent_to_output_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
444 configure_mm(_mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
445 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
446 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
447 mm_out_info, output_outstage_info);
448
449 _accumulate_input_recurrent_output.configure(&_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
450 _input_to_output_outstage_res.allocator()->allocate();
451
452 if(_has_peephole)
453 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100454 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000455 // Here we are not using the output stage because all operations are done in float
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100456 _mul_cell_to_output_res.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::S32));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000457 _memory_group.manage(&_mul_cell_to_output_res);
458 _pixelwise_mul_cell_to_output.configure(cell_state_out, lstm_params.cell_to_output_weights(), &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100459
460 const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->info()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
461 quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
462 _cell_to_output_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_output_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0)));
463 _memory_group.manage(&_cell_to_output_outstage_res);
464 _cell_to_output_outstage.configure(&_mul_cell_to_output_res, nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000465 _mul_cell_to_output_res.allocator()->allocate();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100466
467 _accumulate_cell_to_output.configure(&_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res, ConvertPolicy::SATURATE);
468 _cell_to_output_outstage_res.allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000469 }
470
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100471 Tensor *output_activation_input = &_recurrent_to_output_outstage_res;
472
473 if(_has_layer_norm)
474 {
475 configure_layer_norm(LayerNormGate::Output, output_activation_input);
476 output_activation_input->allocator()->allocate();
477 output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
478 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000479 const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100480
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000481 _memory_group.manage(&_output_gate);
482 _output_gate.allocator()->init(output_gate_info);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100483 _output_gate_sigmoid.configure(output_activation_input, &_output_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
484 output_activation_input->allocator()->allocate();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000485
486 // Hidden.
487 _hidden_tanh.configure(cell_state_out, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100488 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000489 _memory_group.manage(&_hidden_mul_res);
490 const TensorInfo hidden_mul_res(_input_gate.info()->tensor_shape(), 1, DataType::S32);
491 _hidden_mul_res.allocator()->init(hidden_mul_res);
492 _pixelwise_mul_hidden.configure(&_output_gate, &_input_gate, &_hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
493 _output_gate.allocator()->allocate();
494 _input_gate.allocator()->allocate();
495 const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
Sang-Hoon Park30b46a62020-04-18 01:40:57 +0100496 quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000497 gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
498 gemmlowp_info.output_data_type = output_state_in->info()->data_type();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100499
500 _projection_tensor_copy_required = (num_units != output_size);
501 ITensor *hidden_gate_result = output_state_out;
502
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100503 _memory_group.manage(&_hidden_gate);
504
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100505 if(_projection_tensor_copy_required)
506 {
507 _hidden_gate.allocator()->init(*output_state_out->info());
508 _hidden_gate.info()->set_tensor_shape(_hidden_mul_res.info()->tensor_shape());
509 hidden_gate_result = &_hidden_gate;
510 }
511
512 _hidden_outstage.configure(&_hidden_mul_res, nullptr, hidden_gate_result, gemmlowp_info);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000513 _hidden_mul_res.allocator()->allocate();
514
515 // Projection.
516 if(_has_projection)
517 {
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100518 const TensorInfo projection_outstage_info(*output_state_out->info());
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000519 const UniformQuantizationInfo qprojection = _projection_weights->info()->quantization_info().uniform();
520 const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
521 gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
522 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
523 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
524 gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
525
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100526 TensorInfo projection_mm_out_info{ mm_out_info };
527 projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100528
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000529 configure_mm(_mm_projection, _projection_outstage, gemmlowp_info,
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100530 hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000531 &_mm_projection_res, &_projection_outstage_res, projection_scale,
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100532 projection_mm_out_info, projection_outstage_info);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100533
534 ITensor *accumulate_destination = output_state_out;
535
536 if(_projection_tensor_copy_required)
537 {
538 _hidden_gate.allocator()->allocate();
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100539 _projection_accumulate_res.allocator()->init(*output_state_in->info());
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100540 _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100541 _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100542 accumulate_destination = &_projection_accumulate_res;
543 }
544
545 _accumulate_projection.configure(&_projection_outstage_res, accumulate_destination, accumulate_destination, ConvertPolicy::SATURATE);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000546 _projection_outstage_res.allocator()->allocate();
547
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100548 if(_projection_tensor_copy_required)
549 {
550 _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
551 _projection_accumulate_res.allocator()->allocate();
552 }
553
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000554 int8_t quantized_projection_clip{ 0 };
555 if(lstm_params.projection_clip() > 0.0f)
556 {
557 quantized_projection_clip = utility::clamp<int8_t>(lstm_params.projection_clip() / qprojection.scale, -128, 127);
558 }
559
560 if(quantized_projection_clip > 0)
561 {
562 _projection_clip.configure(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip, quantized_projection_clip));
563 _has_projection_clipping = true;
564 }
565 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100566 else
567 {
568 if(_projection_tensor_copy_required)
569 {
570 _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
571 _hidden_gate.allocator()->allocate();
572 }
573 }
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100574
575 // Copy output_state_out to output
576 _copy_output.configure(output_state_out, output);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000577}
578
579Status NEQLSTMLayer::validate(const ITensorInfo *input,
580 const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
581 const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
582 const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
583 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100584 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000585 const LSTMParams<ITensorInfo> &lstm_params)
586{
587 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100588 recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
589 cell_state_out, output_state_out, output);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000590
591 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED);
592 ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != 2, "Input must have exactly 2 dimensions");
593
594 const unsigned int input_size = input->dimension(0);
595 const unsigned int batch_size = input->dimension(1);
596 const unsigned int num_units = input_to_output_weights->dimension(1);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100597 const unsigned int output_size = output_state_out->dimension(_out_state_output_size_dimension_idx);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000598
599 ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() != 2);
600 ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->dimension(0) != input_size);
601 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_output_weights, input_to_forget_weights, input_to_cell_weights);
602 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() != 2);
603 ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->dimension(1) != num_units);
604 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights);
605 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_to_forget_weights, 1, DataType::QSYMM8);
606 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
607 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);
608
609 ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() != 1);
610 ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->dimension(0) != num_units);
611 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, cell_bias, output_gate_bias);
612 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(forget_gate_bias, 1, DataType::S32);
613 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, cell_bias, output_gate_bias);
614
615 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() != 2);
616 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(0) != num_units);
617 ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(1) != batch_size);
618 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(cell_state_in, 1, DataType::QSYMM16);
619
620 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() != 2);
621 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(0) != output_size);
622 ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(1) != batch_size);
623 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_in);
624
625 // Check whether peephole weights are all there or none
626 if(lstm_params.has_peephole_opt())
627 {
628 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
629 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
630 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() != 1);
631 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->dimension(0) != num_units);
632 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
633 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
634
635 if(!lstm_params.has_cifg_opt())
636 {
637 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_input_weights());
638 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
639 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
640 }
641 }
642
643 const UniformQuantizationInfo qinput = input->quantization_info().uniform();
644 const UniformQuantizationInfo qcell_state_in = cell_state_in->quantization_info().uniform();
645 const UniformQuantizationInfo qoutput_state_in = output_state_in->quantization_info().uniform();
646
647 // Calculate and decompose effective scales for optimizing matmul calculation
648 const int32_t cell_shift = log2(qcell_state_in.scale);
649 ARM_COMPUTE_RETURN_ERROR_ON(cell_shift > -9);
650
651 // Calculate quantized parameters for clipping.
652 int16_t quantized_cell_clip = 0;
653 if(lstm_params.cell_clip() > 0.0f)
654 {
655 quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
656 }
657
658 // Precompute effective bias for optimizing the matmul computations.
659 const TensorInfo eff_bias_info(TensorShape(num_units), 1, DataType::S32);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100660 const TensorInfo projection_eff_bias_info(TensorShape(output_size), 1, DataType::S32);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000661 if(!lstm_params.has_cifg_opt())
662 {
663 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.input_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
664 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.recurrent_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset,
665 true)));
666 }
667 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
668 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
669 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
670 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
671 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(input_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
672 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(recurrent_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100673 if(lstm_params.has_projection())
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000674 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100675 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info, GEMMLowpReductionKernelInfo(output_size, false,
676 lstm_params.hidden_state_zero(),
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000677 true)));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100678 if(lstm_params.projection_bias() != nullptr)
679 {
680 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100681 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
Michele Di Giorgio11c562c2020-06-10 16:34:50 +0100682 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000683 }
684
685 const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
686 const TensorInfo recurrent_weights_transposed(TensorShape(num_units, output_size), 1, recurrent_to_forget_weights->data_type(), recurrent_to_forget_weights->quantization_info());
687
688 // Validate weights transpose
689 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_forget_weights, &input_weights_transposed));
690 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_cell_weights, &input_weights_transposed));
691 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(input_to_output_weights, &input_weights_transposed));
692 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_forget_weights, &recurrent_weights_transposed));
693 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_cell_weights, &recurrent_weights_transposed));
694 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(recurrent_to_output_weights, &recurrent_weights_transposed));
695 if(!lstm_params.has_cifg_opt())
696 {
697 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.input_to_input_weights(), &input_weights_transposed));
698 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.recurrent_to_input_weights(), &recurrent_weights_transposed));
699 }
700 if(lstm_params.has_projection())
701 {
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100702 const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());
703 ARM_COMPUTE_RETURN_ON_ERROR(NETranspose::validate(lstm_params.projection_weights(), &projection_weights_transposed));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000704 }
705
706 GEMMLowpOutputStageInfo gemmlowp_info;
707 gemmlowp_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
708 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
709 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
710 gemmlowp_info.output_data_type = DataType::QSYMM16;
711
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100712 const bool has_layer_norm = lstm_params.use_layer_norm();
713
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000714 // Forget gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100715 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000716 const TensorInfo forget_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
717 const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
718 const float input_to_forget_scale = input_to_forget_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100719 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000720
721 const float recurrent_to_forget_scale = recurrent_to_forget_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100722 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000723
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100724 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000725
726 if(lstm_params.has_peephole_opt())
727 {
728 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100729 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
730 RoundingPolicy::TO_ZERO));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000731 const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
732 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
733 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, nullptr, &forget_outstage_info, gemmlowp_info));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100734 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000735 }
736
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100737 if(has_layer_norm)
738 {
739 const ITensorInfo *w_info = lstm_params.forget_layer_norm_weights();
740 const ITensorInfo *b_info = forget_gate_bias;
741 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(forget_outstage_info, *w_info, *b_info));
742 }
743
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000744 // Output quantization info of Sigmoid and Tanh activations
745 const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100746 const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000747
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000748 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&forget_outstage_info, &forget_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
749
750 // Modulation gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100751 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000752 const TensorInfo cell_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
753 const float input_to_cell_scale = input_to_cell_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100754 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000755
756 const float recurrent_to_cell_scale = recurrent_to_cell_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100757 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000758
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100759 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&cell_outstage_info, &cell_outstage_info, &cell_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000760
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100761 if(has_layer_norm)
762 {
763 const ITensorInfo *w_info = lstm_params.cell_layer_norm_weights();
764 const ITensorInfo *b_info = cell_bias;
765 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
766 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000767 const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100768
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000769 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&cell_outstage_info, &cell_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
770
771 // Input gate.
772 const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
773 if(lstm_params.has_cifg_opt())
774 {
775 ARM_COMPUTE_RETURN_ERROR_ON_MSG(lstm_params.input_gate_bias() != nullptr, "Input gate bias must not be present when CIFG is used");
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100776 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticSubtraction::validate(&input_gate_info, &forget_gate_info, &forget_gate_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000777 }
778 else
779 {
780 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
781 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights());
782 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_forget_weights, lstm_params.input_to_input_weights());
783 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_forget_weights, lstm_params.recurrent_to_input_weights());
784 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.input_gate_bias());
785 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, lstm_params.input_gate_bias());
786
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100787 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000788 const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
789 const float input_to_input_scale = lstm_params.input_to_input_weights()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100790 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000791
792 const float recurrent_to_input_scale = lstm_params.recurrent_to_input_weights()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100793 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000794
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100795 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000796
797 if(lstm_params.has_peephole_opt())
798 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100799 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_input_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
800 RoundingPolicy::TO_ZERO));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000801 const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
802 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100803 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&mm_out_info, &eff_bias_info, &input_outstage_info, gemmlowp_info));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100804 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000805 }
806
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100807 if(has_layer_norm)
808 {
809 const ITensorInfo *w_info = lstm_params.input_layer_norm_weights();
810 const ITensorInfo *b_info = lstm_params.input_gate_bias();
811 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(input_outstage_info, *w_info, *b_info));
812 }
813
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100814 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&input_outstage_info, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000815 }
816 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100817 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&forget_gate_info, cell_state_in, &forget_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
818 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&input_gate_info, cell_state_in, &cell_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100819 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000820 if(quantized_cell_clip > 0)
821 {
822 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip,
823 quantized_cell_clip)));
824 }
825 // Output gate.
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100826 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_intermediate_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000827 const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
828 const float input_to_output_scale = input_to_output_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100829 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000830
831 const float recurrent_to_output_scale = recurrent_to_output_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100832 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000833
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100834 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000835 if(lstm_params.has_peephole_opt())
836 {
837 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_output_weights(), 1, DataType::QSYMM16);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100838 // TODO(COMPMID-3395): Perform multiplication in the quantized domain in NEPixelWiseMultiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000839 // Here we are not using the output stage because all operations are done in float
840 // const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
841 // ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100842 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(cell_state_out, lstm_params.cell_to_output_weights(), &output_outstage_info, 1.f, ConvertPolicy::SATURATE,
843 RoundingPolicy::TO_ZERO));
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100844 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000845 }
846
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100847 if(has_layer_norm)
848 {
849 const ITensorInfo *w_info = lstm_params.output_layer_norm_weights();
850 const ITensorInfo *b_info = output_gate_bias;
851 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(output_outstage_info, *w_info, *b_info));
852 }
853
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000854 const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
855 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(&output_outstage_info, &output_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));
856
857 // Hidden.
858 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(cell_state_out, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
859 const TensorInfo hidden_mul_res(TensorShape(num_units, batch_size), 1, DataType::S32);
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100860 const TensorInfo hidden_out_info(TensorShape(num_units, batch_size), 1, DataType::QASYMM8_SIGNED);
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100861 ARM_COMPUTE_RETURN_ON_ERROR(NEPixelWiseMultiplication::validate(&output_gate_info, &input_gate_info, &hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100862
863 ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.hidden_state_scale() == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000864 const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
Sang-Hoon Park30b46a62020-04-18 01:40:57 +0100865 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true));
Sang-Hoon Park9f893752020-10-20 15:33:31 +0100866 gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
867 gemmlowp_info.output_data_type = hidden_out_info.data_type();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100868 ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpOutputStage::validate(&hidden_mul_res, nullptr, &hidden_out_info, gemmlowp_info));
869
870 const bool projection_tensor_copy_required = num_units != output_size;
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000871
872 // Projection.
873 if(lstm_params.has_projection())
874 {
875 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
Sang-Hoon Parkee4833d2020-05-20 09:13:32 +0100876 ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000877
878 const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
879 const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
880 ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(projection_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
881 gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
882 gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
883 gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
884 gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;
885
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100886 const TensorInfo projection_outstage_info(*output_state_out);
887 const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100888
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100889 TensorInfo projection_mm_out_info{ mm_out_info };
890 projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100891
Sang-Hoon Parka7431ae2020-05-12 11:13:30 +0100892 ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
893 &projection_outstage_info));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100894
895 if(projection_tensor_copy_required)
896 {
Sang-Hoon Park840a72c2020-09-23 13:24:13 +0100897 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100898 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000899
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100900 ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000901
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100902 if(projection_tensor_copy_required)
903 {
904 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(projection_outstage_info, *output_state_out));
905 }
906
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000907 int8_t quantized_projection_clip{ 0 };
908 if(lstm_params.projection_clip() > 0.0f)
909 {
910 quantized_projection_clip = quantize_qasymm8_signed(lstm_params.projection_clip(), qprojection);
911 }
912
913 if(quantized_projection_clip > 0)
914 {
915 ARM_COMPUTE_RETURN_ON_ERROR(NEActivationLayer::validate(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip,
916 quantized_projection_clip)));
917 }
918 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100919 else
920 {
921 if(projection_tensor_copy_required)
922 {
923 ARM_COMPUTE_RETURN_ON_ERROR(NEQLSTMLayer::TensorCopyKernel::validate(hidden_out_info, *output_state_out));
924 }
925 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000926
927 if(cell_state_out->total_size() > 0)
928 {
929 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(cell_state_in, cell_state_out);
930 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(cell_state_in, cell_state_out);
931 }
932
933 if(output_state_out->total_size() > 0)
934 {
935 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_out);
936 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
937 }
938
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100939 ARM_COMPUTE_RETURN_ON_ERROR(NECopy::validate(output_state_out, output));
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000940 return Status{};
941}
942
943void NEQLSTMLayer::run()
944{
945 prepare();
946
947 // Acquire all the temporaries
948 MemoryGroupResourceScope scope_mg(_memory_group);
949
950 // Forget gate.
951 _mm_input_to_forget.run();
952 _input_to_forget_outstage.run();
953
954 _mm_recurrent_to_forget.run();
955 _recurrent_to_forget_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100956 _accumulate_input_recurrent_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000957
958 if(_has_peephole)
959 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100960 _pixelwise_mul_cell_to_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000961 _cell_to_forget_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100962 _accumulate_cell_forget.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000963 }
964
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100965 if(_has_layer_norm)
966 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100967 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Forget).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100968 }
969
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000970 _forget_gate_sigmoid.run();
971
972 // Modulation gate.
973 _mm_input_to_cell.run();
974 _input_to_cell_outstage.run();
975
976 _mm_recurrent_to_cell.run();
977 _recurrent_to_cell_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100978 _accumulate_input_recurrent_modulation.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000979
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100980 if(_has_layer_norm)
981 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +0100982 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Cell).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100983 }
984
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000985 _cell_gate_tanh.run();
986
987 // Input gate
988 if(_has_cifg)
989 {
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100990 _input_gate_sub.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000991 }
992 else
993 {
994 _mm_input_to_input.run();
995 _input_to_input_outstage.run();
996 _mm_recurrent_to_input.run();
997 _recurrent_to_input_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100998 _accumulate_input_recurrent_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000999
1000 if(_has_peephole)
1001 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001002 _pixelwise_mul_cell_to_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001003 _cell_to_input_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001004 _accumulate_cell_input.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001005 }
1006
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001007 if(_has_layer_norm)
1008 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001009 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Input).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001010 }
1011
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001012 _input_gate_sigmoid.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001013 }
1014
1015 // Cell.
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001016 _pixelwise_mul_forget_cell.run();
1017 _pixelwise_mul_input_cell.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001018 _add_forget_cell.run();
1019
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001020 if(_has_cell_clipping)
1021 {
1022 _cell_clip.run();
1023 }
1024
1025 // Output gate.
1026 _mm_input_to_output.run();
1027 _input_to_output_outstage.run();
1028 _mm_recurrent_to_output.run();
1029 _recurrent_to_output_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001030 _accumulate_input_recurrent_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001031 if(_has_peephole)
1032 {
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001033 _pixelwise_mul_cell_to_output.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001034 _cell_to_output_outstage.run();
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001035 _accumulate_cell_to_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001036 }
1037
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001038 if(_has_layer_norm)
1039 {
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001040 NEScheduler::get().schedule(get_layer_norm(LayerNormGate::Output).get(), Window::DimY);
Sang-Hoon Park9230e272020-04-18 00:46:34 +01001041 }
1042
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001043 _output_gate_sigmoid.run();
1044
1045 // Hidden.
1046 _hidden_tanh.run();
Michalis Spyrou6eb73452020-07-02 17:39:25 +01001047 _pixelwise_mul_hidden.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001048 _hidden_outstage.run();
1049
1050 // Projection.
1051 if(_has_projection)
1052 {
1053 _mm_projection.run();
1054 _projection_outstage.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001055
1056 if(_projection_tensor_copy_required)
1057 {
1058 _projection_output_to_accumulate_copy.run();
1059 }
1060
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001061 _accumulate_projection.run();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001062
1063 if(_projection_tensor_copy_required)
1064 {
1065 _projection_accumulate_to_output_copy.run();
1066 }
1067
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001068 if(_has_projection_clipping)
1069 {
1070 _projection_clip.run();
1071 }
1072 }
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001073 else
1074 {
1075 if(_projection_tensor_copy_required)
1076 {
1077 _hidden_to_output_copy.run();
1078 }
1079 }
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +01001080
1081 // Copy output_state_out to output
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001082 _copy_output.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001083}
1084
1085void NEQLSTMLayer::prepare()
1086{
1087 if(!_is_prepared)
1088 {
1089 // Pre-transpose weights to be used in GEMM.
1090 _input_to_forget_weights_transposed.allocator()->allocate();
1091 _input_to_cell_weights_transposed.allocator()->allocate();
1092 _input_to_output_weights_transposed.allocator()->allocate();
1093 _recurrent_to_forget_weights_transposed.allocator()->allocate();
1094 _recurrent_to_cell_weights_transposed.allocator()->allocate();
1095 _recurrent_to_output_weights_transposed.allocator()->allocate();
1096 _transpose_input_to_forget_weights.run();
1097 _transpose_input_to_cell_weights.run();
1098 _transpose_input_to_output_weights.run();
1099 _transpose_recurrent_to_forget_weights.run();
1100 _transpose_recurrent_to_cell_weights.run();
1101 _transpose_recurrent_to_output_weights.run();
1102
1103 // Precompute effective biases
1104 if(_has_cifg)
1105 {
1106 std::fill_n(reinterpret_cast<int16_t *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 32767);
1107 }
1108 else
1109 {
1110 _input_to_input_eff_bias.allocator()->allocate();
1111 _recurrent_to_input_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001112 NEScheduler::get().schedule(_input_to_input_reduction.get(), Window::DimY);
1113 NEScheduler::get().schedule(_recurrent_to_input_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001114
1115 _input_to_input_weights_transposed.allocator()->allocate();
1116 _recurrent_to_input_weights_transposed.allocator()->allocate();
1117 _transpose_input_to_input_weights.run();
1118 _transpose_recurrent_to_input_weights.run();
1119 _input_to_input_weights->mark_as_unused();
1120 _recurrent_to_input_weights->mark_as_unused();
1121 }
1122 _input_to_forget_eff_bias.allocator()->allocate();
1123 _recurrent_to_forget_eff_bias.allocator()->allocate();
1124 _input_to_cell_eff_bias.allocator()->allocate();
1125 _recurrent_to_cell_eff_bias.allocator()->allocate();
1126 _input_to_output_eff_bias.allocator()->allocate();
1127 _recurrent_to_output_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001128 NEScheduler::get().schedule(_input_to_forget_reduction.get(), Window::DimY);
1129 NEScheduler::get().schedule(_recurrent_to_forget_reduction.get(), Window::DimY);
1130 NEScheduler::get().schedule(_input_to_cell_reduction.get(), Window::DimY);
1131 NEScheduler::get().schedule(_recurrent_to_cell_reduction.get(), Window::DimY);
1132 NEScheduler::get().schedule(_input_to_output_reduction.get(), Window::DimY);
1133 NEScheduler::get().schedule(_recurrent_to_output_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001134
1135 if(_has_projection)
1136 {
Michele Di Giorgio11c562c2020-06-10 16:34:50 +01001137 _projection_eff_bias.allocator()->allocate();
Michalis Spyrouebcebf12020-10-21 00:04:14 +01001138 NEScheduler::get().schedule(_projection_reduction.get(), Window::DimY);
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001139 if(_projection_bias != nullptr)
1140 {
Michalis Spyrou173ba9b2020-06-23 17:25:43 +01001141 _projection_bias_add.run();
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001142 _projection_bias->mark_as_unused();
1143 }
1144
1145 _projection_weights_transposed.allocator()->allocate();
1146 _transpose_projection_weights.run();
1147 _projection_weights->mark_as_unused();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001148
1149 if(!_projection_tensor_copy_required)
1150 {
1151 _hidden_gate.mark_as_unused();
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +01001152 _projection_accumulate_res.mark_as_unused();
1153 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001154 }
1155
1156 // Mark weights as unused
1157 _input_to_forget_weights->mark_as_unused();
1158 _input_to_cell_weights->mark_as_unused();
1159 _input_to_output_weights->mark_as_unused();
1160 _recurrent_to_forget_weights->mark_as_unused();
1161 _recurrent_to_cell_weights->mark_as_unused();
1162 _recurrent_to_output_weights->mark_as_unused();
1163
1164 _is_prepared = true;
1165 }
1166}
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001167} // namespace arm_compute