/*
 * Copyright (c) 2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/functions/CLQLSTMLayer.h"

#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/helpers/WindowHelpers.h"

namespace arm_compute
{
using namespace arm_compute::utils::info_helpers;
namespace
{
Status validate_mm(GEMMLowpOutputStageInfo &gemmlowp_info, const ITensorInfo *mm_input, const ITensorInfo *mm_weights, const ITensorInfo *bias,
                   float gemmlowp_scale, const TensorInfo *mm_res_info, const TensorInfo *outstage_tensor_info)
{
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixMultiplyCore::validate(mm_input, mm_weights, nullptr, mm_res_info));
    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(mm_res_info, bias, outstage_tensor_info, gemmlowp_info));
    return Status{};
}
} // namespace

Status CLQLSTMLayer::TensorCopyKernel::validate(const ITensorInfo &src, const ITensorInfo &dst)
{
    ARM_COMPUTE_RETURN_ERROR_ON(src.tensor_shape().num_dimensions() > max_dimension_supported);
    ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().num_dimensions() > max_dimension_supported);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(&src, &dst);
    ARM_COMPUTE_RETURN_ERROR_ON(dst.tensor_shape().y() != src.tensor_shape().y());
    return Status{};
}

void CLQLSTMLayer::TensorCopyKernel::configure(ICLTensor &src, ICLTensor &dst)
{
    ARM_COMPUTE_ERROR_THROW_ON(CLQLSTMLayer::TensorCopyKernel::validate(*src.info(), *dst.info()));
    _src      = &src;
    _dst      = &dst;
    _row_size = std::min(_src->info()->tensor_shape().x(), _dst->info()->tensor_shape().x());
    _window   = calculate_max_window(*_src->info(), Steps());
}

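// Performs the copy on the host: both tensors are mapped, and for every window iteration
// one row of _row_size bytes is copied. Note: _row_size is the smaller of the two
// x-dimensions expressed in elements; using it directly as a byte count assumes one byte
// per element, which holds for the QASYMM8_SIGNED state tensors this helper is used with.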
void CLQLSTMLayer::TensorCopyKernel::run()
{
    auto &q = CLScheduler::get().queue();

    _src->map(q, true);
    _dst->map(q, true);

    Iterator input_iter{ _src, _window };
    Iterator output_iter{ _dst, _window };

    execute_window_loop(_window, [&](const Coordinates &)
    {
        memcpy(output_iter.ptr(), input_iter.ptr(), _row_size);
    },
    input_iter, output_iter);

    _src->unmap(q);
    _dst->unmap(q);
}

CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
{
    _memory_group = MemoryGroup(std::move(memory_manager));
}

void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                                const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias,
                                CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale,
                                const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info)
{
    _memory_group.manage(mm_res);
    _memory_group.manage(outstage_res);

    mm_res->allocator()->init(mm_res_info);
    outstage_res->allocator()->init(outstage_tensor_info);

    // Configure matrix-multiplication
    mm.configure(compile_context, mm_input, mm_weights, nullptr, mm_res);

    // Configure output stage
    quantization::calculate_quantized_multiplier(gemmlowp_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
    outstage.configure(compile_context, mm_res, bias, outstage_res, gemmlowp_info);
    mm_res->allocator()->allocate();
}

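// Illustrative usage sketch (not part of the implementation; names are placeholders).
// All tensors are assumed to be initialised beforehand with the shapes and quantization
// required by validate() below, and the LSTMParams object must additionally be populated
// with the intermediate scales and hidden-state quantization via the setters declared in
// LSTMParams.h (omitted here).
//
//     CLQLSTMLayer          qlstm;
//     LSTMParams<ICLTensor> params;
//     // ... populate params ...
//     qlstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
//                     &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
//                     &forget_gate_bias, &cell_bias, &output_gate_bias,
//                     &cell_state_in, &output_state_in,
//                     &cell_state_out, &output_state_out, &output, params);
//     qlstm.run(); // one invocation per time step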
void CLQLSTMLayer::configure(const ICLTensor *input,
                             const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                             const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                             const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                             ICLTensor *cell_state_in, ICLTensor *output_state_in,
                             ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                             const LSTMParams<ICLTensor> &lstm_params)
{
    configure(CLKernelLibrary::get().get_compile_context(), input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
              recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias,
              cell_state_in, output_state_in, cell_state_out, output_state_out, output, lstm_params);
}

void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input,
                             const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                             const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                             const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                             ICLTensor *cell_state_in, ICLTensor *output_state_in,
                             ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                             const LSTMParams<ICLTensor> &lstm_params)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                 recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
                                 forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
                                 cell_state_out, output_state_out, output);

    // Set lstm parameters
    LSTMParams<ITensorInfo> lstm_params_info{};
    build_lstm_params_tensor_info(lstm_params, &lstm_params_info);

    // Validate
    ARM_COMPUTE_ERROR_THROW_ON(CLQLSTMLayer::validate(input->info(), input_to_forget_weights->info(), input_to_cell_weights->info(), input_to_output_weights->info(),
                                                      recurrent_to_forget_weights->info(), recurrent_to_cell_weights->info(), recurrent_to_output_weights->info(),
                                                      forget_gate_bias->info(), cell_bias->info(), output_gate_bias->info(),
                                                      cell_state_in->info(), output_state_in->info(), cell_state_out->info(), output_state_out->info(), output->info(),
                                                      lstm_params_info));

    const int batch_size  = input->info()->dimension(1);
    const int num_units   = input_to_output_weights->info()->dimension(1);
    const int output_size = output_state_out->info()->dimension(_out_state_output_size_dimension_idx);

    const UniformQuantizationInfo qinput           = input->info()->quantization_info().uniform();
    const UniformQuantizationInfo qcell_state_in   = cell_state_in->info()->quantization_info().uniform();
    const UniformQuantizationInfo qoutput_state_in = output_state_in->info()->quantization_info().uniform();

    _projection_bias             = lstm_params.projection_bias();
    _input_to_forget_weights     = input_to_forget_weights;
    _input_to_cell_weights       = input_to_cell_weights;
    _input_to_output_weights     = input_to_output_weights;
    _recurrent_to_forget_weights = recurrent_to_forget_weights;
    _recurrent_to_cell_weights   = recurrent_to_cell_weights;
    _recurrent_to_output_weights = recurrent_to_output_weights;
    _projection_weights          = lstm_params.projection_weights();

    // Layer normalization
    _has_layer_norm = lstm_params.use_layer_norm();
    if(_has_layer_norm)
    {
        set_layer_norm_weight(lstm_params.forget_layer_norm_weights(), LayerNormGate::Forget);
        set_layer_norm_weight(lstm_params.cell_layer_norm_weights(), LayerNormGate::Cell);
        set_layer_norm_weight(lstm_params.input_layer_norm_weights(), LayerNormGate::Input);
        set_layer_norm_weight(lstm_params.output_layer_norm_weights(), LayerNormGate::Output);

        set_layer_norm_bias(forget_gate_bias, LayerNormGate::Forget);
        set_layer_norm_bias(cell_bias, LayerNormGate::Cell);
        set_layer_norm_bias(lstm_params.input_gate_bias(), LayerNormGate::Input);
        set_layer_norm_bias(output_gate_bias, LayerNormGate::Output);
    }

    _has_cifg       = lstm_params.has_cifg_opt();
    _has_projection = lstm_params.has_projection();
    _has_peephole   = lstm_params.has_peephole_opt();

    // Calculate and decompose effective scales for optimizing matmul calculation
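    // Note: the cell state is expected to carry a power-of-two scale (QSYMM16), so log2 of
    // its scale yields an integral shift that can be folded directly into the fixed-point
    // rescaling performed below.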
    const int32_t cell_shift = log2(qcell_state_in.scale);

    // Calculate quantized parameters for clipping.
    int16_t quantized_cell_clip = 0;
    if(lstm_params.cell_clip() > 0.0f)
    {
        quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
    }
    _has_cell_clipping = quantized_cell_clip > 0;

    // Precompute effective bias for optimizing the matmul computations.
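    // In GEMMLowp, multiplying offset-quantized activations by symmetric weights introduces a
    // per-output term equal to the activation offset times the sum of the weights feeding that
    // output. The reduction kernels below compute those weight sums once, scaled by the negated
    // input/output-state offsets, so they can be fed to the GEMM output stage as a bias instead
    // of being recomputed on every run.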
    if(!_has_cifg)
    {
        _input_to_input_weights     = lstm_params.input_to_input_weights();
        _recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();

        _input_to_input_reduction.configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
        _recurrent_to_input_reduction.configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
    }
    _input_to_forget_reduction.configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
    _recurrent_to_forget_reduction.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
    _input_to_cell_reduction.configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
    _recurrent_to_cell_reduction.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
    _input_to_output_reduction.configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
    _recurrent_to_output_reduction.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
    if(_has_projection)
    {
        _projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
        if(_projection_bias != nullptr)
        {
            _projection_bias_add.configure(compile_context, _projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
        }
    }

    // Pre-transpose weights to be used in GEMM.
    _transpose_input_to_forget_weights.configure(compile_context, input_to_forget_weights, &_input_to_forget_weights_transposed);
    _transpose_input_to_cell_weights.configure(compile_context, input_to_cell_weights, &_input_to_cell_weights_transposed);
    _transpose_input_to_output_weights.configure(compile_context, input_to_output_weights, &_input_to_output_weights_transposed);
    _transpose_recurrent_to_forget_weights.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_weights_transposed);
    _transpose_recurrent_to_cell_weights.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_weights_transposed);
    _transpose_recurrent_to_output_weights.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_weights_transposed);
    if(!_has_cifg)
    {
        _transpose_input_to_input_weights.configure(compile_context, lstm_params.input_to_input_weights(), &_input_to_input_weights_transposed);
        _transpose_recurrent_to_input_weights.configure(compile_context, lstm_params.recurrent_to_input_weights(), &_recurrent_to_input_weights_transposed);
    }
    if(_has_projection)
    {
        _transpose_projection_weights.configure(compile_context, _projection_weights, &_projection_weights_transposed);
    }

    GEMMLowpOutputStageInfo gemmlowp_info;
    gemmlowp_info.type               = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
    gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
    gemmlowp_info.output_data_type   = DataType::QSYMM16;

    const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
    // Forget gate.
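    // For each gate, the GEMM accumulator implicitly carries a scale of
    // (input scale * weight scale); the output stage rescales it to the gate's intermediate
    // scale, hence the effective multiplier below is input_scale * weight_scale / intermediate_scale.
    // The same pattern is repeated for the recurrent, modulation, input and output gate paths.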
    const TensorInfo forget_gate_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
    const float      input_to_forget_scale = input_to_forget_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
    configure_mm(compile_context, _mm_input_to_forget, _input_to_forget_outstage, gemmlowp_info,
                 input, &_input_to_forget_weights_transposed, &_input_to_forget_eff_bias,
                 &_mm_input_to_forget_res, &_input_to_forget_outstage_res, input_to_forget_scale,
                 mm_out_info, forget_gate_outstage_info);

    const float recurrent_to_forget_scale = recurrent_to_forget_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
    configure_mm(compile_context, _mm_recurrent_to_forget, _recurrent_to_forget_outstage, gemmlowp_info,
                 output_state_in, &_recurrent_to_forget_weights_transposed, &_recurrent_to_forget_eff_bias,
                 &_mm_recurrent_to_forget_res, &_recurrent_to_forget_outstage_res, recurrent_to_forget_scale,
                 mm_out_info, forget_gate_outstage_info);

    _accumulate_input_recurrent_forget.configure(compile_context, &_input_to_forget_outstage_res, &_recurrent_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
                                                 ConvertPolicy::SATURATE);
    _input_to_forget_outstage_res.allocator()->allocate();

    if(_has_peephole)
    {
        _mul_cell_to_forget_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
        _memory_group.manage(&_mul_cell_to_forget_res);
        _pixelwise_mul_cell_to_forget.configure(compile_context, cell_state_in, lstm_params.cell_to_forget_weights(), &_mul_cell_to_forget_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
        _cell_to_forget_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_forget_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0)));
        _memory_group.manage(&_cell_to_forget_outstage_res);
        const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->info()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
        quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
        _cell_to_forget_outstage.configure(compile_context, &_mul_cell_to_forget_res, nullptr, &_cell_to_forget_outstage_res, gemmlowp_info);
        _mul_cell_to_forget_res.allocator()->allocate();
        _accumulate_cell_forget.configure(compile_context, &_recurrent_to_forget_outstage_res, &_cell_to_forget_outstage_res, &_recurrent_to_forget_outstage_res,
                                          ConvertPolicy::SATURATE);
        _cell_to_forget_outstage_res.allocator()->allocate();
    }

    CLTensor *forget_activation_input = &_recurrent_to_forget_outstage_res;

    if(_has_layer_norm)
    {
        configure_layer_norm(LayerNormGate::Forget, &_recurrent_to_forget_outstage_res);
        _recurrent_to_forget_outstage_res.allocator()->allocate();
        forget_activation_input = &get_layer_norm_output(LayerNormGate::Forget);
    }

    // Output quantization info of Sigmoid and Tanh activations
    const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);

    const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    _memory_group.manage(&_forget_gate);
    _forget_gate.allocator()->init(forget_gate_info);
    _forget_gate_sigmoid.configure(compile_context, forget_activation_input, &_forget_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    forget_activation_input->allocator()->allocate();

    // Modulation gate.
    const TensorInfo cell_outstage_info(mm_out_info.tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
    const float      input_to_cell_scale = input_to_cell_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
    configure_mm(compile_context, _mm_input_to_cell, _input_to_cell_outstage, gemmlowp_info,
                 input, &_input_to_cell_weights_transposed, &_input_to_cell_eff_bias,
                 &_mm_input_to_cell_res, &_input_to_cell_outstage_res, input_to_cell_scale,
                 mm_out_info, cell_outstage_info);

    const float recurrent_to_cell_scale = recurrent_to_cell_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
    configure_mm(compile_context, _mm_recurrent_to_cell, _recurrent_to_cell_outstage, gemmlowp_info,
                 output_state_in, &_recurrent_to_cell_weights_transposed, &_recurrent_to_cell_eff_bias,
                 &_mm_recurrent_to_cell_res, &_recurrent_to_cell_outstage_res, recurrent_to_cell_scale,
                 mm_out_info, cell_outstage_info);

    _accumulate_input_recurrent_modulation.configure(compile_context, &_input_to_cell_outstage_res, &_recurrent_to_cell_outstage_res, &_recurrent_to_cell_outstage_res,
                                                     ConvertPolicy::SATURATE);
    _input_to_cell_outstage_res.allocator()->allocate();

    CLTensor *cell_activation_input = &_recurrent_to_cell_outstage_res;

    if(_has_layer_norm)
    {
        configure_layer_norm(LayerNormGate::Cell, &_recurrent_to_cell_outstage_res);
        _recurrent_to_cell_outstage_res.allocator()->allocate();
        cell_activation_input = &get_layer_norm_output(LayerNormGate::Cell);
    }

    const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    _memory_group.manage(&_cell_gate);
    _cell_gate.allocator()->init(cell_gate_info);
    _cell_gate_tanh.configure(compile_context, cell_activation_input, &_cell_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
    cell_activation_input->allocator()->allocate();

    // Input gate.
    const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    _input_gate.allocator()->init(input_gate_info);
    _memory_group.manage(&_input_gate);
    if(_has_cifg)
    {
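        // With the CIFG (coupled input-forget gate) optimization the input gate is not computed
        // from its own weights; it is derived as input_gate = 1 - forget_gate.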
        _ones.allocator()->init(*_forget_gate.info());
        _input_gate_sub.configure(compile_context, &_ones, &_forget_gate, &_input_gate, ConvertPolicy::SATURATE);
        _ones.allocator()->allocate();
    }
    else
    {
        const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
        const float      input_to_input_scale = _input_to_input_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
        configure_mm(compile_context, _mm_input_to_input, _input_to_input_outstage, gemmlowp_info,
                     input, &_input_to_input_weights_transposed, &_input_to_input_eff_bias,
                     &_mm_input_to_input_res, &_input_to_input_outstage_res, input_to_input_scale,
                     mm_out_info, input_outstage_info);

        const float recurrent_to_input_scale = _recurrent_to_input_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
        configure_mm(compile_context, _mm_recurrent_to_input, _recurrent_to_input_outstage, gemmlowp_info,
                     output_state_in, &_recurrent_to_input_weights_transposed, &_recurrent_to_input_eff_bias,
                     &_mm_recurrent_to_input_res, &_recurrent_to_input_outstage_res, recurrent_to_input_scale,
                     mm_out_info, input_outstage_info);
        _accumulate_input_recurrent_input.configure(compile_context, &_input_to_input_outstage_res, &_recurrent_to_input_outstage_res, &_recurrent_to_input_outstage_res,
                                                    ConvertPolicy::SATURATE);
        _input_to_input_outstage_res.allocator()->allocate();

        if(_has_peephole)
        {
            _mul_cell_to_input_res.allocator()->init(TensorInfo(cell_state_in->info()->tensor_shape(), 1, DataType::S32));
            _memory_group.manage(&_mul_cell_to_input_res);
            _pixelwise_mul_cell_to_input.configure(compile_context, cell_state_in, lstm_params.cell_to_input_weights(), &_mul_cell_to_input_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
            const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->info()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
            quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
            _cell_to_input_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_input_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0)));
            _memory_group.manage(&_cell_to_input_outstage_res);
            _cell_to_input_outstage.configure(compile_context, &_mul_cell_to_input_res, nullptr, &_cell_to_input_outstage_res, gemmlowp_info);
            _mul_cell_to_input_res.allocator()->allocate();
            _accumulate_cell_input.configure(&_recurrent_to_input_outstage_res, &_cell_to_input_outstage_res, &_recurrent_to_input_outstage_res, ConvertPolicy::SATURATE);
            _cell_to_input_outstage_res.allocator()->allocate();
        }

        CLTensor *input_activation_input = &_recurrent_to_input_outstage_res;

        if(_has_layer_norm)
        {
            configure_layer_norm(LayerNormGate::Input, &_recurrent_to_input_outstage_res);
            _recurrent_to_input_outstage_res.allocator()->allocate();
            input_activation_input = &get_layer_norm_output(LayerNormGate::Input);
        }

        _input_gate_sigmoid.configure(compile_context, input_activation_input, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
        input_activation_input->allocator()->allocate();
    }
    // Cell.
    // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
    _pixelwise_mul_forget_cell.configure(compile_context, &_forget_gate, cell_state_in, &_forget_gate, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
    const float      cell_gate_scale      = _cell_gate.info()->quantization_info().uniform().scale;
    const float      mul_input_cell_scale = cell_gate_scale * std::pow(2, 15 + cell_shift);
    const TensorInfo mul_input_cell_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(mul_input_cell_scale, 0));
    _memory_group.manage(&_mul_input_cell_res);
    _mul_input_cell_res.allocator()->init(mul_input_cell_info);
    _pixelwise_mul_input_cell.configure(compile_context, &_input_gate, &_cell_gate, &_mul_input_cell_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
    _cell_gate.allocator()->allocate();
    _add_forget_cell.configure(compile_context, &_forget_gate, &_mul_input_cell_res, cell_state_out, ConvertPolicy::SATURATE);
    _mul_input_cell_res.allocator()->allocate();
    _forget_gate.allocator()->allocate();
    if(_has_cell_clipping)
    {
        _cell_clip.configure(compile_context, cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip, quantized_cell_clip));
    }
    // Output gate.
    const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
    const float      input_to_output_scale = input_to_output_weights->info()->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
    configure_mm(compile_context, _mm_input_to_output, _input_to_output_outstage, gemmlowp_info,
                 input, &_input_to_output_weights_transposed, &_input_to_output_eff_bias,
                 &_mm_input_to_output_res, &_input_to_output_outstage_res, input_to_output_scale,
                 mm_out_info, output_outstage_info);

    const float recurrent_to_output_scale = recurrent_to_output_weights->info()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
    configure_mm(compile_context, _mm_recurrent_to_output, _recurrent_to_output_outstage, gemmlowp_info,
                 output_state_in, &_recurrent_to_output_weights_transposed, &_recurrent_to_output_eff_bias,
                 &_mm_recurrent_to_output_res, &_recurrent_to_output_outstage_res, recurrent_to_output_scale,
                 mm_out_info, output_outstage_info);

    _accumulate_input_recurrent_output.configure(compile_context, &_recurrent_to_output_outstage_res, &_input_to_output_outstage_res, &_recurrent_to_output_outstage_res,
                                                 ConvertPolicy::SATURATE);
    _input_to_output_outstage_res.allocator()->allocate();

    if(_has_peephole)
    {
        // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
        // Here we are not using the output stage because all operations are done in float
        _mul_cell_to_output_res.allocator()->init(TensorInfo(cell_state_out->info()->tensor_shape(), 1, DataType::S32));
        _memory_group.manage(&_mul_cell_to_output_res);
        _pixelwise_mul_cell_to_output.configure(compile_context, cell_state_out, lstm_params.cell_to_output_weights(), &_mul_cell_to_output_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

        const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->info()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
        quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift);
        _cell_to_output_outstage_res.allocator()->init(TensorInfo(_mul_cell_to_output_res.info()->tensor_shape(), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0)));
        _memory_group.manage(&_cell_to_output_outstage_res);
        _cell_to_output_outstage.configure(compile_context, &_mul_cell_to_output_res, nullptr, &_cell_to_output_outstage_res, gemmlowp_info);
        _mul_cell_to_output_res.allocator()->allocate();

        _accumulate_cell_to_output.configure(compile_context, &_recurrent_to_output_outstage_res, &_cell_to_output_outstage_res, &_recurrent_to_output_outstage_res,
                                             ConvertPolicy::SATURATE);
        _cell_to_output_outstage_res.allocator()->allocate();
    }

    CLTensor *output_activation_input = &_recurrent_to_output_outstage_res;

    if(_has_layer_norm)
    {
        configure_layer_norm(LayerNormGate::Output, &_recurrent_to_output_outstage_res);
        _recurrent_to_output_outstage_res.allocator()->allocate();
        output_activation_input = &get_layer_norm_output(LayerNormGate::Output);
    }

    const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    _memory_group.manage(&_output_gate);
    _output_gate.allocator()->init(output_gate_info);
    _output_gate_sigmoid.configure(compile_context, output_activation_input, &_output_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC));
    output_activation_input->allocator()->allocate();

    // Hidden.
    _hidden_tanh.configure(compile_context, cell_state_out, &_input_gate, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f));
    // TODO(COMPMID-3396): Perform multiplication in the quantized domain in CLPixelWiseMultiplication
    _memory_group.manage(&_hidden_mul_res);
    const TensorInfo hidden_mul_res(_input_gate.info()->tensor_shape(), 1, DataType::S32);
    _hidden_mul_res.allocator()->init(hidden_mul_res);
    _pixelwise_mul_hidden.configure(compile_context, &_output_gate, &_input_gate, &_hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);
    _output_gate.allocator()->allocate();
    _input_gate.allocator()->allocate();
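    // Both operands of the pixelwise multiplication above (tanh(cell_state), written into
    // _input_gate, and the output gate) are Q0.15, i.e. scale 2^-15 each, so the S32 product
    // carries a scale of 2^-30; the output stage below rescales it to the user-provided
    // hidden-state quantization.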
    const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
    quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true);
    gemmlowp_info.gemmlowp_offset  = lstm_params.hidden_state_zero();
    gemmlowp_info.output_data_type = output_state_in->info()->data_type();

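    // When the projection changes the row width (output_size != num_units), the hidden gate
    // and the projection accumulation operate on tensors with different x-sizes, so the
    // TensorCopyKernel defined above is used to bridge between them.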
    _projection_tensor_copy_required = (num_units != output_size);
    ICLTensor *hidden_gate_result    = output_state_out;

    _memory_group.manage(&_hidden_gate);

    if(_projection_tensor_copy_required)
    {
        _hidden_gate.allocator()->init(*output_state_out->info());
        _hidden_gate.info()->set_tensor_shape(_hidden_mul_res.info()->tensor_shape());
        hidden_gate_result = &_hidden_gate;
    }

    _hidden_outstage.configure(compile_context, &_hidden_mul_res, nullptr, hidden_gate_result, gemmlowp_info);
    _hidden_mul_res.allocator()->allocate();

    // Projection.
    if(_has_projection)
    {
        const TensorInfo              projection_outstage_info(*output_state_out->info());
        const UniformQuantizationInfo qprojection      = _projection_weights->info()->quantization_info().uniform();
        const float                   projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
        gemmlowp_info.gemmlowp_offset    = qoutput_state_in.offset;
        gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
        gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
        gemmlowp_info.output_data_type   = DataType::QASYMM8_SIGNED;

        TensorInfo projection_mm_out_info{ mm_out_info };
        projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));

        configure_mm(compile_context, _mm_projection, _projection_outstage, gemmlowp_info,
                     hidden_gate_result, &_projection_weights_transposed, &_projection_eff_bias,
                     &_mm_projection_res, &_projection_outstage_res, projection_scale,
                     projection_mm_out_info, projection_outstage_info);

        ICLTensor *accumulate_destination = output_state_out;

        if(_projection_tensor_copy_required)
        {
            _hidden_gate.allocator()->allocate();
            _projection_accumulate_res.allocator()->init(*output_state_in->info());
            _projection_accumulate_res.info()->set_tensor_shape(_projection_outstage_res.info()->tensor_shape());
            _projection_output_to_accumulate_copy.configure(*output_state_in, _projection_accumulate_res);
            accumulate_destination = &_projection_accumulate_res;
        }

        _accumulate_projection.configure(compile_context, &_projection_outstage_res, accumulate_destination, accumulate_destination, ConvertPolicy::SATURATE);
        _projection_outstage_res.allocator()->allocate();

        if(_projection_tensor_copy_required)
        {
            _projection_accumulate_to_output_copy.configure(_projection_accumulate_res, *output_state_out);
            _projection_accumulate_res.allocator()->allocate();
        }

        int8_t quantized_projection_clip{ 0 };
        if(lstm_params.projection_clip() > 0.0f)
        {
            quantized_projection_clip = utility::clamp<int8_t>(lstm_params.projection_clip() / qprojection.scale, -128, 127);
        }

        if(quantized_projection_clip > 0)
        {
            _projection_clip.configure(compile_context, output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip,
                                                                                                       quantized_projection_clip));
            _has_projection_clipping = true;
        }
    }
    else
    {
        if(_projection_tensor_copy_required)
        {
            _hidden_to_output_copy.configure(_hidden_gate, *output_state_out);
            _hidden_gate.allocator()->allocate();
        }
    }

    // Copy output_state_out to output
    _copy_output.configure(compile_context, output_state_out, output);
}

Status CLQLSTMLayer::validate(const ITensorInfo *input,
                              const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                              const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                              const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                              const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                              const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
                              const LSTMParams<ITensorInfo> &lstm_params)
{
    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights,
                                        recurrent_to_output_weights, forget_gate_bias, cell_bias, output_gate_bias, cell_state_in, output_state_in,
                                        cell_state_out, output_state_out, output);

    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED);
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() != 2, "Input must have exactly 2 dimensions");

    const unsigned int input_size  = input->dimension(0);
    const unsigned int batch_size  = input->dimension(1);
    const unsigned int num_units   = input_to_output_weights->dimension(1);
    const unsigned int output_size = output_state_out->dimension(_out_state_output_size_dimension_idx);

    ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->num_dimensions() != 2);
    ARM_COMPUTE_RETURN_ERROR_ON(input_to_output_weights->dimension(0) != input_size);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_output_weights, input_to_forget_weights, input_to_cell_weights);
    ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->num_dimensions() != 2);
    ARM_COMPUTE_RETURN_ERROR_ON(recurrent_to_output_weights->dimension(1) != num_units);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_output_weights, recurrent_to_forget_weights, recurrent_to_cell_weights);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_to_forget_weights, 1, DataType::QSYMM8);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
                                                       recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights);

    ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->num_dimensions() != 1);
    ARM_COMPUTE_RETURN_ERROR_ON(forget_gate_bias->dimension(0) != num_units);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, cell_bias, output_gate_bias);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(forget_gate_bias, 1, DataType::S32);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, cell_bias, output_gate_bias);

    ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->num_dimensions() != 2);
    ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(0) != num_units);
    ARM_COMPUTE_RETURN_ERROR_ON(cell_state_in->dimension(1) != batch_size);
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(cell_state_in, 1, DataType::QSYMM16);

    ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->num_dimensions() != 2);
    ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(0) != output_size);
    ARM_COMPUTE_RETURN_ERROR_ON(output_state_in->dimension(1) != batch_size);
    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_in);

    // Check whether peephole weights are all there or none
    if(lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->num_dimensions() != 1);
        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_to_forget_weights()->dimension(0) != num_units);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_output_weights());

        if(!lstm_params.has_cifg_opt())
        {
            ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.cell_to_input_weights());
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(lstm_params.cell_to_forget_weights(), lstm_params.cell_to_input_weights());
        }
    }

    const UniformQuantizationInfo qinput           = input->quantization_info().uniform();
    const UniformQuantizationInfo qcell_state_in   = cell_state_in->quantization_info().uniform();
    const UniformQuantizationInfo qoutput_state_in = output_state_in->quantization_info().uniform();

    // Calculate and decompose effective scales for optimizing matmul calculation
    const int32_t cell_shift = log2(qcell_state_in.scale);
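    // The check below rejects cell-state scales larger than 2^-9 (i.e. fewer than 9 fractional
    // bits in the QSYMM16 cell state), presumably to guarantee sufficient precision for the
    // fixed-point cell arithmetic.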
    ARM_COMPUTE_RETURN_ERROR_ON(cell_shift > -9);

    // Calculate quantized parameters for clipping.
    int16_t quantized_cell_clip = 0;
    if(lstm_params.cell_clip() > 0.0f)
    {
        quantized_cell_clip = quantize_qsymm16(lstm_params.cell_clip(), qcell_state_in);
    }

    // Precompute effective bias for optimizing the matmul computations.
    const TensorInfo eff_bias_info(TensorShape(num_units), 1, DataType::S32);
    const TensorInfo projection_eff_bias_info(TensorShape(output_size), 1, DataType::S32);
    if(!lstm_params.has_cifg_opt())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(lstm_params.input_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(lstm_params.recurrent_to_input_weights(), &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset,
                                                                                                                                                                     true)));
    }
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(input_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(recurrent_to_forget_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(input_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(recurrent_to_cell_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(input_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true)));
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(recurrent_to_output_weights, &eff_bias_info, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true)));
    if(lstm_params.has_projection())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info, GEMMLowpReductionKernelInfo(output_size, false,
                                                                                                                                                                        lstm_params.hidden_state_zero(),
                                                                                                                                                                        true)));
        if(lstm_params.projection_bias() != nullptr)
        {
            ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
            ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(lstm_params.projection_bias(), &projection_eff_bias_info,
                                                                       &projection_eff_bias_info, ConvertPolicy::SATURATE));
        }
    }

    const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
    const TensorInfo recurrent_weights_transposed(TensorShape(num_units, output_size), 1, recurrent_to_forget_weights->data_type(), recurrent_to_forget_weights->quantization_info());

    // Validate weights transpose
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_forget_weights, &input_weights_transposed));
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_cell_weights, &input_weights_transposed));
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(input_to_output_weights, &input_weights_transposed));
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_forget_weights, &recurrent_weights_transposed));
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_cell_weights, &recurrent_weights_transposed));
    ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(recurrent_to_output_weights, &recurrent_weights_transposed));
    if(!lstm_params.has_cifg_opt())
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(lstm_params.input_to_input_weights(), &input_weights_transposed));
        ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(lstm_params.recurrent_to_input_weights(), &recurrent_weights_transposed));
    }
    if(lstm_params.has_projection())
    {
        const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());
        ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(lstm_params.projection_weights(), &projection_weights_transposed));
    }

    GEMMLowpOutputStageInfo gemmlowp_info;
    gemmlowp_info.type               = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int16_t>::lowest();
    gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int16_t>::max();
    gemmlowp_info.output_data_type   = DataType::QSYMM16;

    const bool has_layer_norm = lstm_params.use_layer_norm();

    // Forget gate.
    ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.forget_intermediate_scale() == 0);
    const TensorInfo forget_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.forget_intermediate_scale(), 0));
    const TensorInfo mm_out_info(TensorShape(num_units, batch_size), 1, DataType::S32);
    const float      input_to_forget_scale = input_to_forget_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.forget_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_forget_scale, &mm_out_info, &forget_outstage_info));

    const float recurrent_to_forget_scale = recurrent_to_forget_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.forget_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_forget_scale, &mm_out_info, &forget_outstage_info));

    ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));

    if(lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_forget_weights(), 1, DataType::QSYMM16);
        ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_forget_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
                                                                        RoundingPolicy::TO_ZERO));
        const float cell_to_forget_scale = std::pow(2, cell_shift) * lstm_params.cell_to_forget_weights()->quantization_info().uniform().scale / lstm_params.forget_intermediate_scale();
        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_forget_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
        ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(&mm_out_info, nullptr, &forget_outstage_info, gemmlowp_info));
        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_outstage_info, &forget_outstage_info, &forget_outstage_info, ConvertPolicy::SATURATE));
    }

    if(has_layer_norm)
    {
        const ITensorInfo *w_info = lstm_params.forget_layer_norm_weights();
        const ITensorInfo *b_info = forget_gate_bias;
        ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(forget_outstage_info, *w_info, *b_info));
    }

    // Output quantization info of Sigmoid and Tanh activations
    const QuantizationInfo sigmoid_tanh_outqinfo(1.f / 32768.f, 0);

    const TensorInfo forget_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&forget_outstage_info, &forget_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));

    // Modulation gate.
    ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.cell_intermediate_scale() == 0);
    const TensorInfo cell_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.cell_intermediate_scale(), 0));
    const float      input_to_cell_scale = input_to_cell_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.cell_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_cell_scale, &mm_out_info, &cell_outstage_info));

    const float recurrent_to_cell_scale = recurrent_to_cell_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.cell_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_cell_scale, &mm_out_info, &cell_outstage_info));

    ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&cell_outstage_info, &cell_outstage_info, &cell_outstage_info, ConvertPolicy::SATURATE));

    if(has_layer_norm)
    {
        const ITensorInfo *w_info = lstm_params.cell_layer_norm_weights();
        const ITensorInfo *b_info = cell_bias;
        ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
    }

    const TensorInfo cell_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&cell_outstage_info, &cell_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));

    // Input gate.
    const TensorInfo input_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    if(lstm_params.has_cifg_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(lstm_params.input_gate_bias() != nullptr, "Input gate bias must not be present when CIFG is used");
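        // With CIFG the input gate is not computed from its own weights; it is derived as input_gate = 1 - forget_gate.
        // At run time the constant 1 is held in a Q0.15 tensor filled with 32767 (see prepare()).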
        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticSubtraction::validate(&input_gate_info, &forget_gate_info, &forget_gate_info, ConvertPolicy::SATURATE));
    }
    else
    {
        ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights(), lstm_params.input_gate_bias());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input_to_forget_weights, lstm_params.input_to_input_weights(), lstm_params.recurrent_to_input_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input_to_forget_weights, lstm_params.input_to_input_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(recurrent_to_forget_weights, lstm_params.recurrent_to_input_weights());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.input_gate_bias());
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(forget_gate_bias, lstm_params.input_gate_bias());

        ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.input_intermediate_scale() == 0);
        const TensorInfo input_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.input_intermediate_scale(), 0));
        const float input_to_input_scale = lstm_params.input_to_input_weights()->quantization_info().uniform().scale * qinput.scale / lstm_params.input_intermediate_scale();
        ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_input_scale, &mm_out_info, &input_outstage_info));

        const float recurrent_to_input_scale = lstm_params.recurrent_to_input_weights()->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.input_intermediate_scale();
        ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_input_scale, &mm_out_info, &input_outstage_info));

        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));

        if(lstm_params.has_peephole_opt())
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(cell_state_in, lstm_params.cell_to_input_weights(), &mm_out_info, 1.f, ConvertPolicy::SATURATE,
                                                                            RoundingPolicy::TO_ZERO));
            const float cell_to_input_scale = std::pow(2, cell_shift) * lstm_params.cell_to_input_weights()->quantization_info().uniform().scale / lstm_params.input_intermediate_scale();
            ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_input_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
            ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(&mm_out_info, &eff_bias_info, &input_outstage_info, gemmlowp_info));
            ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&input_outstage_info, &input_outstage_info, &input_outstage_info, ConvertPolicy::SATURATE));
        }

        if(has_layer_norm)
        {
            const ITensorInfo *w_info = lstm_params.input_layer_norm_weights();
            const ITensorInfo *b_info = lstm_params.input_gate_bias();
782 ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(cell_outstage_info, *w_info, *b_info));
        }

        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&input_outstage_info, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 1.f, 1.f)));
    }
    // Cell.
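    // Standard LSTM cell-state update, c_t = f_t * c_{t-1} + i_t * g_t, validated on QSYMM16 tensors with saturation.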
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&forget_gate_info, cell_state_in, &forget_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&input_gate_info, cell_state_in, &cell_gate_info, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
    ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&forget_gate_info, &cell_gate_info, cell_state_out, ConvertPolicy::SATURATE));
    if(quantized_cell_clip > 0)
    {
        ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(cell_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_cell_clip,
                                                                quantized_cell_clip)));
    }
    // Output gate.
    ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.output_intermediate_scale() == 0);
    const TensorInfo output_outstage_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, QuantizationInfo(lstm_params.output_intermediate_scale(), 0));
    const float input_to_output_scale = input_to_output_weights->quantization_info().uniform().scale * qinput.scale / lstm_params.output_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, input, &input_weights_transposed, &eff_bias_info, input_to_output_scale, &mm_out_info, &output_outstage_info));

    const float recurrent_to_output_scale = recurrent_to_output_weights->quantization_info().uniform().scale * qoutput_state_in.scale / lstm_params.output_intermediate_scale();
    ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, output_state_in, &recurrent_weights_transposed, &eff_bias_info, recurrent_to_output_scale, &mm_out_info, &output_outstage_info));

    ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
    if(lstm_params.has_peephole_opt())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.cell_to_output_weights(), 1, DataType::QSYMM16);
        // TODO(COMPMID-3395): Perform multiplication in the quantized domain in CLPixelWiseMultiplicationKernel
        // Here we are not using the output stage because all operations are done in float
        // const float cell_to_output_scale = std::pow(2, cell_shift) * lstm_params.cell_to_output_weights()->quantization_info().uniform().scale / lstm_params.output_intermediate_scale();
        // ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(cell_to_output_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
        ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(cell_state_out, lstm_params.cell_to_output_weights(), &output_outstage_info, 1.f, ConvertPolicy::SATURATE,
                                                                        RoundingPolicy::TO_ZERO));
        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(&output_outstage_info, &output_outstage_info, &output_outstage_info, ConvertPolicy::SATURATE));
    }

    if(has_layer_norm)
    {
        const ITensorInfo *w_info = lstm_params.output_layer_norm_weights();
        const ITensorInfo *b_info = output_gate_bias;
        ARM_COMPUTE_RETURN_ON_ERROR(validate_layer_norm(output_outstage_info, *w_info, *b_info));
    }

    const TensorInfo output_gate_info(TensorShape(num_units, batch_size), 1, DataType::QSYMM16, sigmoid_tanh_outqinfo);
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(&output_outstage_info, &output_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)));

    // Hidden.
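    // h_t = o_t * tanh(c_t); the S32 product is then requantized from the internal Q0.15 representation to the
    // QASYMM8_SIGNED hidden/output state using hidden_state_scale and hidden_state_zero below.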
    ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(cell_state_out, &input_gate_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)));
    const TensorInfo hidden_mul_res(TensorShape(num_units, batch_size), 1, DataType::S32);
    const TensorInfo hidden_out_info(TensorShape(num_units, batch_size), 1, DataType::QASYMM8_SIGNED);

    ARM_COMPUTE_RETURN_ERROR_ON(lstm_params.hidden_state_scale() == 0);
    ARM_COMPUTE_RETURN_ON_ERROR(CLPixelWiseMultiplication::validate(&output_gate_info, &input_gate_info, &hidden_mul_res, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO));
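    // Both multiplicands are Q0.15 (scale 2^-15), so the raw S32 product has scale 2^-30; dividing by
    // hidden_state_scale gives the requantization factor applied by the output stage below.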
    const float hidden_state_scale = std::pow(2, -15) / lstm_params.hidden_state_scale() * std::pow(2, -15);
    ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(hidden_state_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift, /* ignore_epsilon */ true));
    gemmlowp_info.gemmlowp_offset = lstm_params.hidden_state_zero();
    ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpOutputStage::validate(&hidden_mul_res, nullptr, &hidden_out_info, gemmlowp_info));

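    // When num_units differs from output_size, the intermediate hidden/projection buffers and the output state
    // have different row widths, so explicit tensor copies are validated below around the projection accumulation.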
    const bool projection_tensor_copy_required = num_units != output_size;

    // Projection.
    if(lstm_params.has_projection())
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
        ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);

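        // The projection GEMM maps the QASYMM8_SIGNED hidden state to output_size; its rescale factor is
        // scale(projection_weights) * hidden_state_scale / scale(output_state), with the output-state zero
        // point used as the gemmlowp offset.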
        const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
        const float projection_scale = qprojection.scale * lstm_params.hidden_state_scale() / qoutput_state_in.scale;
        ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(projection_scale, &gemmlowp_info.gemmlowp_multiplier, &gemmlowp_info.gemmlowp_shift));
        gemmlowp_info.gemmlowp_offset = qoutput_state_in.offset;
        gemmlowp_info.gemmlowp_min_bound = std::numeric_limits<int8_t>::lowest();
        gemmlowp_info.gemmlowp_max_bound = std::numeric_limits<int8_t>::max();
        gemmlowp_info.output_data_type = DataType::QASYMM8_SIGNED;

        const TensorInfo projection_outstage_info(*output_state_out);
        const TensorInfo projection_weights_transposed(TensorShape(output_size, num_units), 1, lstm_params.projection_weights()->data_type(), lstm_params.projection_weights()->quantization_info());

        TensorInfo projection_mm_out_info{ mm_out_info };
        projection_mm_out_info.set_tensor_shape(TensorShape(output_size, batch_size));

        ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(gemmlowp_info, &hidden_out_info, &projection_weights_transposed, &projection_eff_bias_info, projection_scale, &projection_mm_out_info,
                                                &projection_outstage_info));

        if(projection_tensor_copy_required)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(*output_state_in, projection_outstage_info));
        }

        ARM_COMPUTE_RETURN_ON_ERROR(CLArithmeticAddition::validate(output_state_out, output_state_out, output_state_out, ConvertPolicy::SATURATE));

        if(projection_tensor_copy_required)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(projection_outstage_info, *output_state_out));
        }

        int8_t quantized_projection_clip{ 0 };
        if(lstm_params.projection_clip() > 0.0f)
        {
            quantized_projection_clip = quantize_qasymm8_signed(lstm_params.projection_clip(), qprojection);
        }

        if(quantized_projection_clip > 0)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLActivationLayer::validate(output_state_out, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, -quantized_projection_clip,
                                                                    quantized_projection_clip)));
        }
    }
    else
    {
        if(projection_tensor_copy_required)
        {
            ARM_COMPUTE_RETURN_ON_ERROR(CLQLSTMLayer::TensorCopyKernel::validate(hidden_out_info, *output_state_out));
        }
    }

    if(cell_state_out->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(cell_state_in, cell_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(cell_state_in, cell_state_out);
    }

    if(output_state_out->total_size() > 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output_state_out);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output_state_in, output_state_out);
    }

    ARM_COMPUTE_RETURN_ON_ERROR(CLCopyKernel::validate(output_state_out, output));
    return Status{};
}

void CLQLSTMLayer::run()
{
    prepare();

    // Acquire all the temporaries
    MemoryGroupResourceScope scope_mg(_memory_group);

    // Forget gate.
    _mm_input_to_forget.run();
    _input_to_forget_outstage.run();

    _mm_recurrent_to_forget.run();
    _recurrent_to_forget_outstage.run();
    _accumulate_input_recurrent_forget.run();

    if(_has_peephole)
    {
        _pixelwise_mul_cell_to_forget.run();
        _cell_to_forget_outstage.run();
        _accumulate_cell_forget.run();
    }

    if(_has_layer_norm)
    {
        CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Forget));
    }

    _forget_gate_sigmoid.run();

    // Modulation gate.
    _mm_input_to_cell.run();
    _input_to_cell_outstage.run();

    _mm_recurrent_to_cell.run();
    _recurrent_to_cell_outstage.run();
    _accumulate_input_recurrent_modulation.run();

    if(_has_layer_norm)
    {
        CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Cell));
    }

    _cell_gate_tanh.run();

    // Input gate
    if(_has_cifg)
    {
        _input_gate_sub.run();
    }
    else
    {
        _mm_input_to_input.run();
        _input_to_input_outstage.run();
        _mm_recurrent_to_input.run();
        _recurrent_to_input_outstage.run();
        _accumulate_input_recurrent_input.run();

        if(_has_peephole)
        {
            _pixelwise_mul_cell_to_input.run();
            _cell_to_input_outstage.run();
            _accumulate_cell_input.run();
        }

        if(_has_layer_norm)
        {
            CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Input));
        }

        _input_gate_sigmoid.run();
    }

    // Cell.
    _pixelwise_mul_forget_cell.run();
    _pixelwise_mul_input_cell.run();
    _add_forget_cell.run();
    if(_has_cell_clipping)
    {
        _cell_clip.run();
    }

    // Output gate.
    _mm_input_to_output.run();
    _input_to_output_outstage.run();
    _mm_recurrent_to_output.run();
    _recurrent_to_output_outstage.run();
    _accumulate_input_recurrent_output.run();
    if(_has_peephole)
    {
        _pixelwise_mul_cell_to_output.run();
        _cell_to_output_outstage.run();
        _accumulate_cell_to_output.run();
    }

    if(_has_layer_norm)
    {
        CLScheduler::get().enqueue(get_layer_norm(LayerNormGate::Output));
    }

    _output_gate_sigmoid.run();

    // Hidden.
    _hidden_tanh.run();
    _pixelwise_mul_hidden.run();
    _hidden_outstage.run();

    // Projection.
    if(_has_projection)
    {
        _mm_projection.run();
        _projection_outstage.run();

        if(_projection_tensor_copy_required)
        {
            _projection_output_to_accumulate_copy.run();
        }

        _accumulate_projection.run();

        if(_projection_tensor_copy_required)
        {
            _projection_accumulate_to_output_copy.run();
        }

        if(_has_projection_clipping)
        {
            _projection_clip.run();
        }
    }
    else
    {
        if(_projection_tensor_copy_required)
        {
            _hidden_to_output_copy.run();
        }
    }

    // Copy output_state_out to output
    CLScheduler::get().enqueue(_copy_output);
}

void CLQLSTMLayer::prepare()
{
    if(!_is_prepared)
    {
        // Pre-transpose weights to be used in GEMM.
        _input_to_forget_weights_transposed.allocator()->allocate();
        _input_to_cell_weights_transposed.allocator()->allocate();
        _input_to_output_weights_transposed.allocator()->allocate();
        _recurrent_to_forget_weights_transposed.allocator()->allocate();
        _recurrent_to_cell_weights_transposed.allocator()->allocate();
        _recurrent_to_output_weights_transposed.allocator()->allocate();
        _transpose_input_to_forget_weights.run();
        _transpose_input_to_cell_weights.run();
        _transpose_input_to_output_weights.run();
        _transpose_recurrent_to_forget_weights.run();
        _transpose_recurrent_to_cell_weights.run();
        _transpose_recurrent_to_output_weights.run();

        // Precompute effective biases
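        // Note: the *_reduction kernels enqueued below appear to fold the zero-point contribution of the inputs
        // and the recurrent state (offset * row-sum of the corresponding weights) into "effective" bias tensors,
        // so the GEMMLowp cores can run without a separate offset-correction stage.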
        if(_has_cifg)
        {
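            // CIFG: fill the _ones tensor with 32767, i.e. roughly 1.0 in Q0.15, so the input gate can be
            // computed at run time as ones - forget_gate.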
            _ones.map(true);
            std::fill_n(reinterpret_cast<int16_t *>(_ones.buffer()), _ones.info()->total_size() / _ones.info()->element_size(), 32767);
            _ones.unmap();
        }
        else
        {
            _input_to_input_eff_bias.allocator()->allocate();
            _recurrent_to_input_eff_bias.allocator()->allocate();
            CLScheduler::get().enqueue(_input_to_input_reduction);
            CLScheduler::get().enqueue(_recurrent_to_input_reduction);

            _input_to_input_weights_transposed.allocator()->allocate();
            _recurrent_to_input_weights_transposed.allocator()->allocate();
            _transpose_input_to_input_weights.run();
            _transpose_recurrent_to_input_weights.run();
            _input_to_input_weights->mark_as_unused();
            _recurrent_to_input_weights->mark_as_unused();
        }
        _input_to_forget_eff_bias.allocator()->allocate();
        _recurrent_to_forget_eff_bias.allocator()->allocate();
        _input_to_cell_eff_bias.allocator()->allocate();
        _recurrent_to_cell_eff_bias.allocator()->allocate();
        _input_to_output_eff_bias.allocator()->allocate();
        _recurrent_to_output_eff_bias.allocator()->allocate();
        CLScheduler::get().enqueue(_input_to_forget_reduction);
        CLScheduler::get().enqueue(_recurrent_to_forget_reduction);
        CLScheduler::get().enqueue(_input_to_cell_reduction);
        CLScheduler::get().enqueue(_recurrent_to_cell_reduction);
        CLScheduler::get().enqueue(_input_to_output_reduction);
        CLScheduler::get().enqueue(_recurrent_to_output_reduction);

        if(_has_projection)
        {
            _projection_eff_bias.allocator()->allocate();
            CLScheduler::get().enqueue(_projection_reduction);
            if(_projection_bias != nullptr)
            {
                _projection_bias_add.run();
                _projection_bias->mark_as_unused();
            }

            _projection_weights_transposed.allocator()->allocate();
            _transpose_projection_weights.run();
            _projection_weights->mark_as_unused();

            if(!_projection_tensor_copy_required)
            {
                _hidden_gate.mark_as_unused();
                _projection_accumulate_res.mark_as_unused();
            }
        }

        // Mark weights as unused
        _input_to_forget_weights->mark_as_unused();
        _input_to_cell_weights->mark_as_unused();
        _input_to_output_weights->mark_as_unused();
        _recurrent_to_forget_weights->mark_as_unused();
        _recurrent_to_cell_weights->mark_as_unused();
        _recurrent_to_output_weights->mark_as_unused();

        CLScheduler::get().queue().finish();
        _is_prepared = true;
    }
}

} // namespace arm_compute