blob: a19310d8ea920ad295fff875f2065002fdae9cde [file] [log] [blame]
Michele Di Giorgio47a89902020-03-09 19:32:33 +00001/*
2 * Copyright (c) 2020 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_NEQLSTMLAYER_H
25#define ARM_COMPUTE_NEQLSTMLAYER_H
26
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +010027#include "arm_compute/core/NEON/kernels/NECopyKernel.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000028#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h"
Sang-Hoon Park9230e272020-04-18 00:46:34 +010029#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000030#include "arm_compute/core/Types.h"
31#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
Michalis Spyrou173ba9b2020-06-23 17:25:43 +010032#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
33#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000034#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
35#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
Michalis Spyrou6eb73452020-07-02 17:39:25 +010036#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
Michele Di Giorgio47a89902020-03-09 19:32:33 +000037#include "arm_compute/runtime/NEON/functions/NETranspose.h"
38
39#include "arm_compute/runtime/common/LSTMParams.h"
40
41namespace arm_compute
42{
43// Forward declarations
44class ITensor;
45
46/** Basic function to run @ref NEQLSTMLayer
47 *
48 * This function calls the following NEON functions/kernels:
49 *
50 * -# @ref NEActivationLayer Activation functions (tanh and logistic)
Michalis Spyrou173ba9b2020-06-23 17:25:43 +010051 * -# @ref NEArithmeticAddition Elementwise addition
 * -# @ref NEArithmeticSubtraction Elementwise subtraction
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +010053 * -# @ref NECopyKernel Copy kernel for copying output_state_out to output
Michele Di Giorgio47a89902020-03-09 19:32:33 +000054 * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
55 * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
56 * -# @ref NEGEMMLowpMatrixAReductionKernel For precomputing effective biases to use
Michalis Spyrou6eb73452020-07-02 17:39:25 +010057 * -# @ref NEPixelWiseMultiplication Elementwise multiplication
Michele Di Giorgio47a89902020-03-09 19:32:33 +000058 * -# @ref NETranspose Transpose function for reshaping the weights
59 * */
60class NEQLSTMLayer : public IFunction
61{
62public:
63 /** Default constructor */
64 NEQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
65 /** Prevent instances of this class from being copied (As this class contains pointers) */
66 NEQLSTMLayer(const NEQLSTMLayer &) = delete;
67 /** Default move constructor */
68 NEQLSTMLayer(NEQLSTMLayer &&) = default;
69 /** Prevent instances of this class from being copied (As this class contains pointers) */
70 NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete;
71 /** Default move assignment operator */
72 NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default;
73 /** Initialize function's tensors.
74 *
75 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
76 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
77 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
78 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
79 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
80 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
81 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
82 * @param[in] forget_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.
83 * @param[in] cell_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.
84 * @param[in] output_gate_bias 1D weights tensor with dimensions [num_units]. Data type supported: S32.
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +010085 * @param[in] cell_state_in 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
86 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
87 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
88 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
89 * @param[out] output Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input.
Michele Di Giorgio47a89902020-03-09 19:32:33 +000090 * @param[in] lstm_params Weights tensors used in peephole, CIFG and layer normalization optimizations:
91 * input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
92 * forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
93 * cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
94 * output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
95 * hidden_state_zero The zero point of the hidden state.
96 * hidden_state_scale The scale of the hidden state.
97 * input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
98 * recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
99 * cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
100 * cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
101 * cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
102 * input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
103 * projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
 * projection_bias (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
105 * input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
106 * forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
107 * cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
108 * output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
109 * cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
110 * If set to 0.0 then clipping is disabled.
111 * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
112 * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
113 */
114 void configure(const ITensor *input,
115 const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
116 const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
117 const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
118 const ITensor *cell_state_in, const ITensor *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100119 ITensor *cell_state_out, ITensor *output_state_out, ITensor *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000120 const LSTMParams<ITensor> &lstm_params);
121
122 /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer
123 *
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100124 * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
125 * @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
126 * @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
127 * @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
128 * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
129 * @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
130 * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
131 * @param[in] forget_gate_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.
132 * @param[in] cell_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.
133 * @param[in] output_gate_bias 1D weights tensor info with dimensions [num_units]. Data type supported: S32.
134 * @param[in] cell_state_in 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
135 * @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
136 * @param[in] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
137 * @param[in] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size].Data types supported: Same as @p input.
138 * @param[in] output Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size].Data types supported: Same as @p input.
139 * @param[in] lstm_params Weights tensors info used in peephole, CIFG and layer normalization optimizations:
140 * input_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
141 * forget_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
142 * cell_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
143 * output_intermediate_scale Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
144 * hidden_state_zero The zero point of the hidden state.
145 * hidden_state_scale The scale of the hidden state.
146 * input_to_input_weights (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
147 * recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
148 * cell_to_input_weights (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
149 * cell_to_forget_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
150 * cell_to_output_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
151 * input_gate_bias (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
152 * projection_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
 * projection_bias (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
154 * input_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
155 * forget_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
156 * cell_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
157 * output_layer_norm_weights (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
158 * cell_threshold (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
159 * If set to 0.0 then clipping is disabled.
160 * projection_threshold (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
161 * [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000162 * @return a status
163 */
164 static Status validate(const ITensorInfo *input,
165 const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
166 const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
167 const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
168 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100169 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000170 const LSTMParams<ITensorInfo> &lstm_params);
171
172 // Inherited methods overridden:
173 void run() override;
174 void prepare() override;
175
176private:
/** Identifiers of the gates that have a layer-normalization slot.
 *
 * The numeric value of each enumerator is used as an index into the per-gate arrays of this class;
 * Count is not a gate, it only carries the number of gates.
 */
enum class LayerNormGate : uint8_t
{
    Forget = 0,
    Cell   = 1,
    Input  = 2,
    Output = 3,
    Count  = 4
};
/** Number of layer-normalization gates, i.e. the numeric value of LayerNormGate::Count */
static constexpr uint8_t _layer_norm_count = static_cast<uint8_t>(LayerNormGate::Count);
/** Dimension index of output_size in the output state tensors, which are documented as [output_size, batch_size] */
static constexpr uint32_t _out_state_output_size_dimension_idx = 0;
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100187
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000188 /** Internal method to configure matrix multiplication plus output stage of each gate.
189 *
190 * @param[in] mm Matrix multiplication function to use.
191 * @param[in] outstage Output stage function to use.
192 * @param[in] gemmlowp_info GEMMLowp metadata to be used by the output stage.
193 * @param[in] mm_input Input tensor to matrix multiplication function.
194 * @param[in] mm_weights Weights tensor to matrix multiplication function.
195 * @param[in] bias Bias tensor to matrix multiplication function.
 * @param[in] mm_res Tensor to be used for storing the result of the matrix multiplication.
 * @param[in] outstage_res Tensor to be used for storing the result of the output stage.
 * @param[in] gemmlowp_scale Real multiplier to be used when computing multiplier and shift for requantization.
 * @param[in] mm_res_info Tensor info to be used to initialize matrix multiplication result tensor.
 * @param[in] outstage_tensor_info Tensor info to be used to initialize output stage result tensor.
200 *
201 */
202 void configure_mm(NEGEMMLowpMatrixMultiplyCore &mm, NEGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
203 const ITensor *mm_input, const ITensor *mm_weights, const ITensor *bias, Tensor *mm_res,
204 Tensor *outstage_res, float gemmlowp_scale,
205 const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
206
207 MemoryGroup _memory_group{};
208
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100209 /** A small internel kernel do the copy between two tensors */
210 class TensorCopyKernel
211 {
212 static constexpr uint32_t max_dimension_supported = 2;
213
214 ITensor *_src{ nullptr };
215 ITensor *_dst{ nullptr };
216 size_t _row_size{};
217 Window _window{};
218
219 public:
220 /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel
221 *
222 * @param[in] src Source tensor info.
223 * @param[in] dst Destination tensor info
224 *
225 * @return a status
226 */
227 static Status validate(const ITensorInfo &src, const ITensorInfo &dst);
228 /** Set the input and output tensors.
229 *
230 * @param[in] src Source tensor
231 * @param[out] dst Destination tensor
232 */
233 void configure(ITensor &src, ITensor &dst);
234 /** run the kernel */
235 void run();
236 };
237
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000238 // Functions used
239 NETranspose _transpose_input_to_forget_weights{};
240 NETranspose _transpose_input_to_cell_weights{};
241 NETranspose _transpose_input_to_output_weights{};
242 NETranspose _transpose_input_to_input_weights{};
243 NETranspose _transpose_recurrent_to_forget_weights{};
244 NETranspose _transpose_recurrent_to_cell_weights{};
245 NETranspose _transpose_recurrent_to_output_weights{};
246 NETranspose _transpose_recurrent_to_input_weights{};
247 NETranspose _transpose_projection_weights{};
248 NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
249 NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
250 NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
251 NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
252 NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
253 NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
254 NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
255 NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
256 NEGEMMLowpMatrixAReductionKernel _projection_reduction{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100257 NEArithmeticAddition _projection_bias_add{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000258 NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
259 NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100260 NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000261 NEGEMMLowpOutputStage _input_to_forget_outstage{};
262 NEGEMMLowpOutputStage _recurrent_to_forget_outstage{};
263 NEGEMMLowpOutputStage _cell_to_forget_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100264 NEArithmeticAddition _accumulate_input_recurrent_forget{};
265 NEArithmeticAddition _accumulate_cell_forget{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000266 NEActivationLayer _forget_gate_sigmoid{};
267 NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
268 NEGEMMLowpOutputStage _input_to_cell_outstage{};
269 NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
270 NEGEMMLowpOutputStage _recurrent_to_cell_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100271 NEArithmeticAddition _accumulate_input_recurrent_modulation{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000272 NEActivationLayer _cell_gate_tanh{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100273 NEArithmeticSubtraction _input_gate_sub{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000274 NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
275 NEGEMMLowpOutputStage _input_to_input_outstage{};
276 NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
277 NEGEMMLowpOutputStage _recurrent_to_input_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100278 NEArithmeticAddition _accumulate_input_recurrent_input{};
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100279 NEPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000280 NEGEMMLowpOutputStage _cell_to_input_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100281 NEArithmeticAddition _accumulate_cell_input{};
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100282 NEActivationLayer _input_gate_sigmoid{};
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100283 NEPixelWiseMultiplication _pixelwise_mul_forget_cell{};
284 NEPixelWiseMultiplication _pixelwise_mul_input_cell{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100285 NEArithmeticAddition _add_forget_cell{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000286 NEActivationLayer _cell_clip{};
287 NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
288 NEGEMMLowpOutputStage _input_to_output_outstage{};
289 NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
290 NEGEMMLowpOutputStage _recurrent_to_output_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100291 NEArithmeticAddition _accumulate_input_recurrent_output{};
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100292 NEPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100293 NEGEMMLowpOutputStage _cell_to_output_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100294 NEArithmeticAddition _accumulate_cell_to_output{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000295 NEActivationLayer _output_gate_sigmoid{};
296 NEActivationLayer _hidden_tanh{};
Michalis Spyrou6eb73452020-07-02 17:39:25 +0100297 NEPixelWiseMultiplication _pixelwise_mul_hidden{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000298 NEGEMMLowpOutputStage _hidden_outstage{};
299 NEGEMMLowpMatrixMultiplyCore _mm_projection{};
300 NEGEMMLowpOutputStage _projection_outstage{};
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100301 NEArithmeticAddition _accumulate_projection{};
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000302 NEActivationLayer _projection_clip{};
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100303
304 TensorCopyKernel _projection_bias_copy{};
305 TensorCopyKernel _projection_output_to_accumulate_copy{};
306 TensorCopyKernel _projection_accumulate_to_output_copy{};
307 TensorCopyKernel _hidden_to_output_copy{};
308
Sang-Hoon Parkcf0f6bc2020-04-23 10:21:11 +0100309 std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000310
Michele Di Giorgiobeb2d452020-05-11 16:17:51 +0100311 NECopyKernel _copy_output{};
312
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000313 // Tensor pointers
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100314 const ITensor *_input_to_input_weights
315 {
316 nullptr
317 };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000318 const ITensor *_recurrent_to_input_weights{ nullptr };
319 const ITensor *_projection_bias{ nullptr };
320 const ITensor *_input_to_forget_weights{ nullptr };
321 const ITensor *_input_to_cell_weights{ nullptr };
322 const ITensor *_input_to_output_weights{ nullptr };
323 const ITensor *_recurrent_to_forget_weights{ nullptr };
324 const ITensor *_recurrent_to_cell_weights{ nullptr };
325 const ITensor *_recurrent_to_output_weights{ nullptr };
326 const ITensor *_projection_weights{ nullptr };
Sang-Hoon Parkcf0f6bc2020-04-23 10:21:11 +0100327 std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} };
328 std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} };
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100329
330 using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
331 inline LayerNormIndexType getGateIndex(LayerNormGate g)
332 {
333 return static_cast<LayerNormIndexType>(g);
334 }
335
336 inline void set_layer_norm_weight(const ITensor *t, LayerNormGate g)
337 {
338 _layer_norm_weights[getGateIndex(g)] = t;
339 }
340
341 inline void set_layer_norm_bias(const ITensor *t, LayerNormGate g)
342 {
343 _layer_norm_bias[getGateIndex(g)] = t;
344 }
345
346 inline const ITensor *get_layer_norm_weight(LayerNormGate g)
347 {
348 return _layer_norm_weights[getGateIndex(g)];
349 }
350
351 inline const ITensor *get_layer_norm_bias(LayerNormGate g)
352 {
353 return _layer_norm_bias[getGateIndex(g)];
354 }
355
356 inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
357 {
358 return _layer_norms[getGateIndex(g)];
359 }
360
361 inline void configure_layer_norm(LayerNormGate g, const ITensor *in)
362 {
363 ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
364
365 Tensor &out = get_layer_norm_output(g);
366 _memory_group.manage(&out);
367 out.allocator()->init(*(in->info()));
368
369 get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
370 }
371
372 inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
373 {
374 // Output quantization scale will be different, but ignored here
375 // since it will be configured at configure() stage.
Michalis Spyrou173ba9b2020-06-23 17:25:43 +0100376 const TensorInfo out
377 {
378 in
379 };
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100380 return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
381 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000382
383 // Temporary tensors
384 Tensor _input_to_forget_weights_transposed{ nullptr };
385 Tensor _input_to_cell_weights_transposed{ nullptr };
386 Tensor _input_to_output_weights_transposed{ nullptr };
387 Tensor _input_to_input_weights_transposed{ nullptr };
388 Tensor _recurrent_to_forget_weights_transposed{ nullptr };
389 Tensor _recurrent_to_cell_weights_transposed{ nullptr };
390 Tensor _recurrent_to_output_weights_transposed{ nullptr };
391 Tensor _recurrent_to_input_weights_transposed{ nullptr };
392 Tensor _projection_weights_transposed{ nullptr };
393 Tensor _input_to_input_eff_bias{ nullptr };
394 Tensor _recurrent_to_input_eff_bias{ nullptr };
395 Tensor _input_to_forget_eff_bias{ nullptr };
396 Tensor _recurrent_to_forget_eff_bias{ nullptr };
397 Tensor _input_to_cell_eff_bias{ nullptr };
398 Tensor _recurrent_to_cell_eff_bias{ nullptr };
399 Tensor _input_to_output_eff_bias{ nullptr };
400 Tensor _recurrent_to_output_eff_bias{ nullptr };
401 Tensor _projection_reduction_res{ nullptr };
402 Tensor _projection_eff_bias{ nullptr };
403 Tensor _mm_input_to_forget_res{ nullptr };
404 Tensor _mm_recurrent_to_forget_res{ nullptr };
405 Tensor _mul_cell_to_forget_res{ nullptr };
406 Tensor _input_to_forget_outstage_res{ nullptr };
407 Tensor _cell_to_forget_outstage_res{ nullptr };
408 Tensor _recurrent_to_forget_outstage_res{ nullptr };
409 Tensor _forget_gate{ nullptr };
410 Tensor _mm_input_to_cell_res{ nullptr };
411 Tensor _input_to_cell_outstage_res{ nullptr };
412 Tensor _mm_recurrent_to_cell_res{ nullptr };
413 Tensor _recurrent_to_cell_outstage_res{ nullptr };
414 Tensor _cell_gate{ nullptr };
415 Tensor _mul_input_cell_res{ nullptr };
416 Tensor _mm_input_to_input_res{ nullptr };
417 Tensor _input_to_input_outstage_res{ nullptr };
418 Tensor _mm_recurrent_to_input_res{ nullptr };
419 Tensor _mul_cell_to_input_res{ nullptr };
420 Tensor _cell_to_input_outstage_res{ nullptr };
421 Tensor _recurrent_to_input_outstage_res{ nullptr };
422 Tensor _input_gate{ nullptr };
423 Tensor _mm_input_to_output_res{ nullptr };
424 Tensor _input_to_output_outstage_res{ nullptr };
425 Tensor _mm_recurrent_to_output_res{ nullptr };
426 Tensor _mul_cell_to_output_res{ nullptr };
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100427 Tensor _cell_to_output_outstage_res{ nullptr };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000428 Tensor _recurrent_to_output_outstage_res{ nullptr };
429 Tensor _output_gate{ nullptr };
430 Tensor _hidden_mul_res{ nullptr };
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100431 Tensor _hidden_gate{ nullptr };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000432 Tensor _mm_projection_res{ nullptr };
433 Tensor _projection_outstage_res{ nullptr };
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100434 Tensor _projection_out_res{ nullptr };
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100435 Tensor _projection_accumulate_res{ nullptr };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000436 Tensor _ones{ nullptr };
Sang-Hoon Parkcf0f6bc2020-04-23 10:21:11 +0100437 std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} };
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100438
439 inline Tensor &get_layer_norm_output(LayerNormGate g)
440 {
441 return _layer_norm_output[getGateIndex(g)];
442 }
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000443
444 bool _is_prepared{ false };
445 bool _has_cifg{ false };
446 bool _has_cell_clipping{ false };
447 bool _has_projection{ false };
448 bool _has_projection_clipping{ false };
449 bool _has_peephole{ false };
Sang-Hoon Park9230e272020-04-18 00:46:34 +0100450 bool _has_layer_norm{ false };
Sang-Hoon Parkd5c020a2020-05-06 21:01:19 +0100451 bool _projection_tensor_copy_required{ false };
Michele Di Giorgio47a89902020-03-09 19:32:33 +0000452};
453} // namespace arm_compute
454#endif /* ARM_COMPUTE_NEQLSTMLAYER_H */