/*
 * Copyright (c) 2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H

#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ICLTensor;

/** Basic function to run @ref CLQLSTMLayer
 *
 * This function calls the following CL functions/kernels:
 *
 * -# @ref CLActivationLayer                                    Activation functions (tanh and logistic)
 * -# @ref CLCopyKernel                                         Copy kernel for copying output_state_out to output
 * -# @ref CLArithmeticAddition                                 Elementwise addition and subtraction
 * -# @ref CLGEMMLowpMatrixMultiplyCore                         Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint  Convert 32-bit integers into QSYMM16
 * -# @ref CLGEMMLowpMatrixAReductionKernel                     For precomputing effective biases to use
 * -# @ref CLPixelWiseMultiplication                            Elementwise multiplication
 * -# @ref CLTranspose                                          Transpose function for reshaping the weights
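 *
 * A minimal usage sketch (tensor creation, shapes and quantization settings are
 * illustrative assumptions; the only requirements are those documented on
 * configure() below):
 *
 * @code
 * CLQLSTMLayer qlstm;
 * // Optionally check the configuration first via CLQLSTMLayer::validate(...)
 * // with the matching ITensorInfo pointers.
 * qlstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
 *                 &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
 *                 &forget_gate_bias, &cell_bias, &output_gate_bias,
 *                 &cell_state_in, &output_state_in,
 *                 &cell_state_out, &output_state_out, &output, lstm_params);
 * // ... allocate the tensors and fill the inputs ...
 * qlstm.run();
 * @endcode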
 */
class CLQLSTMLayer : public IFunction
{
public:
    /** Default constructor */
    CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer(const CLQLSTMLayer &) = delete;
    /** Default move constructor */
    CLQLSTMLayer(CLQLSTMLayer &&) = default;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
    /** Default move assignment operator */
    CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);
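
    /* A sketch of populating the optional features of LSTMParams for the
     * configure()/validate() methods of this class. The setter names follow
     * LSTMParams<T> from arm_compute/runtime/common/LSTMParams.h; the variables
     * stand in for user-provided tensors and values, so treat this as
     * illustrative only:
     *
     *   LSTMParams<ICLTensor> lstm_params;
     *   lstm_params.set_matmul_scale_params(input_intermediate_scale, forget_intermediate_scale,
     *                                       cell_intermediate_scale, output_intermediate_scale)
     *              .set_hidden_state_params(hidden_state_zero, hidden_state_scale)
     *              .set_projection_params(&projection_weights, &projection_bias)
     *              .set_layer_normalization_params(&input_layer_norm_weights, &forget_layer_norm_weights,
     *                                              &cell_layer_norm_weights, &output_layer_norm_weights)
     *              .set_cell_clip_params(cell_threshold)
     *              .set_projection_clip_params(projection_threshold);
     */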

    /** Initialize function's tensors.
     *
     * @param[in]  compile_context             The compile context to be used.
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole, CIFG and layer normalization optimizations:
     *                                         input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                         forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                         cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                         output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                         hidden_state_zero          The zero point of the hidden state.
     *                                         hidden_state_scale         The scale of the hidden state.
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                         cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                    If set to 0.0 then clipping is disabled.
     *                                         projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                    [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   ICLTensor *cell_state_in, const ICLTensor *output_state_in,
                   ICLTensor *cell_state_out, ICLTensor *output_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params);

    /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: S32.
     * @param[in] cell_state_in               2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [num_units, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] output                      Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors info used in peephole, CIFG and layer normalization optimizations:
     *                                        input_intermediate_scale   Scale of the intermediate result of matmul, i.e. input to layer normalization, at input gate.
     *                                        forget_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at forget gate.
     *                                        cell_intermediate_scale    Scale of the intermediate result of matmul, i.e. input to layer normalization, at cell gate.
     *                                        output_intermediate_scale  Scale of the intermediate result of matmul, i.e. input to layer normalization, at output gate.
     *                                        hidden_state_zero          The zero point of the hidden state.
     *                                        hidden_state_scale         The scale of the hidden state.
     *                                        input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: QSYMM8.
     *                                        recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: QSYMM16.
     *                                        cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: S32.
     *                                        projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: QSYMM8.
     *                                        projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: S32.
     *                                        input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: QSYMM16.
     *                                        cell_threshold             (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                                                   If set to 0.0 then clipping is disabled.
     *                                        projection_threshold       (Optional) The clipping threshold for the output from the projection layer, such that values are bound within
     *                                                                   [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    enum class LayerNormGate : uint8_t
    {
        Forget,
        Cell,
        Input,
        Output,
        Count
    };
    static constexpr uint8_t  _layer_norm_count                    = static_cast<uint8_t>(LayerNormGate::Count);
    static constexpr uint32_t _out_state_output_size_dimension_idx = 0;

    /** Internal method to configure matrix multiplication plus output stage of each gate.
     *
     * @param[in] compile_context       The compile context to be used.
     * @param[in] mm                    Matrix multiplication function to use.
     * @param[in] outstage              Output stage function to use.
     * @param[in] gemmlowp_info         GEMMLowp metadata to be used by the output stage.
     * @param[in] mm_input              Input tensor to matrix multiplication function.
     * @param[in] mm_weights            Weights tensor to matrix multiplication function.
     * @param[in] bias                  Bias tensor to matrix multiplication function.
     * @param[in] mm_res                Tensor to be used for storing the result of the matrix multiplication.
     * @param[in] outstage_res          Tensor to be used for storing the result of the output stage.
     * @param[in] gemmlowp_scale        Real multiplier to be used computing multiplier and shift for requantization.
     * @param[in] mm_res_info           Tensor info to be used to initialize the matrix multiplication result tensor.
     * @param[in] outstage_tensor_info  Tensor info to be used to initialize the output stage result tensor.
     */
    void configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
                      const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias, CLTensor *mm_res,
                      CLTensor *outstage_res, float gemmlowp_scale,
                      const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info);
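
    /* gemmlowp_scale above is a real-valued requantization multiplier; the output
     * stage consumes it as an integer multiplier/shift pair. A sketch of that
     * decomposition, assuming the helper declared in
     * arm_compute/core/utils/quantization/AsymmHelpers.h:
     *
     *   int32_t multiplier = 0;
     *   int32_t shift      = 0;
     *   quantization::calculate_quantized_multiplier(gemmlowp_scale, &multiplier, &shift);
     *   gemmlowp_info.gemmlowp_multiplier = multiplier;
     *   gemmlowp_info.gemmlowp_shift      = shift;
     */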

    MemoryGroup _memory_group{};

    /** A small internal kernel to perform a copy between two tensors */
    class TensorCopyKernel
    {
        static constexpr uint32_t max_dimension_supported = 2;

        ICLTensor *_src{ nullptr };
        ICLTensor *_dst{ nullptr };
        size_t     _row_size{};
        Window     _window{};

    public:
        /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayer::TensorCopyKernel
         *
         * @param[in] src Source tensor info.
         * @param[in] dst Destination tensor info.
         *
         * @return a status
         */
        static Status validate(const ITensorInfo &src, const ITensorInfo &dst);
        /** Set the input and output tensors.
         *
         * @param[in]  src Source tensor
         * @param[out] dst Destination tensor
         */
        void configure(ICLTensor &src, ICLTensor &dst);
        /** Run the kernel */
        void run();
    };
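
    /* TensorCopyKernel backs the projection/hidden state copies declared further
     * below (e.g. _hidden_to_output_copy). A minimal usage sketch, assuming src
     * and dst satisfy validate():
     *
     *   TensorCopyKernel copy;
     *   ARM_COMPUTE_ERROR_THROW_ON(TensorCopyKernel::validate(*src.info(), *dst.info()));
     *   copy.configure(src, dst);
     *   copy.run();
     */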

    // Functions used
    CLTranspose                      _transpose_input_to_forget_weights{};
    CLTranspose                      _transpose_input_to_cell_weights{};
    CLTranspose                      _transpose_input_to_output_weights{};
    CLTranspose                      _transpose_input_to_input_weights{};
    CLTranspose                      _transpose_recurrent_to_forget_weights{};
    CLTranspose                      _transpose_recurrent_to_cell_weights{};
    CLTranspose                      _transpose_recurrent_to_output_weights{};
    CLTranspose                      _transpose_recurrent_to_input_weights{};
    CLTranspose                      _transpose_projection_weights{};
    CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
    CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
    CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
    CLGEMMLowpMatrixAReductionKernel _projection_reduction{};
    CLArithmeticAddition             _projection_bias_add{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_forget{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_forget{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_forget{};
    CLGEMMLowpOutputStage            _input_to_forget_outstage{};
    CLGEMMLowpOutputStage            _recurrent_to_forget_outstage{};
    CLGEMMLowpOutputStage            _cell_to_forget_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_forget{};
    CLArithmeticAddition             _accumulate_cell_forget{};
    CLActivationLayer                _forget_gate_sigmoid{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_cell{};
    CLGEMMLowpOutputStage            _input_to_cell_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_cell{};
    CLGEMMLowpOutputStage            _recurrent_to_cell_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_modulation{};
    CLActivationLayer                _cell_gate_tanh{};
    CLArithmeticSubtraction          _input_gate_sub{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_input{};
    CLGEMMLowpOutputStage            _input_to_input_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_input{};
    CLGEMMLowpOutputStage            _recurrent_to_input_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_input{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_input{};
    CLGEMMLowpOutputStage            _cell_to_input_outstage{};
    CLArithmeticAddition             _accumulate_cell_input{};
    CLActivationLayer                _input_gate_sigmoid{};
    CLPixelWiseMultiplication        _pixelwise_mul_forget_cell{};
    CLPixelWiseMultiplication        _pixelwise_mul_input_cell{};
    CLArithmeticAddition             _add_forget_cell{};
    CLActivationLayer                _cell_clip{};
    CLGEMMLowpMatrixMultiplyCore     _mm_input_to_output{};
    CLGEMMLowpOutputStage            _input_to_output_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_recurrent_to_output{};
    CLGEMMLowpOutputStage            _recurrent_to_output_outstage{};
    CLArithmeticAddition             _accumulate_input_recurrent_output{};
    CLPixelWiseMultiplication        _pixelwise_mul_cell_to_output{};
    CLGEMMLowpOutputStage            _cell_to_output_outstage{};
    CLArithmeticAddition             _accumulate_cell_to_output{};
    CLActivationLayer                _output_gate_sigmoid{};
    CLActivationLayer                _hidden_tanh{};
    CLPixelWiseMultiplication        _pixelwise_mul_hidden{};
    CLGEMMLowpOutputStage            _hidden_outstage{};
    CLGEMMLowpMatrixMultiplyCore     _mm_projection{};
    CLGEMMLowpOutputStage            _projection_outstage{};
    CLArithmeticAddition             _accumulate_projection{};
    CLActivationLayer                _projection_clip{};
    std::array<CLQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
    CLCopyKernel                     _copy_output{};

    TensorCopyKernel _projection_bias_copy{};
    TensorCopyKernel _projection_output_to_accumulate_copy{};
    TensorCopyKernel _projection_accumulate_to_output_copy{};
    TensorCopyKernel _hidden_to_output_copy{};

    // Tensor pointers
    const ICLTensor *_input_to_input_weights{ nullptr };
    const ICLTensor *_recurrent_to_input_weights{ nullptr };
    const ICLTensor *_projection_bias{ nullptr };
    const ICLTensor *_input_to_forget_weights{ nullptr };
    const ICLTensor *_input_to_cell_weights{ nullptr };
    const ICLTensor *_input_to_output_weights{ nullptr };
    const ICLTensor *_recurrent_to_forget_weights{ nullptr };
    const ICLTensor *_recurrent_to_cell_weights{ nullptr };
    const ICLTensor *_recurrent_to_output_weights{ nullptr };
    const ICLTensor *_projection_weights{ nullptr };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_weights{ {} };
    std::array<const ICLTensor *, _layer_norm_count> _layer_norm_bias{ {} };

    using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type;
    inline LayerNormIndexType getGateIndex(LayerNormGate g)
    {
        return static_cast<LayerNormIndexType>(g);
    }

    inline void set_layer_norm_weight(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_weights[getGateIndex(g)] = t;
    }

    inline void set_layer_norm_bias(const ICLTensor *t, LayerNormGate g)
    {
        _layer_norm_bias[getGateIndex(g)] = t;
    }

    inline const ICLTensor *get_layer_norm_weight(LayerNormGate g)
    {
        return _layer_norm_weights[getGateIndex(g)];
    }

    inline const ICLTensor *get_layer_norm_bias(LayerNormGate g)
    {
        return _layer_norm_bias[getGateIndex(g)];
    }

    inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
    {
        return _layer_norms[getGateIndex(g)];
    }

    inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in)
    {
        ARM_COMPUTE_ERROR_ON(!_has_layer_norm);

        CLTensor *out = &get_layer_norm_output(g);
        _memory_group.manage(out);
        out->allocator()->init(*(in->info()));

        get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
    }

    inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
    {
        // Output quantization scale will be different, but ignored here
        // since it will be configured at configure() stage.
        const TensorInfo out{ in };
        return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
    }

    // Temporary tensors
    CLTensor _input_to_forget_weights_transposed{ nullptr };
    CLTensor _input_to_cell_weights_transposed{ nullptr };
    CLTensor _input_to_output_weights_transposed{ nullptr };
    CLTensor _input_to_input_weights_transposed{ nullptr };
    CLTensor _recurrent_to_forget_weights_transposed{ nullptr };
    CLTensor _recurrent_to_cell_weights_transposed{ nullptr };
    CLTensor _recurrent_to_output_weights_transposed{ nullptr };
    CLTensor _recurrent_to_input_weights_transposed{ nullptr };
    CLTensor _projection_weights_transposed{ nullptr };
    CLTensor _input_to_input_eff_bias{ nullptr };
    CLTensor _recurrent_to_input_eff_bias{ nullptr };
    CLTensor _input_to_forget_eff_bias{ nullptr };
    CLTensor _recurrent_to_forget_eff_bias{ nullptr };
    CLTensor _input_to_cell_eff_bias{ nullptr };
    CLTensor _recurrent_to_cell_eff_bias{ nullptr };
    CLTensor _input_to_output_eff_bias{ nullptr };
    CLTensor _recurrent_to_output_eff_bias{ nullptr };
    CLTensor _projection_reduction_res{ nullptr };
    CLTensor _projection_eff_bias{ nullptr };
    CLTensor _mm_input_to_forget_res{ nullptr };
    CLTensor _mm_recurrent_to_forget_res{ nullptr };
    CLTensor _mul_cell_to_forget_res{ nullptr };
    CLTensor _input_to_forget_outstage_res{ nullptr };
    CLTensor _cell_to_forget_outstage_res{ nullptr };
    CLTensor _recurrent_to_forget_outstage_res{ nullptr };
    CLTensor _forget_gate{ nullptr };
    CLTensor _mm_input_to_cell_res{ nullptr };
    CLTensor _input_to_cell_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_cell_res{ nullptr };
    CLTensor _recurrent_to_cell_outstage_res{ nullptr };
    CLTensor _cell_gate{ nullptr };
    CLTensor _mul_input_cell_res{ nullptr };
    CLTensor _mm_input_to_input_res{ nullptr };
    CLTensor _input_to_input_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_input_res{ nullptr };
    CLTensor _mul_cell_to_input_res{ nullptr };
    CLTensor _cell_to_input_outstage_res{ nullptr };
    CLTensor _recurrent_to_input_outstage_res{ nullptr };
    CLTensor _input_gate{ nullptr };
    CLTensor _mm_input_to_output_res{ nullptr };
    CLTensor _input_to_output_outstage_res{ nullptr };
    CLTensor _mm_recurrent_to_output_res{ nullptr };
    CLTensor _mul_cell_to_output_res{ nullptr };
    CLTensor _cell_to_output_outstage_res{ nullptr };
    CLTensor _recurrent_to_output_outstage_res{ nullptr };
    CLTensor _output_gate{ nullptr };
    CLTensor _hidden_mul_res{ nullptr };
    CLTensor _hidden_gate{ nullptr };
    CLTensor _mm_projection_res{ nullptr };
    CLTensor _projection_outstage_res{ nullptr };
    CLTensor _projection_out_res{ nullptr };
    CLTensor _projection_accumulate_res{ nullptr };
    CLTensor _ones{ nullptr };
    std::array<CLTensor, _layer_norm_count> _layer_norm_output{ {} };

    inline CLTensor &get_layer_norm_output(LayerNormGate g)
    {
        return _layer_norm_output[getGateIndex(g)];
    }

    bool _is_prepared{ false };
    bool _has_cifg{ false };
    bool _has_cell_clipping{ false };
    bool _has_projection{ false };
    bool _has_projection_clipping{ false };
    bool _has_peephole{ false };
    bool _has_layer_norm{ false };
    bool _projection_tensor_copy_required{ false };
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLQLSTMLAYER_H */