/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"

#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Basic function to run @ref NELSTMLayerQuantized
 *
 * This function calls the following functions/kernels:
 *
 * -# @ref NEGEMMLowpMatrixMultiplyCore  Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref NEGEMMLowpOutputStage         Convert 32-bit integers into QSYMM16
 * -# @ref NETranspose                   Matrix transpose
 * -# @ref NEConcatenateLayer            Tensor concatenation
 * -# @ref NEActivationLayer             Activation functions (tanh and logistic)
 * -# @ref NEArithmeticAddition          Elementwise addition
 * -# @ref NEPixelWiseMultiplication     Elementwise multiplication
 * -# @ref NESlice                       Tensor slicing
 * -# @ref NEDequantizationLayer         Dequantize into float
 * -# @ref NEQuantizationLayer           Quantize from float
 */
class NELSTMLayerQuantized : public IFunction
{
public:
    /** Default constructor */
    NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete;
    /** Default destructor */
    ~NELSTMLayerQuantized();
    /** Initialize function's tensors.
     *
     * Valid data layouts:
     * - All
     *
     * Valid data type configurations:
     * |src0 - src8 |src9 - src12 |src13   |src14  |dst0   |dst1   |
     * |:-----------|:------------|:-------|:------|:------|:------|
     * |QASYMM8     |S32          |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in]  input_to_input_weights      2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_input_weights  2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_gate_bias             1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     */
    void configure(const ITensor *input,
                   const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                   const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                   const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
                   ITensor *cell_state_in, const ITensor *output_state_in,
                   ITensor *cell_state_out, ITensor *output_state_out);
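    // A minimal usage sketch (illustrative only, not part of the interface): it assumes all
    // ITensor objects below have already been created and allocated with the shapes and data
    // types documented above; the variable names are placeholders chosen for this example.
    //
    //   NELSTMLayerQuantized lstmq;
    //   lstmq.configure(&input,
    //                   &input_to_input_w, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
    //                   &recurrent_to_input_w, &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
    //                   &input_gate_bias, &forget_gate_bias, &cell_bias, &output_gate_bias,
    //                   &cell_state_in, &output_state_in, &cell_state_out, &output_state_out);
    //   lstmq.run();
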
    /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayerQuantized
     *
     * @param[in]  input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in]  input_to_input_weights      2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_forget_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_input_weights  2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_gate_bias             1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  forget_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);
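
    // A minimal validation sketch (illustrative only): the *_info variables are assumed to be
    // ITensorInfo descriptors matching the shapes and data types documented above; check the
    // returned Status before calling configure().
    //
    //   Status s = NELSTMLayerQuantized::validate(&input_info,
    //                  &input_to_input_w_info, &input_to_forget_w_info, &input_to_cell_w_info, &input_to_output_w_info,
    //                  &recurrent_to_input_w_info, &recurrent_to_forget_w_info, &recurrent_to_cell_w_info, &recurrent_to_output_w_info,
    //                  &input_gate_bias_info, &forget_gate_bias_info, &cell_bias_info, &output_gate_bias_info,
    //                  &cell_state_in_info, &output_state_in_info, &cell_state_out_info, &output_state_out_info);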

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup _memory_group;

    // Functions used
    NEGEMMLowpMatrixMultiplyCore _gemmlowp;
    NEGEMMLowpOutputStage        _output_stage;
    NETranspose                  _transpose_weights;
    NEConcatenateLayer           _concat_input_weights;
    NEConcatenateLayer           _concat_recurrent_weights;
    NEConcatenateLayer           _concat_weights;
    NEConcatenateLayer           _concat_inputs;
    NEConcatenateLayer           _concat_bias;
    NEActivationLayer            _sigmoid_forget_gate;
    NEActivationLayer            _sigmoid_input_gate;
    NEActivationLayer            _sigmoid_output_gate;
    NEActivationLayer            _tanh_modulation_gate;
    NEActivationLayer            _tanh_output_state;
    NEArithmeticAddition         _add1;
    NEArithmeticAddition         _add2;
    NEPixelWiseMultiplication    _mul1;
    NEPixelWiseMultiplication    _mul2;
    NEPixelWiseMultiplication    _mul3;
    NESlice                      _slice_input_tensor;
    NESlice                      _slice_forget_tensor;
    NESlice                      _slice_cell_tensor;
    NESlice                      _slice_output_tensor;
    NEDequantizationLayer        _dequantize;
    NEQuantizationLayer          _quantize;

    // Tensor pointers
    const ITensor *_input_to_input_weights;
    const ITensor *_input_to_forget_weights;
    const ITensor *_input_to_cell_weights;
    const ITensor *_input_to_output_weights;
    const ITensor *_recurrent_to_input_weights;
    const ITensor *_recurrent_to_forget_weights;
    const ITensor *_recurrent_to_cell_weights;
    const ITensor *_recurrent_to_output_weights;
    const ITensor *_input_gate_bias;
    const ITensor *_forget_gate_bias;
    const ITensor *_cell_bias;
    const ITensor *_output_gate_bias;

    // Temporary tensors
    Tensor _recurrent_weights;
    Tensor _input_weights;
    Tensor _weights;
    Tensor _input;
    Tensor _weights_transposed;
    Tensor _output_highp;
    Tensor _output_lowp;
    Tensor _bias;
    Tensor _forget_gate_input;
    Tensor _input_gate_input;
    Tensor _output_gate_input;
    Tensor _input_modulation_gate_input;
    Tensor _forget_gate_output;
    Tensor _input_gate_output;
    Tensor _output_gate_output;
    Tensor _input_modulation_gate_output;
    Tensor _cell_state1;
    Tensor _cell_state2;
    Tensor _output_state_tmp;
    Tensor _output_state_out_symm;
    Tensor _output_state_out_f32;

    bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NELSTMLAYERQUANTIZED_H */