blob: a354a4df7b39a25818095bc094026c57c9d1aa84 [file] [log] [blame]
/*
 * Copyright (c) 2019-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouf4643372019-11-29 16:17:13 +000024#ifndef ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
25#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
Michalis Spyrouba27e442019-05-28 10:04:57 +010026
27#include "arm_compute/core/Types.h"
28#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
29#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
30#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
31#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
32#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
33#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
34#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
35#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
36#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
37#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
38#include "arm_compute/runtime/NEON/functions/NESlice.h"
39#include "arm_compute/runtime/NEON/functions/NETranspose.h"
40
41#include "arm_compute/runtime/common/LSTMParams.h"
42
43namespace arm_compute
44{
45// Forward declarations
46class ITensor;
47
/** Basic function to run @ref NELSTMLayerQuantized
 *
 * This function calls the following NEON functions/kernels:
 *
 * -# @ref NEGEMMLowpMatrixMultiplyCore                        Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16
 * -# @ref NETranspose                                         Matrix transpose
 * -# @ref NEConcatenateLayer                                  Tensor concatenation
 * -# @ref NEActivationLayer                                   Activation functions (tanh and logistic)
 * -# @ref NEArithmeticAddition                                Elementwise addition
 * -# @ref NEPixelWiseMultiplication                           Elementwise multiplication
 * -# @ref NESlice                                             Tensor slicing
 * -# @ref NEDequantizationLayer                               Dequantize into float
 * -# @ref NEQuantizationLayer                                 Quantize from float
 * */
class NELSTMLayerQuantized : public IFunction
{
public:
    /** Default constructor
     *
     * @param[in] memory_manager (Optional) Memory manager used for the function's internal tensors.
     */
    NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete;
    /** Default destructor */
    ~NELSTMLayerQuantized();
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in]  input_to_input_weights      2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_input_weights  2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_gate_bias             1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     *                                         (Passed as a non-const pointer; presumably updated in place across time steps — confirm against the .cpp.)
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     */
    void configure(const ITensor *input,
                   const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                   const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                   const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
                   ITensor *cell_state_in, const ITensor *output_state_in,
                   ITensor *cell_state_out, ITensor *output_state_out);

    /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
     *
     * @param[in]  input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in]  input_to_input_weights      2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_forget_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_input_weights  2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_gate_bias             1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  forget_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[in]  output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup _memory_group; // Manages lifetime/workspace of the temporary tensors below

    // Functions used
    NEGEMMLowpMatrixMultiplyCore                        _gemmlowp;
    NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage;
    NETranspose                                         _transpose_weights;
    NEConcatenateLayer                                  _concat_input_weights;
    NEConcatenateLayer                                  _concat_recurrent_weights;
    NEConcatenateLayer                                  _concat_weights;
    NEConcatenateLayer                                  _concat_inputs;
    NEConcatenateLayer                                  _concat_bias;
    NEActivationLayer                                   _sigmoid_forget_gate;
    NEActivationLayer                                   _sigmoid_input_gate;
    NEActivationLayer                                   _sigmoid_output_gate;
    NEActivationLayer                                   _tanh_modulation_gate;
    NEActivationLayer                                   _tanh_output_state;
    NEArithmeticAddition                                _add1;
    NEArithmeticAddition                                _add2;
    NEPixelWiseMultiplication                           _mul1;
    NEPixelWiseMultiplication                           _mul2;
    NEPixelWiseMultiplication                           _mul3;
    NESlice                                             _slice_input_tensor;
    NESlice                                             _slice_forget_tensor;
    NESlice                                             _slice_cell_tensor;
    NESlice                                             _slice_output_tensor;
    NEDequantizationLayer                               _dequantize;
    NEQuantizationLayer                                 _quantize;

    // Tensor pointers (non-owning; set in configure())
    const ITensor *_input_to_input_weights;
    const ITensor *_input_to_forget_weights;
    const ITensor *_input_to_cell_weights;
    const ITensor *_input_to_output_weights;
    const ITensor *_recurrent_to_input_weights;
    const ITensor *_recurrent_to_forget_weights;
    const ITensor *_recurrent_to_cell_weights;
    const ITensor *_recurrent_to_output_weights;
    const ITensor *_input_gate_bias;
    const ITensor *_forget_gate_bias;
    const ITensor *_cell_bias;
    const ITensor *_output_gate_bias;

    // Temporary tensors
    Tensor _recurrent_weights;
    Tensor _input_weights;
    Tensor _weights;
    Tensor _input;
    Tensor _weights_transposed;
    Tensor _output_highp;
    Tensor _output_lowp;
    Tensor _bias;
    Tensor _forget_gate_input;
    Tensor _input_gate_input;
    Tensor _output_gate_input;
    Tensor _input_modulation_gate_input;
    Tensor _forget_gate_output;
    Tensor _input_gate_output;
    Tensor _output_gate_output;
    Tensor _input_modulation_gate_output;
    Tensor _cell_state1;
    Tensor _cell_state2;
    Tensor _output_state_tmp;
    Tensor _output_state_out_symm;
    Tensor _output_state_out_f32;

    bool _is_prepared; // Guards one-time work in prepare() — presumably the weight preparation; confirm in the .cpp
};
206} // namespace arm_compute
Michalis Spyrouf4643372019-11-29 16:17:13 +0000207#endif /* ARM_COMPUTE_NELSTMLAYERQUANTIZED_H */