IVGCVSW-6806 Add Unidirectional Sequence Lstm support to Neon

 * Corrected TensorInfo order for IsUnidirectionalSequenceLstmSupported
    * outputStateOut TensorInfo is not optional.
    * cellStateOut TensorInfo is not optional.
    * TensorInfo order matches other QLSTM/LSTM layers.
 * Added missing parameters to UnidirectionalSequenceLstmOperator for the
   delegate.
 * Added quantized UnidirectionalSequenceLstm support to Neon

!android-nn-driver:7457

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I26dde1bb96793dd25eb9081ca5ae5f63752288c4
diff --git a/src/backends/neon/workloads/NeonUnidirectionalSequenceLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonUnidirectionalSequenceLstmFloatWorkload.cpp
index c911afb..8dba719 100644
--- a/src/backends/neon/workloads/NeonUnidirectionalSequenceLstmFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonUnidirectionalSequenceLstmFloatWorkload.cpp
@@ -39,7 +39,7 @@
                                          GetGuid());
 
     const arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
-    arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();
 
     TensorInfo inputInfo = info.m_InputTensorInfos[0];
     TensorInfo outputInfo = info.m_OutputTensorInfos[0];
@@ -49,7 +49,7 @@
 
     TensorShape inputLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetShape();
     TensorShape cellStateLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[2])->GetShape();
-    TensorShape outputLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetShape();
+    TensorShape outputLayerShape = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[2])->GetShape();
 
     unsigned int maxTime = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[0] : inputLayerShape[1];
     unsigned int batchSize = m_Data.m_Parameters.m_TimeMajor ? inputLayerShape[1] : inputLayerShape[0];
@@ -288,7 +288,7 @@
         // LSTM input/output cannot be > 2 dimensions so need to resize its TensorInfo.
         if (maxTime == 1 && m_Data.m_Parameters.m_TimeMajor)
         {
-            TensorShape inputShape = GetTensorShape((&input)->info()->tensor_shape(), 1U);
+            TensorShape inputShape = GetTensorShape(input.info()->tensor_shape(), 1U);
             TensorShape outputShape = GetTensorShape((&output)->info()->tensor_shape(), 1U);
 
             TensorShape inputShapeShrink({inputShape[1], inputShape[2]});
@@ -297,10 +297,10 @@
             auto acl_input_shape_shrink = BuildArmComputeTensorShape(inputShapeShrink);
             auto acl_output_shape_shrink = BuildArmComputeTensorShape(outputShapeShrink);
 
-            (&input)->info()->set_tensor_shape(acl_input_shape_shrink);
+            input.info()->set_tensor_shape(acl_input_shape_shrink);
             inputLSTM = const_cast<arm_compute::ITensor*>(&input);
 
-            (&output)->info()->set_tensor_shape(acl_output_shape_shrink);
+            output.info()->set_tensor_shape(acl_output_shape_shrink);
             outputLSTM = &output;
         }
         // If there is only one LSTM batch major batch, we will not concat, only permute.
@@ -432,9 +432,9 @@
         unsigned int aclAxisConcat = CalcAclAxis(concatDescriptor.GetNumDimensions(), concatDescriptor.GetConcatAxis());
         if (!m_Data.m_Parameters.m_TimeMajor)
         {
-            TensorInfo concatOuputTensorInfo = outputInfo;
-            concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
-            BuildArmComputeTensor(concat_out, concatOuputTensorInfo);
+            TensorInfo concatOutputTensorInfo = outputInfo;
+            concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
+            BuildArmComputeTensor(concat_out, concatOutputTensorInfo);
             armcomputetensorutils::InitialiseArmComputeTensorEmpty(concat_out);
 
             m_Concat->configure(m_ConcatInputs, &concat_out, aclAxisConcat);
@@ -452,11 +452,11 @@
     {
         if (!m_Data.m_Parameters.m_TimeMajor)
         {
-            (&output)->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandBatchMajor));
+            output.info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandBatchMajor));
         }
         else
         {
-            (&output)->info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
+            output.info()->set_tensor_shape(BuildArmComputeTensorShape(shapeExpandTimeMajor));
         }
     }
 
@@ -510,14 +510,12 @@
 NeonUnidirectionalSequenceLstmFloatWorkloadValidate(const TensorInfo& input,
                                                     const TensorInfo& outputStateIn,
                                                     const TensorInfo& cellStateIn,
+                                                    const TensorInfo& outputStateOut,
+                                                    const TensorInfo& cellStateOut,
                                                     const TensorInfo& output,
-                                                    const Optional<TensorInfo>& hiddenStateOutput,
-                                                    const Optional<TensorInfo>& cellStateOutput,
                                                     const UnidirectionalSequenceLstmDescriptor& descriptor,
                                                     const LstmInputParamsInfo& paramsInfo)
 {
-    IgnoreUnused(hiddenStateOutput, cellStateOutput);
-
     TensorShape inputLayerShape = input.GetShape();
     TensorShape outputLayerShape = outputStateIn.GetShape();
 
@@ -612,8 +610,6 @@
     arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
 
     const TensorInfo& scratchBuffer = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
-    const TensorInfo& outputStateOut = TensorInfo(outputStateIn.GetShape(), input.GetDataType());
-    const TensorInfo& cellStateOut = TensorInfo(cellStateIn.GetShape(), input.GetDataType());
 
     // The inputs and outputs
     const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
@@ -704,7 +700,7 @@
         aclOutputLayerNormWeightsInfo = BuildArmComputeTensorInfo(paramsInfo.GetOutputLayerNormWeights());
 
         lstm_params_info.set_layer_normalization_params(descriptor.m_CifgEnabled ? nullptr :
-                                                            &aclInputLayerNormWeightsInfo,
+                                                        &aclInputLayerNormWeightsInfo,
                                                         &aclForgetLayerNormWeightsInfo,
                                                         &aclCellLayerNormWeightsInfo,
                                                         &aclOutputLayerNormWeightsInfo);
@@ -803,9 +799,9 @@
     TensorShape shapeExpandTimeMajor({1, shape[0], shape[1]});
     TensorShape shapeExpandBatchMajor({shape[0], 1, shape[1]});
 
-    TensorInfo concatOuputTensorInfo = TensorInfo(output);
-    concatOuputTensorInfo.SetShape(timeMajorShapeOutput);
-    arm_compute::TensorInfo aclConcatOuputTensorInfo= BuildArmComputeTensorInfo(concatOuputTensorInfo);
+    TensorInfo concatOutputTensorInfo = TensorInfo(output);
+    concatOutputTensorInfo.SetShape(timeMajorShapeOutput);
+    arm_compute::TensorInfo aclConcatOutputTensorInfo= BuildArmComputeTensorInfo(concatOutputTensorInfo);
 
     if (maxTime != 1) // ACL concat does not work with only one element to concatenate.
     {
@@ -819,7 +815,7 @@
         if (!descriptor.m_TimeMajor)
         {
             statusConcat = arm_compute::NEConcatenateLayer::validate(concatInputsTensorInfosPtr,
-                                                                     &aclConcatOuputTensorInfo,
+                                                                     &aclConcatOutputTensorInfo,
                                                                      aclAxisConcat);
         }
         else
@@ -853,7 +849,7 @@
         // Output now time major. Permute output back to batch major.
         if (maxTime != 1)
         {
-            statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOuputTensorInfo,
+            statusPermute2 = arm_compute::NEPermute::validate(&aclConcatOutputTensorInfo,
                                                               &aclOutputInfo,
                                                               arm_compute::PermutationVector(0U, 2U, 1U));
         }