COMPMID-1124 : Fixes in CLLSTM layer
Change-Id: Ifc8e12c296d3ef2bf8e0f0bf1b87b7fd47a1fad7
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139248
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Ruomei Yan <ruomei.yan@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index bed1fa9..cf7e078 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -265,26 +265,24 @@
private:
CLMemoryGroup _memory_group;
CLFullyConnectedLayer _fully_connected_input_gate;
- CLGEMM _gemm_input_gate1;
- CLGEMM _gemm_input_gate2;
- CLTransposeKernel _transpose_input_gate1;
- CLTransposeKernel _transpose_input_gate2;
+ CLGEMM _gemm_input_gate;
+ CLTransposeKernel _transpose_input_gate;
CLArithmeticAdditionKernel _accum_input_gate1;
CLArithmeticAddition _accum_input_gate2;
CLArithmeticSubtractionKernel _subtract_input_gate;
+ CLPixelWiseMultiplicationKernel _pixelwise_mul_input_gate;
CLActivationLayerKernel _activation_input_gate;
CLFullyConnectedLayer _fully_connected_forget_gate;
- CLGEMM _gemm_forget_gate1;
- CLGEMM _gemm_forget_gate2;
- CLTransposeKernel _transpose_forget_gate1;
- CLTransposeKernel _transpose_forget_gate2;
+ CLGEMM _gemm_forget_gate;
+ CLTransposeKernel _transpose_forget_gate;
CLArithmeticAdditionKernel _accum_forget_gate1;
CLArithmeticAddition _accum_forget_gate2;
+ CLPixelWiseMultiplicationKernel _pixelwise_mul_forget_gate;
CLActivationLayerKernel _activation_forget_gate;
CLFullyConnectedLayer _fully_connected_cell_state;
CLGEMM _gemm_cell_state1;
CLGEMM _gemm_cell_state2;
- CLTransposeKernel _transpose_cell_state1;
+ CLTransposeKernel _transpose_cell_state;
CLArithmeticAdditionKernel _accum_cell_state1;
CLArithmeticAdditionKernel _accum_cell_state2;
CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state1;
@@ -292,15 +290,14 @@
CLActivationLayerKernel _cell_clip;
CLPixelWiseMultiplicationKernel _pixelwise_mul_cell_state2;
CLFullyConnectedLayer _fully_connected_output;
- CLGEMM _gemm_output1;
- CLGEMM _gemm_output2;
- CLTransposeKernel _transpose_output1;
- CLTransposeKernel _transpose_output2;
+ CLGEMM _gemm_output;
+ CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state1;
+ CLTransposeKernel _transpose_output;
CLArithmeticAdditionKernel _accum_output1;
CLArithmeticAddition _accum_output2;
CLActivationLayerKernel _activation_output;
CLActivationLayerKernel _activation_output_state;
- CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state;
+ CLPixelWiseMultiplicationKernel _pixelwise_mul_output_state2;
CLFullyConnectedLayer _fully_connected_output_state;
CLGEMM _gemm_output_state;
CLArithmeticAdditionKernel _accum_output_state;
@@ -313,13 +310,11 @@
CLTensor _input_gate_out3;
CLTensor _input_gate_out4;
CLTensor _input_gate_out5;
- CLTensor _input_gate_out6;
CLTensor _forget_gate_out1;
CLTensor _forget_gate_out2;
CLTensor _forget_gate_out3;
CLTensor _forget_gate_out4;
CLTensor _forget_gate_out5;
- CLTensor _forget_gate_out6;
CLTensor _cell_state_out1;
CLTensor _cell_state_out2;
CLTensor _cell_state_out3;
@@ -330,7 +325,6 @@
CLTensor _output3;
CLTensor _output4;
CLTensor _output5;
- CLTensor _output6;
CLTensor _cell_state_activation;
CLTensor _output_projection1;
CLTensor _ones;