COMPMID-2847: Fuse output stage in GEMMLowpMatrixMultiplyReshapedOnlyRHS

Change-Id: Icd60eb368a34295434e8c141885b4666973a92a1
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2732
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: b54ba2848515bf0aee0619c760518481f58c7525 [log] [tgz]
author: Michele Di Giorgio <michele.digiorgio@arm.com> Tue Jan 14 15:31:55 2020 +0000
committer: Michele Di Giorgio <michele.digiorgio@arm.com> Fri Mar 06 09:14:46 2020 +0000
tree: 7082afffb3b087401904454e33e005544a6d7ab2
parent: b46702118eddcfec11487be8dd23234066642d62 [diff] [blame]
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 4b04beb..58400b1 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h

@@ -54,14 +54,21 @@
 /** Descriptor used by the GEMM kernels */
 struct GEMMKernelInfo
 {
-    unsigned int        m{ 0 };                           /**< Number of LHS rows*/
-    unsigned int        n{ 0 };                           /**< Number of RHS columns*/
-    unsigned int        k{ 0 };                           /**< Number of LHS columns or RHS rows */
-    unsigned int        depth_output_gemm3d{ 0 };         /**< Depth of the output tensor in case is reinterpreted as 3D */
-    bool                reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
-    bool                broadcast_bias{ false };          /**< Flag used to broadcase the bias addition */
-    bool                fp_mixed_precision{ false };      /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
-    ActivationLayerInfo activation_info{};                /**< Activation function to perform after the matrix multiplication */
+    unsigned int            m{ 0 };                           /**< Number of LHS rows*/
+    unsigned int            n{ 0 };                           /**< Number of RHS columns*/
+    unsigned int            k{ 0 };                           /**< Number of LHS columns or RHS rows */
+    unsigned int            depth_output_gemm3d{ 0 };         /**< Depth of the output tensor in case is reinterpreted as 3D */
+    bool                    reinterpret_input_as_3d{ false }; /**< Flag used to reinterpret the input as 3D */
+    bool                    broadcast_bias{ false };          /**< Flag used to broadcast the bias addition */
+    bool                    fp_mixed_precision{ false };      /**< Flag used to indicate wider accumulators (32 bit instead of 16 for FP16). */
+    ActivationLayerInfo     activation_info{};                /**< Activation function to perform after the matrix multiplication */
+    int                     mult_transpose1xW_width{ 1 };     /**< Multiplication factor for the width of the 1xW transposed block */
+    int                     mult_interleave4x4_height{ 1 };   /**< Multiplication factor for the height of the 4x4 interleaved block */
+    GEMMLHSMatrixInfo       lhs_info{};                       /**< LHS matrix information used to retrieve the number of rows processed by each thread */
+    GEMMRHSMatrixInfo       rhs_info{};                       /**< RHS matrix information used for reshaping the RHS matrix */
+    int32_t                 a_offset{ 0 };                    /**< Offset to be added to each element of the matrix A */
+    int32_t                 b_offset{ 0 };                    /**< Offset to be added to each element of the matrix B */
+    GEMMLowpOutputStageInfo output_stage{};                   /**< GEMMLowp output stage information */
 };
 
 /** Descriptor used by the depthwise convolution kernels */
commit	b54ba2848515bf0aee0619c760518481f58c7525	[log] [tgz]
author	Michele Di Giorgio <michele.digiorgio@arm.com>	Tue Jan 14 15:31:55 2020 +0000
committer	Michele Di Giorgio <michele.digiorgio@arm.com>	Fri Mar 06 09:14:46 2020 +0000
tree	7082afffb3b087401904454e33e005544a6d7ab2
parent	b46702118eddcfec11487be8dd23234066642d62 [diff] [blame]