COMPMID-922 - CLGEMM FP16 optimizations - part2. This patch improves GEMM FP16 performance by ~30% when the reshape is required. The results have been reported on the following Confluence page: https://confluence.arm.com/display/MLENG/GEMM+FP16+performance%3A+ACL+18.05 Change-Id: I8233095a7e9ab06f1f915782a25dd41653b49140 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/128254 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>

commit: bb36a8efc1092f66798e3b880c55ec488021bb02 [log] [tgz]
author: Gian Marco Iodice <gianmarco.iodice@arm.com> Thu Apr 19 12:05:08 2018 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:51:17 2018 +0000
tree: 62e0265d84575bc10496c84f4908ed27529166ea
parent: 4dcb583c052e14f08809cc9ee420e690264e7bbe [diff] [blame]
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index e735adb..1ee51a0 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp

@@ -32,6 +32,7 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/ITensorAllocator.h"
@@ -47,7 +48,7 @@
     if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72, GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT, GPUTarget::TNOX))
     {
         // COMPMID-852
-        if(k > 256 && m > 4 && data_type == DataType::F32 && reshape_b_only_on_first_run)
+        if(k > 256 && m > 4 && is_data_type_float(data_type) && reshape_b_only_on_first_run)
         {
             const float scale = k < 1024 ? 2.0f : 2.5f;
             flag              = (scale * n) > ((1.66f * n) + 38.4f);
commit	bb36a8efc1092f66798e3b880c55ec488021bb02	[log] [tgz]
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	Thu Apr 19 12:05:08 2018 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:51:17 2018 +0000
tree	62e0265d84575bc10496c84f4908ed27529166ea
parent	4dcb583c052e14f08809cc9ee420e690264e7bbe [diff] [blame]