COMPMID-661: Vectorize im2col and add lws heuristics for convolution kernels #46

Change-Id: Idaab987384d6a12a114f609abd50446fd94536b2
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110879
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index d39dcdb..16706dd 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -68,7 +68,19 @@
     GPUTarget arch_target = get_arch_from_target(get_target());
 
     // Configure LWS hint
-    _lws_hint = (output->info()->dimension(1) == 196) ? cl::NDRange(1, 7) : cl::NDRange(8, 8);
+    if(arch_target == GPUTarget::BIFROST && input1->info()->dimension(1) == 24)
+    {
+        // LWS optimized for the 11x11 AlexNet convolution on Bifrost.
+        _lws_hint = cl::NDRange(2, 2);
+    }
+    else if(output->info()->dimension(1) == 196)
+    {
+        _lws_hint = cl::NDRange(1, 7);
+    }
+    else
+    {
+        _lws_hint = cl::NDRange(8, 8);
+    }
 
     // Create build options
     CLBuildOptions build_opts;