Allow running without cl_khr_fp16

Change-Id: I0f5396c8f32acc28914e2ff9fe953f977a3077b9
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93405
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 09ec329..901ac3f 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -58,6 +58,13 @@
             return arm_compute::GPUTarget::MIDGARD;
     }
 }
+
+bool extension_support(const cl::Device &device, const char *extension_name)
+{
+    std::string extensions = device.getInfo<CL_DEVICE_EXTENSIONS>();
+    auto        pos        = extensions.find(extension_name);
+    return (pos != std::string::npos);
+}
 } // namespace
 
 namespace arm_compute
@@ -206,21 +213,12 @@
 
 bool non_uniform_workgroup_support(const cl::Device &device)
 {
-    std::vector<char> extension;
-    size_t            extension_size = 0;
-    cl_int            err            = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, 0, nullptr, &extension_size);
-    ARM_COMPUTE_ERROR_ON_MSG((err != 0) || (extension_size == 0), "clGetDeviceInfo failed to return valid information");
-    ARM_COMPUTE_UNUSED(err);
-    // Resize vector
-    extension.resize(extension_size);
-    // Query extension
-    err = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, extension_size, extension.data(), nullptr);
-    ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetDeviceInfo failed to return valid information");
-    ARM_COMPUTE_UNUSED(err);
+    return extension_support(device, "cl_arm_non_uniform_work_group_size");
+}
 
-    std::string extension_str(extension.begin(), extension.end());
-    auto        pos = extension_str.find("cl_arm_non_uniform_work_group_size");
-    return (pos != std::string::npos);
+bool fp16_support(const cl::Device &device)
+{
+    return extension_support(device, "cl_khr_fp16");
 }
 
 CLVersion get_cl_version(const cl::Device &device)
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 62ef259..9e2b5bd 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -596,6 +596,11 @@
 
     std::string concat_str;
 
+    if(fp16_support(_device))
+    {
+        concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
+    }
+
     if(non_uniform_workgroup_support(_device))
     {
         concat_str += " -cl-arm-non-uniform-work-group-size ";
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 7f2a08b..d08e821 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -704,6 +704,7 @@
     vstore4((float4)(c30, c31, c32, c33), 0, (__global float *)(offset(&dst, 0, 3)));
 }
 
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
 /** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1)
  *  Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_16bit and @ref gemm_transpose1x8 before running the matrix multiplication
  *
@@ -802,6 +803,7 @@
     vstore8(c20, 0, (__global half *)(offset(&dst, 0, 2)));
     vstore8(c30, 0, (__global half *)(offset(&dst, 0, 3)));
 }
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
 
 #ifdef FIXED_POINT_POSITION
 /** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in 8 bit fixed point precision
@@ -1652,4 +1654,4 @@
 
     vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0)));
 }
-#endif /* WIDTH_VECTOR_A */
\ No newline at end of file
+#endif /* WIDTH_VECTOR_A */
diff --git a/src/core/CL/cl_kernels/helpers.h b/src/core/CL/cl_kernels/helpers.h
index 4421e74..330d67d 100644
--- a/src/core/CL/cl_kernels/helpers.h
+++ b/src/core/CL/cl_kernels/helpers.h
@@ -24,7 +24,10 @@
 #ifndef ARM_COMPUTE_HELPER_H
 #define ARM_COMPUTE_HELPER_H
 
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
+
 #if defined(ARM_COMPUTE_DEBUG_ENABLED)
 #pragma OPENCL EXTENSION cl_arm_printf : enable
 #endif // defined(ARM_COMPUTE_DEBUG_ENABLED)