COMPMID-415: Use half_float library for F16

3RDPARTY_UPDATE

Change-Id: Iee572e18d5b1df71300d738cc8690f49d7203d5c
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/81353
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index db15720..00c73e7 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -754,7 +754,7 @@
     half8 c20 = 0.0f;
     half8 c30 = 0.0f;
 
-    for(; src_addr.s1 <= (end_row_mtx_b - 8); src_addr += (int2)(8, 16))
+    for(; src_addr.s1 <= (end_row_mtx_b - 16); src_addr += (int2)(8, 16))
     {
         /* Load values from matrix A (interleaved) and matrix B (transposed) */
         half4 a0 = vload4(0, ((__global half *)src0_ptr) + src_addr.s0);