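COMPMID-417 Fix reduction kernel's __local buffer size

The size passed to setArg for the __local sums buffer was _lws_hint[0]
on its own, i.e. an element count rather than a byte count. Multiply it
by the input tensor's element size so the argument size is in bytes.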

Change-Id: If97a79d86b174b1d9b41360303d624e3b2d22001
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/87703
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/src/core/CL/kernels/CLReductionOperationKernel.cpp b/src/core/CL/kernels/CLReductionOperationKernel.cpp
index 7595d8e..18a8e35 100644
--- a/src/core/CL/kernels/CLReductionOperationKernel.cpp
+++ b/src/core/CL/kernels/CLReductionOperationKernel.cpp
@@ -126,7 +126,8 @@
     in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start(), in_slice.x().end() + border_width, in_slice.x().step()));
 
     // Set local sums buffer
-    _kernel.setArg(num_arguments_per_1D_tensor() * 2, _lws_hint[0], nullptr);
+    unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size();
+    _kernel.setArg(num_arguments_per_1D_tensor() * 2, local_sum_size, nullptr);
 
     do
     {