COMPMID-556: Fix CLNormalization issues.

- Moves the relative slice offset calculation out of the CL kernel core
  loop: the loop bounds are now expressed relative to the current slice.
- Changes the elements accessed for CROSS_MAP to reduce the redundant
  padding applied.

Change-Id: If41c3adddd977be9386fe34940d055c301ccbb91
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95917
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index 4e65560..f870589 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -93,13 +93,13 @@
 
     const int current_slice = get_global_id(2);
 
-    const int left_slice  = max(current_slice - (int)RADIUS, (int)0);
-    const int right_slice = min(current_slice + (int)RADIUS, (int)(NUM_SLICES - 1));
+    const int left_slice  = max(-(int)RADIUS, -current_slice);
+    const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice);
 
     for(int i = left_slice; i <= right_slice; i++)
     {
         VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
-        values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i - current_slice));
+        values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i));
         acc    = ADD_OP(acc, MUL_OP(values, values));
     }