COMPMID-556: Fix CLNormalization issues.
- Moves per-iteration index calculations out of the CL kernel's core loop.
- Changes the elements accessed for CROSS_MAP to reduce the redundant
  padding that is applied.
Change-Id: If41c3adddd977be9386fe34940d055c301ccbb91
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95917
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index 4e65560..f870589 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -93,13 +93,13 @@
const int current_slice = get_global_id(2);
- const int left_slice = max(current_slice - (int)RADIUS, (int)0);
- const int right_slice = min(current_slice + (int)RADIUS, (int)(NUM_SLICES - 1));
+ const int left_slice = max(-(int)RADIUS, -current_slice);
+ const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice);
for(int i = left_slice; i <= right_slice; i++)
{
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i - current_slice));
+ values = LOAD_OP(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i));
acc = ADD_OP(acc, MUL_OP(values, values));
}