APPBROWSER-314: Performance optimization for BatchNormalizationLayer

Change-Id: Ie3ad9abb64e90720609bb6e67662eaf9dd4f3689
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111826
Reviewed-by: Joel Liang <joel.liang@arm.com>
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
(cherry picked from commit 02c1fa663926cc4fcd1995d4d18d7528e0c85d94)
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111834
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
index 982143f..dee2a55 100644
--- a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp
@@ -64,7 +64,11 @@
     _gamma   = gamma;
     _epsilon = epsilon;
 
-    const unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size();
+    unsigned int num_elems_processed_per_iteration = 1;
+    if(input->info()->data_type() == DataType::F16)
+    {
+        num_elems_processed_per_iteration = 4;
+    }
 
     // Set build options
     std::set<std::string> build_opts;
@@ -83,10 +87,10 @@
 
     AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
     AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowStatic     mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 1, mean->info()->dimension(1));
-    AccessWindowStatic     var_access(var->info(), 0, 0, var->info()->dimension(0) + 1, var->info()->dimension(1));
-    AccessWindowStatic     beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 1, beta->info()->dimension(1));
-    AccessWindowStatic     gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 1, gamma->info()->dimension(1));
+    AccessWindowStatic     mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 3, mean->info()->dimension(1));
+    AccessWindowStatic     var_access(var->info(), 0, 0, var->info()->dimension(0) + 3, var->info()->dimension(1));
+    AccessWindowStatic     beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 3, beta->info()->dimension(1));
+    AccessWindowStatic     gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 3, gamma->info()->dimension(1));
 
     update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access);
     output_access.set_valid_region(win, input->info()->valid_region());