COMPMID-897 Merge batch normalization with bounded relu

Change-Id: I9a607fe620f795cdea1a99fdd3f5f8c2fc76f980
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119234
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
index 53fb515..7629b25 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,6 +36,16 @@
 #define INVSQRT_OP(a) inversesqrt((a))
 #define SQCVT_SAT(a) (a)
 
+#if defined(LU_BRELU)
+#define ACTIVATION_FUNC(x) min(max(x, float(B_VAL)), float(A_VAL))
+#elif defined(BRELU)
+#define ACTIVATION_FUNC(x) min(max(x, float(0)), float(A_VAL))
+#elif defined(RELU)
+#define ACTIVATION_FUNC(x) max(x, float(0))
+#else /* !defined(LU_BRELU) && !defined(BRELU) && !defined(RELU): no activation, pass-through */
+#define ACTIVATION_FUNC(x) (x)
+#endif /* defined(LU_BRELU) / defined(BRELU) / defined(RELU) */
+
 /** Apply batch normalization.
  *
  * @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
@@ -102,7 +112,7 @@
     gamma_param = LOAD(gamma_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(gamma_iter, current_slice * beta_attrs.stride_x));
     beta_param  = LOAD(beta_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(beta_iter, current_slice * beta_attrs.stride_x));
 
-    STORE_CURRENT_ITEM(dst_ptr, dst_iter, ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+    STORE_CURRENT_ITEM(dst_ptr, dst_iter, ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)));
 }
 
 #elif defined(DATA_TYPE_FP16)
@@ -148,7 +158,7 @@
 
         gamma_param = unpacked_s[3].x;
         beta_param  = unpacked_s[4].x;
-        result      = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+        result      = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
 
         STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
     }
@@ -163,7 +173,7 @@
 
         gamma_param = unpacked_s[3].y;
         beta_param  = unpacked_s[4].y;
-        result      = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+        result      = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
 
         STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
     }
@@ -178,7 +188,7 @@
 
         gamma_param = unpacked_s[3].z;
         beta_param  = unpacked_s[4].z;
-        result      = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+        result      = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
 
         STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
     }
@@ -193,7 +203,7 @@
 
         gamma_param = unpacked_s[3].w;
         beta_param  = unpacked_s[4].w;
-        result      = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+        result      = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
 
         STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
     }