COMPMID-897 Merge batch normalization with bounded relu
Change-Id: I9a607fe620f795cdea1a99fdd3f5f8c2fc76f980
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119234
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
index 53fb515..7629b25 100644
--- a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
+++ b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,6 +36,16 @@
#define INVSQRT_OP(a) inversesqrt((a))
#define SQCVT_SAT(a) (a)
+#if defined(LU_BRELU)
+#define ACTIVATION_FUNC(x) min(max(x, float(B_VAL)), float(A_VAL))
+#elif defined(BRELU)
+#define ACTIVATION_FUNC(x) min(max(x, float(0)), float(A_VAL))
+#elif defined(RELU)
+#define ACTIVATION_FUNC(x) max(x, float(0))
+#else /* defined(FUSED_ACT) */
+#define ACTIVATION_FUNC(x) (x)
+#endif /* defined(FUSED_ACT) */
+
/** Apply batch normalization.
*
* @note The data type must be passed at compile time using "#define DATA_TYPE_NAME". e.g. "#define DATA_TYPE_FP32"
@@ -102,7 +112,7 @@
gamma_param = LOAD(gamma_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(gamma_iter, current_slice * beta_attrs.stride_x));
beta_param = LOAD(beta_ptr, TENSOR_OFFSET_ADVANCE_IN_BYTES(beta_iter, current_slice * beta_attrs.stride_x));
- STORE_CURRENT_ITEM(dst_ptr, dst_iter, ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+ STORE_CURRENT_ITEM(dst_ptr, dst_iter, ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param)));
}
#elif defined(DATA_TYPE_FP16)
@@ -148,7 +158,7 @@
gamma_param = unpacked_s[3].x;
beta_param = unpacked_s[4].x;
- result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+ result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
}
@@ -163,7 +173,7 @@
gamma_param = unpacked_s[3].y;
beta_param = unpacked_s[4].y;
- result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+ result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
}
@@ -178,7 +188,7 @@
gamma_param = unpacked_s[3].z;
beta_param = unpacked_s[4].z;
- result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+ result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
}
@@ -193,7 +203,7 @@
gamma_param = unpacked_s[3].w;
beta_param = unpacked_s[4].w;
- result = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+ result = ACTIVATION_FUNC(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
STORE_PACK4_CURRENT_ITEM_HALF(dst_ptr, dst_iter, result);
}