COMPMID-2266: [CL] add support for Log Softmax

Change-Id: I4a8f3519328553e24cbb4fe45a8ca4d47c90975d
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2182
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/cl_kernels/softmax_layer_quantized.cl b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
index 95d6d4b..8ccc5d3 100644
--- a/src/core/CL/cl_kernels/softmax_layer_quantized.cl
+++ b/src/core/CL/cl_kernels/softmax_layer_quantized.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,6 +25,7 @@
 
 #define MAX_OP(x, y, type, size) max((x), (y))
 #define ADD_OP(x, y, type, size) ((x) + (y))
+#define SUB_OP(x, y, type, size) ((x) - (y))
 
 /* Number of workitems in dimension 0. */
 #if !defined(GRID_SIZE)
@@ -559,12 +560,14 @@
     int sum_val = *((__global int *)offset(&sum, 0, get_global_id(1)));
 
     // It will be better to calculate this in prev layer and pass here as parameter
-    uint  sum_val_u               = convert_uint(sum_val);
+    uint sum_val_u = convert_uint(sum_val);
+#ifndef LOG_SOFTMAX
     int   headroom_plus_one       = clz(sum_val_u);
     int   num_bits_over_unit      = EXP_ACCUMULATION_INT_BITS - headroom_plus_one;
     int   shifted_sum_minus_one_1 = convert_int((sum_val_u << headroom_plus_one) - (1u << 31));
     int16 shifted_sum_minus_one   = shifted_sum_minus_one_1;
     int16 shifted_scale           = ASYMM_ONE_OVER_ONE_PLUS_X_FOR_X_IN_0_1(shifted_sum_minus_one, 16);
+#endif /* LOG_SOFTMAX */
 
     // It was already calculated in prev layer, should be stored into tmp output and reused
     int16 data_diff      = vload16(0, (__global int *)offset(&src, 0, 0));
@@ -577,8 +580,12 @@
 #endif /* defined(INPUT_BETA_MULTIPLIER) && defined(INPUT_BETA_LEFT_SHIFT) */
     int16 data = ASYMM_EXP_ON_NEGATIVE_VALUES(data_diff_mult, SCALED_DIFF_INT_BITS, 16);
 
+#ifdef LOG_SOFTMAX
+    data = SUB_OP(data_diff_mult, (int16)sum_val_u, int, 16);
+#else  /* LOG_SOFTMAX */
     data = ASYMM_MULT(shifted_scale, data, 16);
     data = ASYMM_ROUNDING_DIVIDE_BY_POW2(data, num_bits_over_unit + 31 - 8, 16);
+#endif /* LOG_SOFTMAX */
     data = select(0, data, data_diff >= (int16)(DIFF_MIN));
     vstore16(convert_uchar16_sat(data), 0, (__global uchar *)offset(&dst, 0, 0));
 }