COMPMID-431 Port CLDepthConvert to use 8-bit and 16-bit fixed point

Change-Id: Iedea9e985427e6242f34a5362615f79c0526d5bd
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79786
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl
index 3a1c7ca..a9b7284 100644
--- a/src/core/CL/cl_kernels/depth_convert.cl
+++ b/src/core/CL/cl_kernels/depth_convert.cl
@@ -23,24 +23,47 @@
  */
 #include "helpers.h"
 
+#if defined(FIXED_POINT_POSITION)
+
+#include "fixed_point.h"
+
+#ifdef SATURATE
+#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position)
+#define CONVERT_DOWN1_SAT(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type##_sat(x, fixed_point_position)
+#else /* SATURATE */
+#define CONVERT_DOWN(x, in_type, out_type, fixed_point_position) CONVERT_DOWN1(x, in_type, out_type, fixed_point_position)
+#define CONVERT_DOWN1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position)
+#endif /* SATURATE */
+
+#define CONVERT_UP(x, in_type, out_type, fixed_point_position) CONVERT_UP1(x, in_type, out_type, fixed_point_position)
+#define CONVERT_UP1(x, in_type, out_type, fixed_point_position) convert_##out_type##_##in_type(x, fixed_point_position)
+
+#else /* FIXED_POINT_POSITION */
+
 #ifdef SATURATE
 #define CONVERT_DOWN(x, type) CONVERT_SAT(x, type)
 #else /* SATURATE */
 #define CONVERT_DOWN(x, type) CONVERT(x, type)
 #endif /* SATURATE */
 
+#define CONVERT_UP(x, type) CONVERT(x, type)
+
+#endif /* FIXED_POINT_POSITION */
+
 /** This function performs a down-scaling depth conversion.
  *
  * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
  * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
  *
- * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32
+ * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
+ *
+ * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, U16, S16, U32, S32, F16, F32
  * @param[in]  in_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  in_step_x                         in_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  in_stride_y                       Stride of the source image in Y dimension (in bytes)
  * @param[in]  in_step_y                         in_stride_y * number of elements along Y processed per workitem(in bytes)
  * @param[in]  in_offset_first_element_in_bytes  The offset of the first element in the source image
- * @param[out] out_ptr                           Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32
+ * @param[out] out_ptr                           Pointer to the destination image. Supported data types: QS8, U8, QS16, U16, S16, U32, S32
  * @param[in]  out_stride_x                      Stride of the destination image in X dimension (in bytes)
  * @param[in]  out_step_x                        out_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  out_stride_y                      Stride of the destination image in Y dimension (in bytes)
@@ -60,7 +83,12 @@
     // Load data
     VEC_DATA_TYPE(DATA_TYPE_IN, 16)
     in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
+
+#if defined(FIXED_POINT_POSITION)
+    vstore16(CONVERT_DOWN(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr);
+#else  /* FIXED_POINT_POSITION */
     vstore16(CONVERT_DOWN(in_data >> shift, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global DATA_TYPE_OUT *)out.ptr);
+#endif /* FIXED_POINT_POSITION */
 }
 
 /** This function performs a up-scaling depth conversion.
@@ -68,13 +96,15 @@
  * @attention The input and output data_types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT:
  * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short
  *
- * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, U16, S16, U32 or S32
+ * @note In case of fixed-point operation -DFIXED_POINT_POSITION=fixed_point_position must be provided: e.g. -DFIXED_POINT_POSITION=3
+ *
+ * @param[in]  in_ptr                            Pointer to the source image. Supported data types: U8, QS8, U16, S16, QS16, U32 or S32
  * @param[in]  in_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  in_step_x                         in_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  in_stride_y                       Stride of the source image in Y dimension (in bytes)
  * @param[in]  in_step_y                         in_stride_y * number of elements along Y processed per workitem(in bytes)
  * @param[in]  in_offset_first_element_in_bytes  The offset of the first element in the source image
- * @param[out] out_ptr                           Pointer to the destination image. Supported data types: U8, U16, S16, U32 or S32
+ * @param[out] out_ptr                           Pointer to the destination image. Supported data types: U8, U16, S16, U32, S32, F16 or F32
  * @param[in]  out_stride_x                      Stride of the destination image in X dimension (in bytes)
  * @param[in]  out_step_x                        out_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  out_stride_y                      Stride of the destination image in Y dimension (in bytes)
@@ -92,7 +122,12 @@
     Image out = CONVERT_TO_IMAGE_STRUCT(out);
 
     // Load data
-    VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
-    in_data = CONVERT(vload16(0, (__global DATA_TYPE_IN *)in.ptr), VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-    vstore16(in_data << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
+    VEC_DATA_TYPE(DATA_TYPE_IN, 16)
+    in_data = vload16(0, (__global DATA_TYPE_IN *)in.ptr);
+
+#if defined(FIXED_POINT_POSITION)
+    vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_IN, 16), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), FIXED_POINT_POSITION), 0, (__global DATA_TYPE_OUT *)out.ptr);
+#else  /* FIXED_POINT_POSITION */
+    vstore16(CONVERT_UP(in_data, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)) << shift, 0, (__global DATA_TYPE_OUT *)out.ptr);
+#endif /* FIXED_POINT_POSITION */
 }
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
index 9fd3a6f..5d340c4 100644
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ b/src/core/CL/cl_kernels/fixed_point.h
@@ -381,4 +381,34 @@
 #define INVSQRT_OP_EXPAND_STR(a, type, size, position) invsqrt_sat_##type##x##size((a), (position))
 #define INVSQRT_OP_EXPAND(a, type, size, position) INVSQRT_OP_EXPAND_STR(a, type, size, position)
 
+#define floatx16 float16
+#define float16_TYPE float16
+
+#define CONVERTQ_DOWN_IMPL(in_type, out_type)                                                                                      \
+    inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position)                                            \
+    {                                                                                                                              \
+        return CONVERT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \
+    }
+
+CONVERTQ_DOWN_IMPL(float16, qs8x16)
+CONVERTQ_DOWN_IMPL(float16, qs16x16)
+
+#define CONVERTQ_DOWN_SAT_IMPL(in_type, out_type)                                                                                      \
+    inline out_type convert_##out_type##_##in_type##_sat(in_type a, int fixed_point_position)                                          \
+    {                                                                                                                                  \
+        return CONVERT_SAT(a * (1 << fixed_point_position) + select((in_type)-0.5, (in_type)0.5, isgreater(a, (in_type)0)), out_type); \
+    }
+
+CONVERTQ_DOWN_SAT_IMPL(float16, qs8x16)
+CONVERTQ_DOWN_SAT_IMPL(float16, qs16x16)
+
+#define CONVERTQ_UP_IMPL(in_type, out_type)                                             \
+    inline out_type convert_##out_type##_##in_type(in_type a, int fixed_point_position) \
+    {                                                                                   \
+        return CONVERT(a, out_type) / (1 << fixed_point_position);                      \
+    }
+
+CONVERTQ_UP_IMPL(qs8x16, float16)
+CONVERTQ_UP_IMPL(qs16x16, float16)
+
 #endif // ARM_COMPUTE_FIXED_POINT_H
diff --git a/src/core/CL/kernels/CLDepthConvertKernel.cpp b/src/core/CL/kernels/CLDepthConvertKernel.cpp
index 24608bd..c43884a 100644
--- a/src/core/CL/kernels/CLDepthConvertKernel.cpp
+++ b/src/core/CL/kernels/CLDepthConvertKernel.cpp
@@ -40,13 +40,21 @@
 
 void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16,
+                                                  DataType::U16, DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16,
+                                                  DataType::U16, DataType::U32, DataType::S32, DataType::F32);
     ARM_COMPUTE_ERROR_ON(input == output);
     ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different");
     ARM_COMPUTE_ERROR_ON(shift >= 8);
 
     // Check if convertion is supported
+    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32,
+                             "Only data types supported [in] QS8 -> [out] F32");
+    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32),
+                             "Only data types supported [in] QS16 -> [out] F32");
+    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16),
+                             "Only data types supported [in] F32 -> [out] QS8, QS16");
     ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16
                                                                             && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32),
                              "Only data types supported [in] U8 -> [out] U16, S16, U32, S32");
@@ -67,6 +75,11 @@
                                                                              && output->info()->data_type() != DataType::S16),
                              "Only data types supported [in] S32 ->  [out] U8, U16, S16");
 
+    // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
+    set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
     // Get data sizes
     const size_t input_size  = data_size_from_type(input->info()->data_type());
     const size_t output_size = data_size_from_type(output->info()->data_type());
@@ -83,8 +96,12 @@
     {
         kernel_name += "_up";
     }
-    build_opts.insert("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
-    build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
+    build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type()));
+    build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
+    if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type()))
+    {
+        build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+    }
 
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));