COMPMID-286: CL colour convert to U8

Change-Id: I62bbf510cc106a90ed2884be3c9c0c127da25898
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/150681
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLColorConvertKernel.h b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
index edd05ef..63e11bb 100644
--- a/arm_compute/core/CL/kernels/CLColorConvertKernel.h
+++ b/arm_compute/core/CL/kernels/CLColorConvertKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,7 +55,8 @@
      *
      * @param[in]  input  Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
      * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
-     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/)
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the format of @p input is RGB888)
      */
     void configure(const ICLTensor *input, ICLTensor *output);
     /** Set the input and output of the kernel
diff --git a/arm_compute/runtime/CL/functions/CLColorConvert.h b/arm_compute/runtime/CL/functions/CLColorConvert.h
index dd7de45..8f4fa56 100644
--- a/arm_compute/runtime/CL/functions/CLColorConvert.h
+++ b/arm_compute/runtime/CL/functions/CLColorConvert.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,8 @@
      *
      * @param[in]  input  Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
      * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
-     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/)
+     *                                                          RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+     *                                                          U8 (if the format of @p input is RGB888)
      */
     void configure(const ICLTensor *input, ICLTensor *output);
     /** Initialize the function's source, destination
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 7e8ef6b..87b588e 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -352,6 +352,7 @@
     { "RGB888_to_IYUV_bt709", "color_convert.cl" },
     { "RGB888_to_NV12_bt709", "color_convert.cl" },
     { "RGB888_to_RGBA8888_bt709", "color_convert.cl" },
+    { "RGB888_to_U8_bt709", "color_convert.cl" },
     { "RGB888_to_YUV444_bt709", "color_convert.cl" },
     { "RGBA8888_to_IYUV_bt709", "color_convert.cl" },
     { "RGBA8888_to_NV12_bt709", "color_convert.cl" },
diff --git a/src/core/CL/cl_kernels/color_convert.cl b/src/core/CL/cl_kernels/color_convert.cl
index 02a0c8e..7a872b4 100644
--- a/src/core/CL/cl_kernels/color_convert.cl
+++ b/src/core/CL/cl_kernels/color_convert.cl
@@ -64,6 +64,54 @@
     vstore16(rgba_3, 0, out.ptr + 48);
 }
 
+/** Convert an RGB888 image to U8 (greyscale) as the weighted sum 0.2126 * R + 0.7152 * G + 0.0722 * B
+ *
+ * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
+ * No offset.
+ *
+ * @param[in]  input_ptr                            Pointer to the source image. Supported Format: RGB888
+ * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
+ * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem (in bytes)
+ * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
+ * @param[in]  input_step_y                         input_stride_y * number of elements along Y processed per workitem (in bytes)
+ * @param[in]  input_offset_first_element_in_bytes  The offset of the first element in the source image
+ * @param[out] output_ptr                           Pointer to the destination image. Supported Format: U8
+ * @param[in]  output_stride_x                      Stride of the destination image in X dimension (in bytes)
+ * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem (in bytes)
+ * @param[in]  output_stride_y                      Stride of the destination image in Y dimension (in bytes)
+ * @param[in]  output_step_y                        output_stride_y * number of elements along Y processed per workitem (in bytes)
+ * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void RGB888_to_U8_bt709(
+    IMAGE_DECLARATION(input),
+    IMAGE_DECLARATION(output))
+{
+    Image in  = CONVERT_TO_IMAGE_STRUCT(input);
+    Image out = CONVERT_TO_IMAGE_STRUCT(output);
+
+    // Each work-item handles 16 pixels: 48 interleaved RGB bytes loaded as three 16-byte vectors
+    const uchar16 rgb_0 = vload16(0, in.ptr);
+    const uchar16 rgb_1 = vload16(0, in.ptr + 16);
+    const uchar16 rgb_2 = vload16(0, in.ptr + 32);
+
+    // De-interleave the 16 RGB triplets (R0 G0 B0 R1 G1 B1 ...) into 16 R, 16 G and 16 B values
+    const uchar16 rgb_r = (uchar16)(rgb_0.s0369, rgb_0.scf, rgb_1.s258b, rgb_1.se, rgb_2.s147a, rgb_2.sd);
+    const uchar16 rgb_g = (uchar16)(rgb_0.s147a, rgb_0.sd, rgb_1.s0369, rgb_1.scf, rgb_2.s258b, rgb_2.se);
+    const uchar16 rgb_b = (uchar16)(rgb_0.s258b, rgb_0.se, rgb_1.s147a, rgb_1.sd, rgb_2.s0369, rgb_2.scf);
+
+    const float16 rgb2u8_red_coef_bt709   = 0.2126f;
+    const float16 rgb2u8_green_coef_bt709 = 0.7152f;
+    const float16 rgb2u8_blue_coef_bt709  = 0.0722f;
+
+    // Compute the 16 greyscale values in float as the weighted sum of the R, G and B channels
+    const float16 greyscale_f_0 = rgb2u8_red_coef_bt709 * convert_float16(rgb_r) + rgb2u8_green_coef_bt709 * convert_float16(rgb_g) + rgb2u8_blue_coef_bt709 * convert_float16(rgb_b);
+
+    // Convert to 16 greyscale uchar values, saturating and rounding towards zero
+    const uchar16 greyscale_u8_0 = convert_uchar16_sat_rtz(greyscale_f_0);
+
+    vstore16(greyscale_u8_0, 0, out.ptr);
+}
+
 /** Convert an RGB888 image to RGBX8888
  *
  * Global Workgroup Size [ DIV_CEIL(width, 16), height ]
diff --git a/src/core/CL/kernels/CLColorConvertKernel.cpp b/src/core/CL/kernels/CLColorConvertKernel.cpp
index e79019e..4f178c9 100644
--- a/src/core/CL/kernels/CLColorConvertKernel.cpp
+++ b/src/core/CL/kernels/CLColorConvertKernel.cpp
@@ -61,6 +61,7 @@
                     num_elems_processed_per_iteration = 16;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -75,6 +76,7 @@
                     num_elems_processed_per_iteration = 8;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -84,9 +86,11 @@
             switch(output->info()->format())
             {
                 case Format::RGBA8888:
+                case Format::U8:
                     num_elems_processed_per_iteration = 16;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -143,6 +147,7 @@
                     num_elems_processed_per_iteration = 4;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -220,6 +225,7 @@
                     num_elems_read_per_iteration_x    = 16;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -235,6 +241,7 @@
                     num_elems_read_per_iteration_x    = 8;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -303,6 +310,7 @@
                     num_elems_processed_per_iteration = 16;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
@@ -316,6 +324,7 @@
                     num_elems_processed_per_iteration = 16;
                     break;
                 default:
+                    ARM_COMPUTE_ERROR("Not supported");
                     break;
             }
             break;
diff --git a/tests/benchmark/CL/ColorConvert.cpp b/tests/benchmark/CL/ColorConvert.cpp
index a633791..a89357b 100644
--- a/tests/benchmark/CL/ColorConvert.cpp
+++ b/tests/benchmark/CL/ColorConvert.cpp
@@ -47,6 +47,9 @@
 const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
                                               framework::dataset::make("FormatType", { Format::RGBA8888 }));
 
+const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
+                                            framework::dataset::make("FormatType", { Format::U8 }));
+
 const auto ColorConvert_YUYV_to_RGBDataset = combine(YUYVDataset,
                                                      RGBDataset);
 
@@ -82,6 +85,13 @@
 REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA));
 TEST_SUITE_END()
 
+TEST_SUITE(RGBtoU8)
+// *INDENT-OFF*
+// clang-format off
+REGISTER_FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8));
+REGISTER_FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8));
+TEST_SUITE_END()
+
 TEST_SUITE(YUV)
 // *INDENT-OFF*
 // clang-format off
diff --git a/tests/validation/CL/ColorConvert.cpp b/tests/validation/CL/ColorConvert.cpp
index 34b0e0d..7210a7a 100644
--- a/tests/validation/CL/ColorConvert.cpp
+++ b/tests/validation/CL/ColorConvert.cpp
@@ -52,6 +52,9 @@
 const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
                                               framework::dataset::make("FormatType", { Format::RGBA8888 }));
 
+const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
+                                            framework::dataset::make("FormatType", { Format::U8 }));
+
 const auto ColorConvert_YUYV_to_RGBDataset = combine(YUYVDataset,
                                                      RGBDataset);
 
@@ -143,6 +146,12 @@
     validate_configuration(shape, src_format, dst_format);
 }
 
+DATA_TEST_CASE(RGBtoU8, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_RGB_to_U8),
+               shape, src_format, dst_format)
+{
+    validate_configuration(shape, src_format, dst_format); // Invoked for every small and large 2D shape paired with the RGB888 -> U8 format combination
+}
+
 DATA_TEST_CASE(YUV, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Large2DShapes()), ColorConvert_YUYV_to_RGBDataset),
                shape, src_format, dst_format)
 {
@@ -213,6 +222,25 @@
 }
 TEST_SUITE_END()
 
+TEST_SUITE(RGBtoU8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8))
+{
+    // Validate each output plane of the CL target against the matching reference plane
+    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
+    {
+        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8))
+{
+    // Validate each output plane of the CL target against the matching reference plane
+    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
+    {
+        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
+    }
+}
+TEST_SUITE_END()
+
 TEST_SUITE(YUV)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), ColorConvert_YUYV_to_RGBDataset))
 {