COMPMID-879: Investigate CL mismatches in Convolution S16

Changes and simplifies the validation to divide the scale in integer format instead of double.

Change-Id: Ib9156e9515e4e542391eeda11548f3d15613a0af
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119256
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/core/CL/kernels/CLConvolutionKernel.cpp b/src/core/CL/kernels/CLConvolutionKernel.cpp
index fd64dc4..2b08c8d 100644
--- a/src/core/CL/kernels/CLConvolutionKernel.cpp
+++ b/src/core/CL/kernels/CLConvolutionKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,8 +63,8 @@
     _input  = input;
     _output = output;
 
-    std::stringstream     kernel_name;
-    std::set<std::string> options;
+    std::stringstream kernel_name;
+    CLBuildOptions    build_opts;
     kernel_name << "convolution" << matrix_size << "x" << matrix_size << "_static";
 
     if(scale == 0)
@@ -76,19 +76,19 @@
     {
         std::stringstream mat_str;
         mat_str << "-DMAT" << i << "=" << conv[i];
-        options.insert(mat_str.str());
+        build_opts.add_option(mat_str.str());
     }
 
-    options.insert("-DSCALE=" + support::cpp11::to_string(scale));
+    build_opts.add_option("-DSCALE=" + support::cpp11::to_string(scale));
 
     DataType data_type = data_type_for_convolution_matrix(conv, matrix_size * matrix_size);
-    options.insert("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
 
     std::stringstream out_type;
     out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type());
-    options.insert(out_type.str());
+    build_opts.add_option(out_type.str());
 
-    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), options));
+    _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_opts.options()));
 
     // Configure kernel window
     constexpr unsigned int num_elems_processed_per_iteration = 8;
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
index c7724a5..4328536 100644
--- a/tests/validation/CL/Convolution.cpp
+++ b/tests/validation/CL/Convolution.cpp
@@ -41,11 +41,6 @@
 {
 namespace validation
 {
-namespace
-{
-constexpr AbsoluteTolerance<int> tolerance_s16(1); //COMPMID-879: Investigate S16 mismatches
-} // namespace
-
 TEST_SUITE(CL)
 TEST_SUITE(CustomConvolution)
 TEST_SUITE(Square3x3)
@@ -118,7 +113,7 @@
                                                                                                            framework::dataset::make("filter_size", { 3 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
                                                                                                                  DataType::S16)),
@@ -126,7 +121,7 @@
                                                                                                          framework::dataset::make("filter_size", { 3 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Square 3x3 */
@@ -201,7 +196,7 @@
                                                                                                            framework::dataset::make("filter_size", { 5 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -210,7 +205,7 @@
                                                                                                          framework::dataset::make("filter_size", { 5 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Square5x5 */
@@ -285,7 +280,7 @@
                                                                                                            framework::dataset::make("filter_size", { 7 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -294,7 +289,7 @@
                                                                                                          framework::dataset::make("filter_size", { 7 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Square7x7 */
@@ -369,7 +364,7 @@
                                                                                                            framework::dataset::make("filter_size", { 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -378,7 +373,7 @@
                                                                                                          framework::dataset::make("filter_size", { 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Square9x9 */
@@ -462,7 +457,7 @@
                                                                                                            framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -472,7 +467,7 @@
                                                                                                          framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Rectangle */
@@ -508,7 +503,7 @@
                                                                                                            framework::dataset::make("filter_size", { 5 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -517,7 +512,7 @@
                                                                                                          framework::dataset::make("filter_size", { 5 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Separable5x5 */
@@ -553,7 +548,7 @@
                                                                                                            framework::dataset::make("filter_size", { 7 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -562,7 +557,7 @@
                                                                                                          framework::dataset::make("filter_size", { 7 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Separable7x7 */
@@ -598,7 +593,7 @@
                                                                                                            framework::dataset::make("filter_size", { 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
@@ -607,7 +602,7 @@
                                                                                                          framework::dataset::make("filter_size", { 9 })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)), tolerance_s16);
+    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
 }
 TEST_SUITE_END()
 TEST_SUITE_END() /* Separable9x9 */
diff --git a/tests/validation/reference/Convolution.cpp b/tests/validation/reference/Convolution.cpp
index 5d0cf1e..e3d4b4a 100644
--- a/tests/validation/reference/Convolution.cpp
+++ b/tests/validation/reference/Convolution.cpp
@@ -39,49 +39,19 @@
                             const unsigned int width,
                             const unsigned int height)
 {
-    ARM_COMPUTE_ERROR_ON(0 == scale);
+    ARM_COMPUTE_ERROR_ON(scale == 0);
+    ARM_COMPUTE_ERROR_ON(scale >= std::numeric_limits<int32_t>::max());
 
-    SimpleTensor<T> dst(src.shape(), output_data_type);
+    SimpleTensor<T>       dst(src.shape(), output_data_type);
+    SimpleTensor<int32_t> sum(src.shape(), output_data_type);
 
-    switch(output_data_type)
+    for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
     {
-        case DataType::S16:
-        {
-            SimpleTensor<int16_t> sum(src.shape(), output_data_type);
-            for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
-            {
-                const Coordinates id = index2coord(src.shape(), element_idx);
-                apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1 / static_cast<double>(scale), border_mode, constant_border_value);
-                dst[element_idx] = tensor_elem_at<int16_t>(sum, id, border_mode, constant_border_value);
-            }
-        }
-        break;
-        case DataType::U8:
-        {
-            SimpleTensor<int32_t> sum(src.shape(), output_data_type);
-            for(int element_idx = 0; element_idx < src.num_elements(); ++element_idx)
-            {
-                const Coordinates id = index2coord(src.shape(), element_idx);
-                apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value);
-                if(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) < 0)
-                {
-                    dst[element_idx] = 0;
-                }
-                else if((tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale) > 255)
-                {
-                    dst[element_idx] = 255;
-                }
-                else
-                {
-                    dst[element_idx] = tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / scale;
-                }
-            }
-        }
-        break;
-        default:
-            ARM_COMPUTE_ERROR("Not supported DataType");
-            break;
+        const Coordinates id = index2coord(src.shape(), element_idx);
+        apply_2d_spatial_filter(id, src, sum, TensorShape(width, height), conv, 1, border_mode, constant_border_value);
+        dst[element_idx] = saturate_cast<T>(tensor_elem_at<int32_t>(sum, id, border_mode, constant_border_value) / static_cast<int>(scale));
     }
+
     return dst;
 }