COMPMID-1673: Collapse window in CLArithmeticAddition when one operand is a vector
When one of the operands is a vector, the kernel does a broadcast addition and
the window is not collapsed. This represent an issue because it leads to a lot
of enqueues that increases the time taken by the OpenCL driver. This patch
allows to collapse the window when one of the two operands is a vector.
Furthermore, it adds LWS tuner to the kernel.
It also changes the number of elements processed per iteration to 8 to make
better usage of the cache.
Change-Id: I5f09ab0ddcffb3b7f9326a987c79a997b2d7fa8c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/155003
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index e327769..09f1b7c 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -43,6 +43,7 @@
{
namespace
{
+constexpr unsigned int num_elems_processed_per_iteration = 8;
/** Input data sets **/
const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
DataType::U8));
@@ -110,7 +111,7 @@
validate(dst.info()->valid_region(), valid_region);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
validate(ref_src1.info()->padding(), padding);
validate(ref_src2.info()->padding(), padding);
validate(dst.info()->padding(), padding);
@@ -146,7 +147,7 @@
validate(dst.info()->valid_region(), valid_region);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
validate(ref_src1.info()->padding(), padding);
validate(ref_src2.info()->padding(), padding);
validate(dst.info()->padding(), padding);
@@ -186,7 +187,7 @@
validate(dst.info()->valid_region(), valid_region);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
validate(ref_src1.info()->padding(), padding);
validate(ref_src2.info()->padding(), padding);
validate(dst.info()->padding(), padding);
@@ -235,7 +236,7 @@
validate(dst.info()->valid_region(), valid_region);
// Validate padding
- const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding();
validate(ref_src1.info()->padding(), padding);
validate(ref_src2.info()->padding(), padding);
validate(dst.info()->padding(), padding);