COMPMID-3960: Mismatch on NEArithmeticSubtraction
Fixes a corner-case failure that occurred when both input shapes had a
unit dimension on the X axis. In that case broadcasting was wrongly
enabled, which led to invalid window execution.
The check is updated to detect broadcasting by comparing the X dimension
of the two input shapes, instead of testing whether either input window
has a zero step along X.
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I0b79542279e8d155d2661fddff9691d94a1f6855
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4391
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index fa26b90..aa7af54 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -56,7 +56,7 @@
constexpr int window_step_x = 16 / sizeof(T);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
if(is_broadcast_across_x)
{
@@ -152,7 +152,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
@@ -345,7 +345,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
@@ -537,7 +537,7 @@
const int window_step_x = 8;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index bdd356a..187e97d 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -67,7 +67,7 @@
constexpr int window_step_x = 16 / sizeof(T);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
@@ -178,7 +178,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
@@ -372,7 +372,7 @@
const int window_step_x = 8;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index bb4e9a6..412ae24 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -656,7 +656,7 @@
const int window_step_x = std::min(16 / static_cast<int>(sizeof(OutputScalarType)), 8);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
if(is_broadcast_across_x)
{
@@ -735,7 +735,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
@@ -843,7 +843,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
@@ -950,7 +950,7 @@
const int window_step_x = 16;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform();
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index f646ea5..39517f6 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -156,7 +156,7 @@
const int window_step_x = 16 / sizeof(T);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
const UniformQuantizationInfo output_qua_info = out->info()->quantization_info().uniform();
const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset };
@@ -785,7 +785,7 @@
const int window_step_x = 8;
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
if(is_broadcast_across_x)
{
@@ -935,7 +935,7 @@
constexpr int window_step_x = 16 / sizeof(float);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
using ExactTagType = typename wrapper::traits::neon_vector<float, window_step_x>::tag_type;
@@ -1033,7 +1033,7 @@
constexpr int window_step_x = 8 / sizeof(float);
const auto window_start_x = static_cast<int>(window.x().start());
const auto window_end_x = static_cast<int>(window.x().end());
- const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
+ const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x();
using ExactTagType = typename wrapper::traits::neon_vector<float, 2>::tag_type;