Optimize CL reduction operation
* A batch dimension is added to the reduction operation.
- All dimensions higher than the batch dimension are collapsed
so that the input and output tensors are always 3D or 4D.
- The CL kernel is called once instead of being called repeatedly
to process each sliding window.
Resolves: COMPMID-6443
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Icd99939d52d3bb648f08537e5f52ef27e894061b
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10456
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/Window.inl b/arm_compute/core/Window.inl
index d935507..0f7c4fb 100644
--- a/arm_compute/core/Window.inl
+++ b/arm_compute/core/Window.inl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2020, 2022 Arm Limited.
+ * Copyright (c) 2016-2020, 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,6 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+
+#ifndef ACL_ARM_COMPUTE_CORE_WINDOW_INL
+#define ACL_ARM_COMPUTE_CORE_WINDOW_INL
+
namespace arm_compute
{
inline Window::Window(const Window &src)
@@ -100,13 +104,21 @@
return collapsed;
}
-inline Window Window::shift_dimensions(unsigned int shift_value) const
+inline Window Window::shift_dimensions(unsigned int shift_value, unsigned int start_dim) const
{
Window shifted_window;
- for (size_t n = 0; n < (Coordinates::num_max_dimensions - shift_value); n++)
+ size_t n = 0;
+
+ for (; n < start_dim; ++n)
+ {
+ shifted_window.set(n, _dims[n]);
+ }
+
+ for (; n < (Coordinates::num_max_dimensions - shift_value); n++)
{
shifted_window.set(n, _dims[n + shift_value]);
}
+
return shifted_window;
}
@@ -313,3 +325,5 @@
return (lhs._dims == rhs._dims) && (lhs._is_broadcasted == rhs._is_broadcasted);
}
} // namespace arm_compute
+
+#endif // ACL_ARM_COMPUTE_CORE_WINDOW_INL