Optimize Neon™ SUB operator by squashing execution window
Resolves: COMPMID-5462
Change-Id: I2c7151c8faf4016cc33592fff04d492d7cbc8fd6
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8366
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuSubKernel.cpp b/src/cpu/kernels/CpuSubKernel.cpp
index c55d11e..d908e4e 100644
--- a/src/cpu/kernels/CpuSubKernel.cpp
+++ b/src/cpu/kernels/CpuSubKernel.cpp
@@ -131,7 +131,8 @@
_name = std::string("CpuSubKernel").append("/").append(uk->name);
// CpuSubKernel doesn't need padding so update_window_and_padding() can be skipped
- Window win = calculate_max_window(out_shape, Steps());
+ Window win;
+ std::tie(win, _split_dimension) = calculate_squashed_or_max_window(*src0, *src1);
ICpuKernel::configure(win);
}
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 323a3f1..e835bac 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -82,10 +82,16 @@
static const std::vector<SubKernel> &get_available_kernels();
+ size_t get_split_dimension() const
+ {
+ return _split_dimension;
+ }
+
private:
ConvertPolicy _policy{};
SubKernelPtr _run_method{ nullptr };
std::string _name{};
+ size_t _split_dimension{ Window::DimY };
};
} // namespace kernels
} // namespace cpu
diff --git a/src/cpu/operators/CpuSub.cpp b/src/cpu/operators/CpuSub.cpp
index f0a7770..91a5b6e 100644
--- a/src/cpu/operators/CpuSub.cpp
+++ b/src/cpu/operators/CpuSub.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,8 @@
#include "src/common/utils/Log.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
namespace arm_compute
{
namespace cpu
@@ -45,5 +47,12 @@
ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
return kernels::CpuSubKernel::validate(src0, src1, dst, policy);
}
+
+void CpuSub::run(ITensorPack &tensors)
+{
+ const auto split_dimension = static_cast<kernels::CpuSubKernel *>(_kernel.get())->get_split_dimension();
+
+ NEScheduler::get().schedule_op(_kernel.get(), split_dimension, _kernel->window(), tensors);
+}
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/operators/CpuSub.h b/src/cpu/operators/CpuSub.h
index 025c928..d463d1e 100644
--- a/src/cpu/operators/CpuSub.h
+++ b/src/cpu/operators/CpuSub.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -60,6 +60,9 @@
* @return a status
*/
static Status validate(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
};
} // namespace cpu
} // namespace arm_compute