Optimize Neon™ SUB operator by squashing execution window

Resolves: COMPMID-5462
Change-Id: I2c7151c8faf4016cc33592fff04d492d7cbc8fd6
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8366
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/CpuSubKernel.h b/src/cpu/kernels/CpuSubKernel.h
index 323a3f1..e835bac 100644
--- a/src/cpu/kernels/CpuSubKernel.h
+++ b/src/cpu/kernels/CpuSubKernel.h
@@ -82,10 +82,16 @@
 
     static const std::vector<SubKernel> &get_available_kernels();
 
+    size_t get_split_dimension() const // Read-only accessor: returns the current value of the private member _split_dimension.
+    {
+        return _split_dimension; // Member defaults to Window::DimY. NOTE(review): presumably the window dimension work is split along - confirm with callers.
+    }
+
 private:
     ConvertPolicy _policy{};
     SubKernelPtr  _run_method{ nullptr };
     std::string   _name{};
+    size_t        _split_dimension{ Window::DimY }; /**< Value reported by get_split_dimension(); initialised to Window::DimY. */
 };
 } // namespace kernels
 } // namespace cpu