Make Sub kernel and operator stateless

- Rename NEArithmeticSubtractionKernel to CpuSubKernel and move files appropriately

- Add CpuSub under src/runtime/cpu/operators

Partially resolves: COMPMID-4007

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I4754ca9101d82dccacca744be6d069764a9c6b55
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4868
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
index 512cfd6..0263d4c 100644
--- a/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
+++ b/src/runtime/NEON/functions/NEArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,35 +24,18 @@
 #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
 
 #include "arm_compute/core/ITensor.h"
-#include "src/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
+#include "src/runtime/cpu/operators/CpuSub.h"
 
 #include <utility>
 
 namespace arm_compute
 {
-namespace experimental
-{
-void NEArithmeticSubtraction::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_UNUSED(act_info);
-    auto k = std::make_unique<NEArithmeticSubtractionKernel>();
-    k->configure(input1, input2, output, policy);
-    _kernel = std::move(k);
-}
-
-Status NEArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
-    return NEArithmeticSubtractionKernel::validate(input1, input2, output, policy);
-}
-} // namespace experimental
-
 struct NEArithmeticSubtraction::Impl
 {
-    const ITensor                                         *src_0{ nullptr };
-    const ITensor                                         *src_1{ nullptr };
-    ITensor                                               *dst{ nullptr };
-    std::unique_ptr<experimental::NEArithmeticSubtraction> op{ nullptr };
+    const ITensor               *src_0{ nullptr };
+    const ITensor               *src_1{ nullptr };
+    ITensor                     *dst{ nullptr };
+    std::unique_ptr<cpu::CpuSub> op{ nullptr };
 };
 
 NEArithmeticSubtraction::NEArithmeticSubtraction()
@@ -65,7 +48,7 @@
 
 Status NEArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
 {
-    return experimental::NEArithmeticSubtraction::validate(input1, input2, output, policy, act_info);
+    return cpu::CpuSub::validate(input1, input2, output, policy, act_info);
 }
 
 void NEArithmeticSubtraction::configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
@@ -73,7 +56,7 @@
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::NEArithmeticSubtraction>();
+    _impl->op    = std::make_unique<cpu::CpuSub>();
     _impl->op->configure(input1->info(), input2->info(), output->info(), policy, act_info);
 }