Rename functions/classes for elementwise operations
* Create CpuElementwise operator
* Rename kernel classes
* Make the kernels stateless
Partially implements: COMPMID-4003
Change-Id: I4ef9c61a3acc3ac5dbe46463d62dcb88a5face21
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4881
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index a678a86..c009a6d 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -54,7 +54,6 @@
#include "src/core/NEON/kernels/NEDilateKernel.h"
#include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
#include "src/core/NEON/kernels/NEErodeKernel.h"
#include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
similarity index 72%
rename from src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
rename to src/core/cpu/kernels/CpuElementwiseKernel.cpp
index b250465..ab915b9 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.cpp
@@ -21,16 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "src/core/CPP/Validate.h"
-#include "src/core/NEON/kernels/elementwise/impl/elementwise_list.h"
-#include "src/core/NEON/kernels/elementwise/impl/elementwise_quantized_list.h"
-#include "src/core/SVE/kernels/elementwise/impl/elementwise_list.h"
-#include "src/core/SVE/kernels/elementwise/impl/elementwise_quantized_list.h"
#include "src/core/common/Registrars.h"
+#include "src/core/cpu/kernels/elementwise/neon/elementwise_list.h"
+#include "src/core/cpu/kernels/elementwise/neon/elementwise_quantized_list.h"
+#include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h"
+#include "src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -38,10 +38,14 @@
namespace arm_compute
{
+namespace cpu
+{
+namespace kernels
+{
namespace
{
using ElementwiseSelector = std::add_pointer<bool(DataType)>::type;
-using UKernelType = NEElementwiseOperationKernel::ElementwiseFunction;
+using UKernelType = CpuElementwiseKernel::ElementwiseFunction;
struct ElementwiseKernel
{
const char *name;
@@ -154,12 +158,7 @@
}
} // namespace
-NEElementwiseOperationKernel::NEElementwiseOperationKernel()
- : _function(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-Status NEElementwiseOperationKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuElementwiseKernel::validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2);
@@ -178,7 +177,7 @@
return Status{};
}
-void NEElementwiseOperationKernel::configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuElementwiseKernel::configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
@@ -192,45 +191,33 @@
Window win = calculate_max_window(valid_region);
- INEKernel::configure(win);
+ ICpuKernel::configure(win);
}
-void NEElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
+void CpuElementwiseKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(info, window);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_function == nullptr);
- _function(tensors.get_const_tensor(TensorType::ACL_SRC_0),
- tensors.get_const_tensor(TensorType::ACL_SRC_1),
- tensors.get_tensor(TensorType::ACL_DST), window);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+
+ auto src0 = tensors.get_const_tensor(TensorType::ACL_SRC_0);
+ auto src1 = tensors.get_const_tensor(TensorType::ACL_SRC_1);
+ auto dst = tensors.get_tensor(TensorType::ACL_DST);
+
+ auto function = get_implementation(src0->info(), src1->info(), dst->info());
+ ARM_COMPUTE_ERROR_ON(function == nullptr);
+ function(src0, src1, dst, window);
}
/** Arithmetic operators (min, max, squared_diff) */
-void NEArithmeticOperationKernel::configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuArithmeticKernel::configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
configure_common(input1, input2, output);
- switch(op)
- {
- case ArithmeticOperation::MAX:
- _function = configure_arithm_func<ArithmeticOperation::MAX>(input1, input2, output);
- break;
- case ArithmeticOperation::MIN:
- _function = configure_arithm_func<ArithmeticOperation::MIN>(input1, input2, output);
- break;
- case ArithmeticOperation::SQUARED_DIFF:
- _function = configure_arithm_func<ArithmeticOperation::SQUARED_DIFF>(input1, input2, output);
- break;
- case ArithmeticOperation::PRELU:
- _function = configure_arithm_func<ArithmeticOperation::PRELU>(input1, input2, output);
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
+ _op = op;
}
-Status NEArithmeticOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuArithmeticKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
// Validate in case of configured output
@@ -241,7 +228,7 @@
return validate_arguments_common(input1, input2, output);
}
-Status NEArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
{
ARM_COMPUTE_UNUSED(op);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
@@ -249,22 +236,45 @@
return Status{};
}
+std::function<CpuElementwiseKernel::ElementwiseFunction>
+CpuArithmeticKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ switch(_op)
+ {
+ case ArithmeticOperation::MAX:
+ return configure_arithm_func<ArithmeticOperation::MAX>(input1, input2, output);
+ case ArithmeticOperation::MIN:
+ return configure_arithm_func<ArithmeticOperation::MIN>(input1, input2, output);
+ case ArithmeticOperation::SQUARED_DIFF:
+ return configure_arithm_func<ArithmeticOperation::SQUARED_DIFF>(input1, input2, output);
+ case ArithmeticOperation::PRELU:
+ return configure_arithm_func<ArithmeticOperation::PRELU>(input1, input2, output);
+ case ArithmeticOperation::DIV:
+ return configure_arithm_func<ArithmeticOperation::DIV>(input1, input2, output);
+ case ArithmeticOperation::POWER:
+ return configure_arithm_func<ArithmeticOperation::POWER>(input1, input2, output);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+ return nullptr;
+}
+
/** The division operator */
-void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuDivisionKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
configure_common(input1, input2, output);
- _function = configure_arithm_func<ArithmeticOperation::DIV>(input1, input2, output);
+ _op = ArithmeticOperation::DIV;
}
-Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuDivisionKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
- return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
+ return CpuArithmeticKernel::validate_arguments(input1, input2, output);
}
-Status NEDivisionOperationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
@@ -272,20 +282,20 @@
}
/** The power operator */
-void NEPowerOperationKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuPowerKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
configure_common(input1, input2, output);
- _function = configure_arithm_func<ArithmeticOperation::POWER>(input1, input2, output);
+ _op = ArithmeticOperation::POWER;
}
-Status NEPowerOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuPowerKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
- return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
+ return CpuArithmeticKernel::validate_arguments(input1, input2, output);
}
-Status NEPowerOperationKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuPowerKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
@@ -293,36 +303,14 @@
}
/** Comparison operators (equal, not equal, less than, greater than, less than or equal, greater than or equal) */
-void NEComparisonOperationKernel::configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+void CpuComparisonKernel::configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1, *input2, *output));
configure_common(input1, input2, output);
- switch(op)
- {
- case ComparisonOperation::Equal:
- _function = configure_comp_func<ComparisonOperation::Equal>(input1, input2, output);
- break;
- case ComparisonOperation::NotEqual:
- _function = configure_comp_func<ComparisonOperation::NotEqual>(input1, input2, output);
- break;
- case ComparisonOperation::Greater:
- _function = configure_comp_func<ComparisonOperation::Greater>(input1, input2, output);
- break;
- case ComparisonOperation::GreaterEqual:
- _function = configure_comp_func<ComparisonOperation::GreaterEqual>(input1, input2, output);
- break;
- case ComparisonOperation::Less:
- _function = configure_comp_func<ComparisonOperation::Less>(input1, input2, output);
- break;
- case ComparisonOperation::LessEqual:
- _function = configure_comp_func<ComparisonOperation::LessEqual>(input1, input2, output);
- break;
- default:
- ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
- }
+ _op = op;
}
-Status NEComparisonOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
+Status CpuComparisonKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::S32, DataType::F32);
// Validate in case of configured output
@@ -333,11 +321,36 @@
return validate_arguments_common(input1, input2, output);
}
-Status NEComparisonOperationKernel::validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+Status CpuComparisonKernel::validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
{
ARM_COMPUTE_UNUSED(op);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input1, *input2, *output));
return Status{};
}
+
+std::function<CpuElementwiseKernel::ElementwiseFunction>
+CpuComparisonKernel::get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ switch(_op)
+ {
+ case ComparisonOperation::Equal:
+ return configure_comp_func<ComparisonOperation::Equal>(input1, input2, output);
+ case ComparisonOperation::NotEqual:
+ return configure_comp_func<ComparisonOperation::NotEqual>(input1, input2, output);
+ case ComparisonOperation::Greater:
+ return configure_comp_func<ComparisonOperation::Greater>(input1, input2, output);
+ case ComparisonOperation::GreaterEqual:
+ return configure_comp_func<ComparisonOperation::GreaterEqual>(input1, input2, output);
+ case ComparisonOperation::Less:
+ return configure_comp_func<ComparisonOperation::Less>(input1, input2, output);
+ case ComparisonOperation::LessEqual:
+ return configure_comp_func<ComparisonOperation::LessEqual>(input1, input2, output);
+ default:
+ ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+ }
+ return nullptr;
+}
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.h b/src/core/cpu/kernels/CpuElementwiseKernel.h
similarity index 73%
rename from src/core/NEON/kernels/NEElementwiseOperationKernel.h
rename to src/core/cpu/kernels/CpuElementwiseKernel.h
index b0037d3..92cf880 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,41 +21,35 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
+#ifndef ARM_COMPUTE_CPU_ELEMENTWISE_KERNEL_H
+#define ARM_COMPUTE_CPU_ELEMENTWISE_KERNEL_H
-#include "arm_compute/core/Types.h"
-#include "src/core/NEON/INEKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
namespace arm_compute
{
class ITensor;
-
+namespace cpu
+{
+namespace kernels
+{
/** Interface for an element-wise operation kernel
*
* Element-wise operation is computed by:
* @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
*
*/
-class NEElementwiseOperationKernel : public INEKernel
+class CpuElementwiseKernel : public ICpuKernel
{
public:
const char *name() const override
{
- return "NEElementwiseOperationKernel";
+ return "CpuElementwiseKernel";
}
- /** Default constructor */
- NEElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseOperationKernel() = default;
+
+ CpuElementwiseKernel() = default;
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseKernel);
/** Common signature for all the specialised arithmetic functions
*
@@ -64,7 +58,7 @@
* @param[out] output Output tensor info. Data types supported: Dependent on subclass.
* @param[in] window Region on which to execute the kernel.
*/
- using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
+ using ElementwiseFunction = void(const ITensor *, const ITensor *, ITensor *, const Window &);
// Inherited methods overridden:
void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
@@ -83,19 +77,22 @@
*/
void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Function to use for the particular tensor types passed to configure() */
- std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function;
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
+ /** Function to get the micro kernel implementation
+ *
+ * @param[in] input1 First input tensor information
+ * @param[in] input2 Second input tensor information
+ * @param[in] output Output tensor information
+ *
+ * @return the function instance for the micro kernel
+ */
+ virtual std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) = 0;
};
-class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
+class CpuArithmeticKernel : public CpuElementwiseKernel
{
public:
/** Default constructor */
- NEArithmeticOperationKernel() = default;
+ CpuArithmeticKernel() = default;
/** Configure kernel
*
@@ -106,7 +103,7 @@
*/
void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel
*
* @param[in] op Arithmetic operation to be executed.
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
@@ -120,13 +117,26 @@
protected:
// Inherited methods overridden:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+
+ ArithmeticOperation _op{};
+
+private:
+ /** Function to get the micro kernel implementation
+ *
+ * @param[in] input1 First input tensor information
+ * @param[in] input2 Second input tensor information
+ * @param[in] output Output tensor information
+ *
+ * @return the function instance for the micro kernel
+ */
+ std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override;
};
-class NEDivisionOperationKernel : public NEArithmeticOperationKernel
+class CpuDivisionKernel : public CpuArithmeticKernel
{
public:
/** Default constructor */
- NEDivisionOperationKernel() = default;
+ CpuDivisionKernel() = default;
/** Configure kernel
*
@@ -136,7 +146,7 @@
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuDivisionKernel
*
* @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -151,11 +161,11 @@
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
};
-class NEPowerOperationKernel : public NEArithmeticOperationKernel
+class CpuPowerKernel : public CpuArithmeticKernel
{
public:
/** Default constructor */
- NEPowerOperationKernel() = default;
+ CpuPowerKernel() = default;
/** Configure kernel
*
@@ -165,7 +175,7 @@
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref CpuPowerKernel
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -180,11 +190,11 @@
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
};
-class NEComparisonOperationKernel : public NEElementwiseOperationKernel
+class CpuComparisonKernel : public CpuElementwiseKernel
{
public:
/** Default constructor */
- NEComparisonOperationKernel() = default;
+ CpuComparisonKernel() = default;
/** Configure kernel
*
@@ -195,7 +205,7 @@
*/
void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
*
* @param[in] op Comparison operation to be executed.
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -209,6 +219,21 @@
protected:
// Inherited methods overridden:
static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
+
+private:
+ /** Function to get the micro kernel implementation
+ *
+ * @param[in] input1 First input tensor information
+ * @param[in] input2 Second input tensor information
+ * @param[in] output Output tensor information
+ *
+ * @return the function instance for the micro kernel
+ */
+ std::function<ElementwiseFunction> get_implementation(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output) override;
+
+ ComparisonOperation _op{};
};
+} // namespace kernels
+} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_ELEMENTWISE_KERNEL_H */
\ No newline at end of file
diff --git a/src/core/NEON/kernels/elementwise/impl/elementwise_list.h b/src/core/cpu/kernels/elementwise/neon/elementwise_list.h
similarity index 100%
rename from src/core/NEON/kernels/elementwise/impl/elementwise_list.h
rename to src/core/cpu/kernels/elementwise/neon/elementwise_list.h
diff --git a/src/core/NEON/kernels/elementwise/impl/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/neon/elementwise_quantized_list.h
similarity index 99%
rename from src/core/NEON/kernels/elementwise/impl/elementwise_quantized_list.h
rename to src/core/cpu/kernels/elementwise/neon/elementwise_quantized_list.h
index fd1fb00..1ff4632 100644
--- a/src/core/NEON/kernels/elementwise/impl/elementwise_quantized_list.h
+++ b/src/core/cpu/kernels/elementwise/neon/elementwise_quantized_list.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H
#define SRC_CORE_NEON_KERNELS_ELEMENTWISE_QUANTIZED_LIST_H
-#include "src/core/NEON/kernels/elementwise/impl/elementwise_list.h"
+#include "src/core/cpu/kernels/elementwise/neon/elementwise_list.h"
namespace arm_compute
{
diff --git a/src/core/SVE/kernels/elementwise/impl/elementwise_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_list.h
similarity index 100%
rename from src/core/SVE/kernels/elementwise/impl/elementwise_list.h
rename to src/core/cpu/kernels/elementwise/sve/elementwise_list.h
diff --git a/src/core/SVE/kernels/elementwise/impl/elementwise_quantized_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
similarity index 99%
rename from src/core/SVE/kernels/elementwise/impl/elementwise_quantized_list.h
rename to src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
index e85b089..b6342c7 100644
--- a/src/core/SVE/kernels/elementwise/impl/elementwise_quantized_list.h
+++ b/src/core/cpu/kernels/elementwise/sve/elementwise_quantized_list.h
@@ -26,7 +26,7 @@
#if defined(__ARM_FEATURE_SVE2)
-#include "src/core/SVE/kernels/elementwise/impl/elementwise_list.h"
+#include "src/core/cpu/kernels/elementwise/sve/elementwise_list.h"
namespace arm_compute
{
diff --git a/src/runtime/NEON/functions/NEElementwiseOperations.cpp b/src/runtime/NEON/functions/NEElementwiseOperations.cpp
new file mode 100644
index 0000000..946bbb2
--- /dev/null
+++ b/src/runtime/NEON/functions/NEElementwiseOperations.cpp
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
+#include "arm_compute/core/Validate.h"
+#include "src/runtime/cpu/operators/CpuElementwise.h"
+
+#include "arm_compute/core/ITensor.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+struct NEElementwiseMax::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseMax> op{ nullptr };
+};
+
+NEElementwiseMax::NEElementwiseMax()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEElementwiseMax::NEElementwiseMax(NEElementwiseMax &&) = default;
+NEElementwiseMax &NEElementwiseMax::operator=(NEElementwiseMax &&) = default;
+NEElementwiseMax::~NEElementwiseMax() = default;
+
+void NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseMax>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
+ return cpu::CpuElementwiseMax::validate(input1, input2, output);
+}
+
+void NEElementwiseMax::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+struct NEElementwiseMin::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseMin> op{ nullptr };
+};
+
+NEElementwiseMin::NEElementwiseMin()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEElementwiseMin::NEElementwiseMin(NEElementwiseMin &&) = default;
+NEElementwiseMin &NEElementwiseMin::operator=(NEElementwiseMin &&) = default;
+NEElementwiseMin::~NEElementwiseMin() = default;
+
+void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseMin>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
+ return cpu::CpuElementwiseMin::validate(input1, input2, output);
+}
+
+void NEElementwiseMin::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+struct NEElementwiseSquaredDiff::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseSquaredDiff> op{ nullptr };
+};
+
+NEElementwiseSquaredDiff::NEElementwiseSquaredDiff()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEElementwiseSquaredDiff::NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&) = default;
+NEElementwiseSquaredDiff &NEElementwiseSquaredDiff::operator=(NEElementwiseSquaredDiff &&) = default;
+NEElementwiseSquaredDiff::~NEElementwiseSquaredDiff() = default;
+
+void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseSquaredDiff>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
+ return cpu::CpuElementwiseSquaredDiff::validate(input1, input2, output);
+}
+
+void NEElementwiseSquaredDiff::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+// Pimpl: non-owning pointers to the user tensors plus the owned cpu operator.
+struct NEElementwiseDivision::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseDivision> op{ nullptr };
+};
+
+NEElementwiseDivision::NEElementwiseDivision()
+ : _impl(std::make_unique<Impl>())
+{
+}
+// Special members defined out-of-line — presumably Impl is only
+// forward-declared in the header, so std::unique_ptr<Impl> needs the
+// complete type here (TODO confirm against the header).
+NEElementwiseDivision::NEElementwiseDivision(NEElementwiseDivision &&) = default;
+NEElementwiseDivision &NEElementwiseDivision::operator=(NEElementwiseDivision &&) = default;
+NEElementwiseDivision::~NEElementwiseDivision() = default;
+
+// Caches the tensors and configures the stateless cpu operator.
+// Fused activation is not supported (validate() rejects it).
+void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseDivision>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+// Static validation: no activation allowed, then defer to the cpu operator.
+Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
+ return cpu::CpuElementwiseDivision::validate(input1, input2, output);
+}
+
+// Packs the cached tensors and dispatches to the operator.
+void NEElementwiseDivision::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+// Pimpl: non-owning pointers to the user tensors plus the owned cpu operator.
+struct NEElementwisePower::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwisePower> op{ nullptr };
+};
+
+NEElementwisePower::NEElementwisePower()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEElementwisePower::NEElementwisePower(NEElementwisePower &&) = default;
+NEElementwisePower &NEElementwisePower::operator=(NEElementwisePower &&) = default;
+NEElementwisePower::~NEElementwisePower() = default;
+
+// Caches the tensors and configures the stateless cpu::CpuElementwisePower.
+// Fused activation is not supported (validate() rejects it).
+void NEElementwisePower::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_UNUSED(act_info);
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwisePower>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+// Static validation: no activation allowed, then defer to the cpu operator.
+Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
+ return cpu::CpuElementwisePower::validate(input1, input2, output);
+}
+
+// Packs the cached tensors and dispatches to the operator.
+void NEElementwisePower::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+// Pimpl for the compile-time-fixed comparison wrapper: non-owning tensor
+// pointers plus the owned cpu operator specialised on COP.
+template <ComparisonOperation COP>
+struct NEElementwiseComparisonStatic<COP>::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseComparisonStatic<COP>> op{ nullptr };
+};
+
+template <ComparisonOperation COP>
+NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic()
+ : _impl(std::make_unique<Impl>())
+{
+}
+template <ComparisonOperation COP>
+NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&) = default;
+template <ComparisonOperation COP>
+NEElementwiseComparisonStatic<COP> &NEElementwiseComparisonStatic<COP>::operator=(NEElementwiseComparisonStatic &&) = default;
+template <ComparisonOperation COP>
+NEElementwiseComparisonStatic<COP>::~NEElementwiseComparisonStatic() = default;
+
+// Caches the tensors and configures the cpu operator; the comparison kind
+// is fixed by the COP template parameter, so no runtime op argument exists.
+template <ComparisonOperation COP>
+void NEElementwiseComparisonStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output)
+{
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseComparisonStatic<COP>>();
+ _impl->op->configure(input1->info(), input2->info(), output->info());
+}
+
+// Static validation: defer directly to the cpu operator.
+template <ComparisonOperation COP>
+Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return cpu::CpuElementwiseComparisonStatic<COP>::validate(input1, input2, output);
+}
+
+// Packs the cached tensors and dispatches to the operator.
+template <ComparisonOperation COP>
+void NEElementwiseComparisonStatic<COP>::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+// Pimpl for the runtime-selected comparison wrapper.
+struct NEElementwiseComparison::Impl
+{
+ const ITensor *src_0{ nullptr };
+ const ITensor *src_1{ nullptr };
+ ITensor *dst{ nullptr };
+ std::unique_ptr<cpu::CpuElementwiseComparison> op{ nullptr };
+};
+
+NEElementwiseComparison::NEElementwiseComparison()
+ : _impl(std::make_unique<Impl>())
+{
+}
+NEElementwiseComparison::NEElementwiseComparison(NEElementwiseComparison &&) = default;
+NEElementwiseComparison &NEElementwiseComparison::operator=(NEElementwiseComparison &&) = default;
+NEElementwiseComparison::~NEElementwiseComparison() = default;
+
+// Caches the tensors and configures the cpu operator; unlike the static
+// variant, the comparison kind is chosen at runtime via @p op.
+void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op)
+{
+ _impl->src_0 = input1;
+ _impl->src_1 = input2;
+ _impl->dst = output;
+ _impl->op = std::make_unique<cpu::CpuElementwiseComparison>();
+ _impl->op->configure(input1->info(), input2->info(), output->info(), op);
+}
+
+// Static validation: defer directly to the cpu operator.
+Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op)
+{
+ return cpu::CpuElementwiseComparison::validate(input1, input2, output, op);
+}
+
+// Packs the cached tensors and dispatches to the operator.
+void NEElementwiseComparison::run()
+{
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
+ pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
+ pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+ _impl->op->run(pack);
+}
+
+// Supported Specializations
+template class NEElementwiseComparisonStatic<ComparisonOperation::Equal>;
+template class NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
+template class NEElementwiseComparisonStatic<ComparisonOperation::Greater>;
+template class NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
+template class NEElementwiseComparisonStatic<ComparisonOperation::Less>;
+template class NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEElementwiseOperators.cpp b/src/runtime/NEON/functions/NEElementwiseOperators.cpp
deleted file mode 100644
index badcf2e..0000000
--- a/src/runtime/NEON/functions/NEElementwiseOperators.cpp
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
-#include <src/core/NEON/kernels/NEElementwiseOperationKernel.h>
-
-#include "arm_compute/core/ITensor.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-namespace experimental
-{
-void NEElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEArithmeticOperationKernel>();
- k->configure(ArithmeticOperation::MAX, input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output);
-}
-
-void NEElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEArithmeticOperationKernel>();
- k->configure(ArithmeticOperation::MIN, input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output);
-}
-
-void NEElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEArithmeticOperationKernel>();
- k->configure(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
-}
-
-void NEElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEDivisionOperationKernel>();
- k->configure(input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEDivisionOperationKernel::validate(input1, input2, output);
-}
-
-void NEElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEPowerOperationKernel>();
- k->configure(input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEPowerOperationKernel::validate(input1, input2, output);
-}
-
-template <ComparisonOperation COP>
-void NEElementwiseComparisonStatic<COP>::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
-{
- auto k = std::make_unique<NEComparisonOperationKernel>();
- k->configure(COP, input1, input2, output);
- _kernel = std::move(k);
-}
-
-template <ComparisonOperation COP>
-Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return NEComparisonOperationKernel::validate(COP, input1, input2, output);
-}
-
-void NEElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op)
-{
- auto k = std::make_unique<NEComparisonOperationKernel>();
- k->configure(op, input1, input2, output);
- _kernel = std::move(k);
-}
-
-Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op)
-{
- return NEComparisonOperationKernel::validate(op, input1, input2, output);
-}
-
-// Supported Specializations
-template class NEElementwiseComparisonStatic<ComparisonOperation::Equal>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::Greater>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::Less>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
-} // namespace experimental
-
-struct NEElementwiseMax::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseMax> op{ nullptr };
-};
-
-NEElementwiseMax::NEElementwiseMax()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwiseMax::NEElementwiseMax(NEElementwiseMax &&) = default;
-NEElementwiseMax &NEElementwiseMax::operator=(NEElementwiseMax &&) = default;
-NEElementwiseMax::~NEElementwiseMax() = default;
-
-void NEElementwiseMax::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseMax>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-Status NEElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
- return experimental::NEElementwiseMax::validate(input1, input2, output);
-}
-
-void NEElementwiseMax::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-struct NEElementwiseMin::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseMin> op{ nullptr };
-};
-
-NEElementwiseMin::NEElementwiseMin()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwiseMin::NEElementwiseMin(NEElementwiseMin &&) = default;
-NEElementwiseMin &NEElementwiseMin::operator=(NEElementwiseMin &&) = default;
-NEElementwiseMin::~NEElementwiseMin() = default;
-
-void NEElementwiseMin::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseMin>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-Status NEElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
- return experimental::NEElementwiseMin::validate(input1, input2, output);
-}
-
-void NEElementwiseMin::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-struct NEElementwiseSquaredDiff::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseSquaredDiff> op{ nullptr };
-};
-
-NEElementwiseSquaredDiff::NEElementwiseSquaredDiff()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwiseSquaredDiff::NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&) = default;
-NEElementwiseSquaredDiff &NEElementwiseSquaredDiff::operator=(NEElementwiseSquaredDiff &&) = default;
-NEElementwiseSquaredDiff::~NEElementwiseSquaredDiff() = default;
-
-void NEElementwiseSquaredDiff::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseSquaredDiff>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-Status NEElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
- return experimental::NEElementwiseSquaredDiff::validate(input1, input2, output);
-}
-
-void NEElementwiseSquaredDiff::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-struct NEElementwiseDivision::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseDivision> op{ nullptr };
-};
-
-NEElementwiseDivision::NEElementwiseDivision()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwiseDivision::NEElementwiseDivision(NEElementwiseDivision &&) = default;
-NEElementwiseDivision &NEElementwiseDivision::operator=(NEElementwiseDivision &&) = default;
-NEElementwiseDivision::~NEElementwiseDivision() = default;
-
-void NEElementwiseDivision::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseDivision>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-Status NEElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
- return experimental::NEElementwiseDivision::validate(input1, input2, output);
-}
-
-void NEElementwiseDivision::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-struct NEElementwisePower::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwisePower> op{ nullptr };
-};
-
-NEElementwisePower::NEElementwisePower()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwisePower::NEElementwisePower(NEElementwisePower &&) = default;
-NEElementwisePower &NEElementwisePower::operator=(NEElementwisePower &&) = default;
-NEElementwisePower::~NEElementwisePower() = default;
-
-void NEElementwisePower::configure(ITensor *input1, ITensor *input2, ITensor *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(act_info);
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwisePower>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-Status NEElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(act_info.enabled());
- return experimental::NEElementwisePower::validate(input1, input2, output);
-}
-
-void NEElementwisePower::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-template <ComparisonOperation COP>
-struct NEElementwiseComparisonStatic<COP>::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseComparisonStatic<COP>> op{ nullptr };
-};
-
-template <ComparisonOperation COP>
-NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic()
- : _impl(std::make_unique<Impl>())
-{
-}
-template <ComparisonOperation COP>
-NEElementwiseComparisonStatic<COP>::NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&) = default;
-template <ComparisonOperation COP>
-NEElementwiseComparisonStatic<COP> &NEElementwiseComparisonStatic<COP>::operator=(NEElementwiseComparisonStatic &&) = default;
-template <ComparisonOperation COP>
-NEElementwiseComparisonStatic<COP>::~NEElementwiseComparisonStatic() = default;
-
-template <ComparisonOperation COP>
-void NEElementwiseComparisonStatic<COP>::configure(ITensor *input1, ITensor *input2, ITensor *output)
-{
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseComparisonStatic<COP>>();
- _impl->op->configure(input1->info(), input2->info(), output->info());
-}
-
-template <ComparisonOperation COP>
-Status NEElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- return experimental::NEElementwiseComparisonStatic<COP>::validate(input1, input2, output);
-}
-
-template <ComparisonOperation COP>
-void NEElementwiseComparisonStatic<COP>::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-struct NEElementwiseComparison::Impl
-{
- const ITensor *src_0{ nullptr };
- const ITensor *src_1{ nullptr };
- ITensor *dst{ nullptr };
- std::unique_ptr<experimental::NEElementwiseComparison> op{ nullptr };
-};
-
-NEElementwiseComparison::NEElementwiseComparison()
- : _impl(std::make_unique<Impl>())
-{
-}
-NEElementwiseComparison::NEElementwiseComparison(NEElementwiseComparison &&) = default;
-NEElementwiseComparison &NEElementwiseComparison::operator=(NEElementwiseComparison &&) = default;
-NEElementwiseComparison::~NEElementwiseComparison() = default;
-
-void NEElementwiseComparison::configure(ITensor *input1, ITensor *input2, ITensor *output, ComparisonOperation op)
-{
- _impl->src_0 = input1;
- _impl->src_1 = input2;
- _impl->dst = output;
- _impl->op = std::make_unique<experimental::NEElementwiseComparison>();
- _impl->op->configure(input1->info(), input2->info(), output->info(), op);
-}
-
-Status NEElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op)
-{
- return experimental::NEElementwiseComparison::validate(input1, input2, output, op);
-}
-
-void NEElementwiseComparison::run()
-{
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC_0, _impl->src_0);
- pack.add_tensor(TensorType::ACL_SRC_1, _impl->src_1);
- pack.add_tensor(TensorType::ACL_DST, _impl->dst);
- _impl->op->run(pack);
-}
-
-// Supported Specializations
-template class NEElementwiseComparisonStatic<ComparisonOperation::Equal>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::Greater>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::Less>;
-template class NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
-} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPReluLayer.cpp b/src/runtime/NEON/functions/NEPReluLayer.cpp
index fe656c0..d792357 100644
--- a/src/runtime/NEON/functions/NEPReluLayer.cpp
+++ b/src/runtime/NEON/functions/NEPReluLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,7 @@
#include "arm_compute/runtime/NEON/functions/NEPReluLayer.h"
#include "arm_compute/core/ITensor.h"
-#include "src/core/NEON/kernels/NEElementwiseOperationKernel.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
namespace arm_compute
{
@@ -32,14 +32,14 @@
{
void NEPRelu::configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output)
{
- auto k = std::make_unique<NEArithmeticOperationKernel>();
+ auto k = std::make_unique<cpu::kernels::CpuArithmeticKernel>();
k->configure(ArithmeticOperation::PRELU, input, alpha, output);
_kernel = std::move(k);
}
Status NEPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
{
- return NEArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
+ return cpu::kernels::CpuArithmeticKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
}
} // namespace experimental
diff --git a/src/runtime/cpu/operators/CpuElementwise.cpp b/src/runtime/cpu/operators/CpuElementwise.cpp
new file mode 100644
index 0000000..322bd09
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuElementwise.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuElementwise.h"
+#include "src/core/cpu/kernels/CpuElementwiseKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+// Instantiates a CpuArithmeticKernel configured for MAX and stores it as the
+// operator's kernel; no tensor pointers are retained by the operator itself.
+void CpuElementwiseMax::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuArithmeticKernel>();
+ k->configure(ArithmeticOperation::MAX, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the arithmetic kernel for MAX.
+Status CpuElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MAX, input1, input2, output);
+}
+
+// Same as CpuElementwiseMax::configure but for the MIN operation.
+void CpuElementwiseMin::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuArithmeticKernel>();
+ k->configure(ArithmeticOperation::MIN, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the arithmetic kernel for MIN.
+Status CpuElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::MIN, input1, input2, output);
+}
+
+// Instantiates a CpuArithmeticKernel configured for SQUARED_DIFF.
+void CpuElementwiseSquaredDiff::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuArithmeticKernel>();
+ k->configure(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the arithmetic kernel for SQUARED_DIFF.
+Status CpuElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output);
+}
+
+// Division has its own dedicated kernel, so no ArithmeticOperation is passed.
+void CpuElementwiseDivision::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuDivisionKernel>();
+ k->configure(input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the division kernel.
+Status CpuElementwiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuDivisionKernel::validate(input1, input2, output);
+}
+
+// Power also uses a dedicated kernel.
+void CpuElementwisePower::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuPowerKernel>();
+ k->configure(input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the power kernel.
+Status CpuElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuPowerKernel::validate(input1, input2, output);
+}
+
+// Comparison with the operation fixed at compile time by the COP template
+// parameter; forwards COP to the shared CpuComparisonKernel.
+template <ComparisonOperation COP>
+void CpuElementwiseComparisonStatic<COP>::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output)
+{
+ auto k = std::make_unique<kernels::CpuComparisonKernel>();
+ k->configure(COP, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the comparison kernel with the fixed COP.
+template <ComparisonOperation COP>
+Status CpuElementwiseComparisonStatic<COP>::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
+{
+ return kernels::CpuComparisonKernel::validate(COP, input1, input2, output);
+}
+
+// Comparison with the operation selected at runtime via @p op.
+void CpuElementwiseComparison::configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op)
+{
+ auto k = std::make_unique<kernels::CpuComparisonKernel>();
+ k->configure(op, input1, input2, output);
+ _kernel = std::move(k);
+}
+
+// Static validation: defer to the comparison kernel with the runtime op.
+Status CpuElementwiseComparison::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op)
+{
+ return kernels::CpuComparisonKernel::validate(op, input1, input2, output);
+}
+
+// Explicit instantiations of the comparison operator for every supported
+// ComparisonOperation, so the template definitions above can live in this
+// translation unit.
+template class CpuElementwiseComparisonStatic<ComparisonOperation::Equal>;
+template class CpuElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
+template class CpuElementwiseComparisonStatic<ComparisonOperation::Greater>;
+template class CpuElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
+template class CpuElementwiseComparisonStatic<ComparisonOperation::Less>;
+template class CpuElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuElementwise.h b/src/runtime/cpu/operators/CpuElementwise.h
new file mode 100644
index 0000000..611a374
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuElementwise.h
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_ELEMENTWISE_H
+#define ARM_COMPUTE_CPU_ELEMENTWISE_H
+
+#include "src/runtime/cpu/ICpuOperator.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for max
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a max operation between two tensors.
+ */
+class CpuElementwiseMax : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for max
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for min
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a min operation between two tensors.
+ */
+class CpuElementwiseMin : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for min
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for squared difference
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
+ */
+class CpuElementwiseSquaredDiff : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for squared difference
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for division
+ *
+ * @note The tensor data type for the inputs must be S32/F16/F32.
+ * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
+ */
+class CpuElementwiseDivision : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: S32/F16/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for division
+     *
+     * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuArithmeticKernel for power
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
+ * @note For an exponent that is a float, this function will only work with a positive base.
+ */
+class CpuElementwisePower : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuArithmeticKernel for power
+     *
+     * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel.
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a comparison operation between two tensors.
+ */
+class CpuElementwiseComparison : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs, output and comparison operation.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: U16/U32.
+     * @param[in]  op     Comparison Operation to be performed.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: U16/U32.
+     * @param[in] op     Comparison Operation to be performed.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+};
+
+/** Basic function to run @ref cpu::kernels::CpuComparisonKernel
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a comparison operation between two tensors; the operation is fixed by the template parameter.
+ */
+template <ComparisonOperation op>
+class CpuElementwiseComparisonStatic : public ICpuOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: U16/U32.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuComparisonKernel
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: U16/U32.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+};
+
+/** Basic function to run equal comparison. NOTE(review): NE-prefixed alias inside namespace cpu looks transitional for the NEON->Cpu rename — confirm intended final naming. */
+using NEEqual = CpuElementwiseComparisonStatic<ComparisonOperation::Equal>;
+/** Basic function to run not equal comparison. */
+using NENotEqual = CpuElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
+/** Basic function to run greater comparison. */
+using NEGreater = CpuElementwiseComparisonStatic<ComparisonOperation::Greater>;
+/** Basic function to run greater-equal comparison. */
+using NEGreaterEqual = CpuElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
+/** Basic function to run less comparison. */
+using NELess = CpuElementwiseComparisonStatic<ComparisonOperation::Less>;
+/** Basic function to run less-equal comparison. */
+using NELessEqual = CpuElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+} // namespace cpu
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CPU_ELEMENTWISE_H */