COMPMID-1226 Extend CLMeanStdDev to support FP32 / FP16
- Extend support for FP16 in CLReduction.
- For F16/F32 MeanStdDev we perform one reduction operation for mean
and one for stddev and we calculate the final result in the host CPU.
Change-Id: Iad2099f26c0ba7969737d22f00c6c275634d875c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/135870
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
index a6898fd..46e266e 100644
--- a/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
+++ b/arm_compute/core/CL/kernels/CLMeanStdDevKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -59,6 +59,17 @@
* @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
*/
void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
+ *
+ * @param[in] input Input image info. Data types supported: U8.
+ * @param[in] mean Input average pixel value.
+ * @param[in] global_sum Keeps global sum of pixel values.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
index 56f75e5..60e2f08 100644
--- a/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
+++ b/arm_compute/core/CL/kernels/CLReductionOperationKernel.h
@@ -50,7 +50,7 @@
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
@@ -60,7 +60,7 @@
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
*
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW.
* @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
* Output will have the same number of dimensions as input.
* @param[in] axis Axis along which to reduce. Supported reduction axis : 0
diff --git a/arm_compute/core/Validate.h b/arm_compute/core/Validate.h
index 1646ebe..918c8e5 100644
--- a/arm_compute/core/Validate.h
+++ b/arm_compute/core/Validate.h
@@ -787,6 +787,19 @@
*/
arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
const ITensor *tensor);
+
+/** Return an error if the tensor info is not 2D.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file Name of the file where the error occurred.
+ * @param[in] line Line on which the error occurred.
+ * @param[in] tensor Tensor info to validate.
+ *
+ * @return Status
+ */
+arm_compute::Status error_on_tensor_not_2d(const char *function, const char *file, const int line,
+ const ITensorInfo *tensor);
+
#define ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(t) \
ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_tensor_not_2d(__func__, __FILE__, __LINE__, t))
#define ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(t) \