COMPMID-1083 : Compute library should be made usable on non-ARM platforms

Added:
* Compile-time switches for kernels using FP16 extensions
* Validation of device support for the int64 base atomics extension

Change-Id: Ia88e601db054ff35f1508988b5e322bd27511ac5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/133216
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 97537c0..f25915f 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -335,6 +335,12 @@
      */
     bool fp16_supported() const;
 
+    /** Returns true if the cl_khr_int64_base_atomics extension is supported by the CL device
+     *
+     * @return true if the CL device supports the cl_khr_int64_base_atomics extension, false otherwise
+     */
+    bool int64_base_atomics_supported() const;
+
 private:
     /** Load program and its dependencies.
      *
diff --git a/arm_compute/core/CL/CLValidate.h b/arm_compute/core/CL/CLValidate.h
index 6252f00..a087b2c 100644
--- a/arm_compute/core/CL/CLValidate.h
+++ b/arm_compute/core/CL/CLValidate.h
@@ -34,5 +34,36 @@
 #define ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(tensor) \
     ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_fp16(__func__, __FILE__, __LINE__, tensor, CLKernelLibrary::get().fp16_supported()))
 
+/** Return an error if the int64_base_atomics extension is not supported by the CL device.
+ *
+ * @param[in] function Function in which the error occurred.
+ * @param[in] file     Name of the file where the error occurred.
+ * @param[in] line     Line on which the error occurred.
+ *
+ * @return A Status created with ErrorCode::UNSUPPORTED_EXTENSION_USE if the extension is unavailable, a default-constructed Status otherwise.
+ */
+inline arm_compute::Status error_on_unsupported_int64_base_atomics(const char *function, const char *file, const int line)
+{
+    if(!CLKernelLibrary::get().int64_base_atomics_supported())
+    {
+        return ARM_COMPUTE_CREATE_ERROR_LOC(arm_compute::ErrorCode::UNSUPPORTED_EXTENSION_USE, function, file, line, "Atomic functions are not supported");
+    }
+    return arm_compute::Status{};
+}
+
+/** Check for int64_base_atomics support and hand the resulting Status to ARM_COMPUTE_ERROR_THROW_ON.
+ *
+ * @note The expansion carries no trailing semicolon; the call site supplies it, matching ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED.
+ */
+#define ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
+    ARM_COMPUTE_ERROR_THROW_ON(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__))
+
+/** Check for int64_base_atomics support and hand the resulting Status to ARM_COMPUTE_RETURN_ON_ERROR.
+ *
+ * @note The expansion carries no trailing semicolon; the call site supplies it, matching ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED.
+ */
+#define ARM_COMPUTE_RETURN_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED() \
+    ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_int64_base_atomics(__func__, __FILE__, __LINE__))
+
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CL_VALIDATE_H__ */
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index 3635e93..e254956 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -43,8 +43,9 @@
 /** Available error codes */
 enum class ErrorCode
 {
-    OK,           /**< No error */
-    RUNTIME_ERROR /**< Generic runtime error */
+    OK,                       /**< No error */
+    RUNTIME_ERROR,            /**< Generic runtime error */
+    UNSUPPORTED_EXTENSION_USE /**< Unsupported extension used */
 };
 
 /** Status class */
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 9b06330..21a0e68 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -799,6 +799,11 @@
     return ::fp16_supported(_device);
 }
 
+bool CLKernelLibrary::int64_base_atomics_supported() const
+{
+    return device_supports_extension(_device, "cl_khr_int64_base_atomics"); // Extension name string; the lookup itself is delegated to device_supports_extension()
+}
+
 const Program &CLKernelLibrary::load_program(const std::string &program_name) const
 {
     const auto program_it = _programs_map.find(program_name);
diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl
index 89d8036..ad38c7e 100644
--- a/src/core/CL/cl_kernels/gemm.cl
+++ b/src/core/CL/cl_kernels/gemm.cl
@@ -2499,6 +2499,7 @@
     vstore4(out, 0, (__global float *)dst.ptr);
 }
 
+#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
 /** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta:
  *
  * @note The beta's value need to be passed at compile time using -DBETA
@@ -2535,6 +2536,7 @@
     // Store final result in axb matrix
     vstore8(out, 0, (__global half *)dst.ptr);
 }
+#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED)
 
 #if defined(FIXED_POINT_POSITION)
 /** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 8 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta:
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 1bf831b..fc8764d 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -24,13 +24,13 @@
 #include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
 
 #include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLValidate.h"
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/core/CL/OpenCL.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
 
 #include <cmath>
@@ -51,6 +51,7 @@
 
 void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
 {
+    ARM_COMPUTE_ERROR_ON_INT64_BASE_ATOMICS_UNSUPPORTED();
     ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON(nullptr == mean);