COMPMID-417 Checking CL non uniform support at runtime.

What has been done in this ticket:
1. Add support to check whether non-uniform work-group size is
supported at runtime
2. Add helper function to check the CL version at runtime
3. Add a boolean to check whether CLScheduler's init has been called.

Change-Id: I6e6df8eb5cebfac7229aa406242bb183477fd191
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/80265
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index 5234ae1..01980d9 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -102,5 +102,20 @@
  * @return the GPU target which shows the arch
  */
 GPUTarget get_arch_from_target(GPUTarget target);
+
+/** Helper function to get the highest OpenCL version supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return the highest OpenCL version supported
+ */
+CLVersion get_cl_version(const cl::Device &device);
+/** Helper function to check whether the cl_arm_non_uniform_work_group_size extension is supported
+ *
+ * @param[in] device A CL device
+ *
+ * @return True if the extension is supported
+ */
+bool non_uniform_workgroup_support(const cl::Device &device);
 }
 #endif /* __ARM_COMPUTE_CLHELPERS_H__ */
diff --git a/arm_compute/core/CL/CLTypes.h b/arm_compute/core/CL/CLTypes.h
index c5643d8..cf11f6e 100644
--- a/arm_compute/core/CL/CLTypes.h
+++ b/arm_compute/core/CL/CLTypes.h
@@ -37,5 +37,15 @@
     T800          = 0x130,
     G70           = 0x210
 };
+
+/* Available OpenCL Version */
+enum class CLVersion
+{
+    CL10,   /* the OpenCL 1.0 */
+    CL11,   /* the OpenCL 1.1 */
+    CL12,   /* the OpenCL 1.2 */
+    CL20,   /* the OpenCL 2.0 and above */
+    UNKNOWN /* unkown version */
+};
 }
 #endif /* __ARM_COMPUTE_CL_TYPES_H__ */
diff --git a/arm_compute/runtime/CL/CLScheduler.h b/arm_compute/runtime/CL/CLScheduler.h
index 8e80259..3f3a8de 100644
--- a/arm_compute/runtime/CL/CLScheduler.h
+++ b/arm_compute/runtime/CL/CLScheduler.h
@@ -72,9 +72,10 @@
     void init(cl::Context context = cl::Context::getDefault(), cl::CommandQueue queue = cl::CommandQueue::getDefault(),
               cl::Device device = cl::Device::getDefault())
     {
-        _context = std::move(context);
-        _queue   = std::move(queue);
-        _target  = get_target_from_device(device);
+        _context        = std::move(context);
+        _queue          = std::move(queue);
+        _target         = get_target_from_device(device);
+        _is_initialised = true;
     }
 
     /** Accessor for the associated CL context.
@@ -83,6 +84,7 @@
      */
     cl::Context &context()
     {
+        ARM_COMPUTE_ERROR_ON(!_is_initialised);
         return _context;
     }
 
@@ -101,6 +103,7 @@
      */
     cl::CommandQueue &queue()
     {
+        ARM_COMPUTE_ERROR_ON(!_is_initialised);
         return _queue;
     }
 
@@ -153,6 +156,7 @@
     cl::Context      _context;
     cl::CommandQueue _queue;
     GPUTarget        _target;
+    bool             _is_initialised;
 };
 }
 #endif /* __ARM_COMPUTE_CLSCHEDULER_H__ */
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 835260d..dd87e77 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -166,4 +166,57 @@
 {
     return (target & GPUTarget::GPU_ARCH_MASK);
 }
+
+bool non_uniform_workgroup_support(const cl::Device &device)
+{
+    std::vector<char> extension;
+    size_t            extension_size = 0;
+    cl_int            err            = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, 0, nullptr, &extension_size);
+    ARM_COMPUTE_ERROR_ON_MSG((err != 0) || (extension_size == 0), "clGetDeviceInfo failed to return valid information");
+    // Resize vector
+    extension.resize(extension_size);
+    // Query extension
+    err = clGetDeviceInfo(device.get(), CL_DEVICE_EXTENSIONS, extension_size, extension.data(), nullptr);
+    ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetDeviceInfo failed to return valid information");
+    ARM_COMPUTE_UNUSED(err);
+
+    std::string extension_str(extension.begin(), extension.end());
+    auto        pos = extension_str.find("cl_arm_non_uniform_work_group_size");
+    return (pos != std::string::npos);
+}
+
+CLVersion get_cl_version(const cl::Device &device)
+{
+    std::vector<char> version;
+    size_t            version_size = 0;
+    cl_int            err          = clGetDeviceInfo(device.get(), CL_DEVICE_VERSION, 0, nullptr, &version_size);
+    ARM_COMPUTE_ERROR_ON_MSG((err != 0) || (version_size == 0), "clGetDeviceInfo failed to return valid information");
+    // Resize vector
+    version.resize(version_size);
+    // Query version
+    err = clGetDeviceInfo(device.get(), CL_DEVICE_VERSION, version_size, version.data(), nullptr);
+    ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetDeviceInfo failed to return valid information");
+    ARM_COMPUTE_UNUSED(err);
+
+    std::string version_str(version.begin(), version.end());
+    if(version_str.find("OpenCL 2") != std::string::npos)
+    {
+        return CLVersion::CL20;
+    }
+    else if(version_str.find("OpenCL 1.2") != std::string::npos)
+    {
+        return CLVersion::CL12;
+    }
+    else if(version_str.find("OpenCL 1.1") != std::string::npos)
+    {
+        return CLVersion::CL11;
+    }
+    else if(version_str.find("OpenCL 1.0") != std::string::npos)
+    {
+        return CLVersion::CL10;
+    }
+
+    return CLVersion::UNKNOWN;
+}
+
 } // namespace arm_compute
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index 72230435..769d509 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 
+#include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Utils.h"
 
@@ -514,9 +515,25 @@
         ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
     }
 
+    std::string concat_str;
+
+    if(non_uniform_workgroup_support(_device))
+    {
+        concat_str += " -cl-arm-non-uniform-work-group-size ";
+    }
+    else if(get_cl_version(_device) == CLVersion::CL20)
+    {
+        concat_str += " -cl-std=CL2.0 ";
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
+    }
+
     // Check if the program has been built before with same build options.
-    const std::string program_name       = kernel_program_it->second;
-    const std::string build_options      = stringify_set(build_options_set);
+    const std::string program_name  = kernel_program_it->second;
+    const std::string build_options = stringify_set(build_options_set) + concat_str;
+
     const std::string built_program_name = program_name + "_" + build_options;
     auto              built_program_it   = _built_programs_map.find(built_program_name);
 
@@ -591,7 +608,7 @@
 
 std::string CLKernelLibrary::stringify_set(const StringSet &s) const
 {
-    std::string concat_set = "-cl-arm-non-uniform-work-group-size ";
+    std::string concat_set;
 
 #ifndef EMBEDDED_KERNELS
     concat_set += "-I" + _kernel_path + " ";
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index fe25ce5..f413f62 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -28,7 +28,7 @@
 using namespace arm_compute;
 
 CLScheduler::CLScheduler()
-    : _context(), _queue(), _target(GPUTarget::MIDGARD)
+    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false)
 {
 }
 
@@ -40,6 +40,10 @@
 
 void CLScheduler::enqueue(ICLKernel &kernel, bool flush)
 {
+    ARM_COMPUTE_ERROR_ON_MSG(!_is_initialised,
+                             "The CLScheduler is not initialised yet! Please call the CLScheduler::get().default_init(), \
+                             or CLScheduler::get()::init() and CLKernelLibrary::get()::init() function before running functions!");
+
     kernel.run(kernel.window(), _queue);
 
     if(flush)