Add Queue support

Queues are responsible for scheduling operators and for other
runtime-related activities such as tuning.
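
A minimal usage sketch of the new C++ interface (illustrative only;
status handling is omitted and operator scheduling is elided):

    acl::Context ctx(acl::Target::Cpu);
    acl::Queue   queue(ctx);  // default options: no tuning
    // ... run operators using ctx / queue ...
    queue.finish();           // block until all scheduled work completes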

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I0366d9048470d277b8cbf59fa42f95c0ae57c5c9
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5487
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index f542b20..78ac7f1 100644
--- a/Android.bp
+++ b/Android.bp
@@ -52,6 +52,7 @@
     export_include_dirs: [".", "./include"],
     srcs: [
         "src/c/AclContext.cpp",
+        "src/c/AclQueue.cpp",
         "src/c/AclTensor.cpp",
         "src/c/AclTensorPack.cpp",
         "src/c/AclVersion.cpp",
@@ -300,7 +301,7 @@
         "src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp",
         "src/core/cpu/kernels/CpuDequantizationKernel.cpp",
         "src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp",
-        "src/core/cpu/kernels/CpuDirectConvolutionStageKernel.cpp",
+        "src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp",
         "src/core/cpu/kernels/CpuElementwiseKernel.cpp",
         "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
         "src/core/cpu/kernels/CpuFillKernel.cpp",
@@ -315,16 +316,16 @@
         "src/core/cpu/kernels/CpuSoftmaxKernel.cpp",
         "src/core/cpu/kernels/CpuSubKernel.cpp",
         "src/core/cpu/kernels/CpuTransposeKernel.cpp",
-        "src/core/cpu/kernels/activation/NEON/fp16.cpp",
-        "src/core/cpu/kernels/activation/NEON/fp32.cpp",
-        "src/core/cpu/kernels/activation/NEON/qasymm8.cpp",
-        "src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp",
-        "src/core/cpu/kernels/activation/NEON/qsymm16.cpp",
-        "src/core/cpu/kernels/activation/SVE/fp16.cpp",
-        "src/core/cpu/kernels/activation/SVE/fp32.cpp",
-        "src/core/cpu/kernels/activation/SVE/qasymm8.cpp",
-        "src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp",
-        "src/core/cpu/kernels/activation/SVE/qsymm16.cpp",
+        "src/core/cpu/kernels/activation/neon/fp16.cpp",
+        "src/core/cpu/kernels/activation/neon/fp32.cpp",
+        "src/core/cpu/kernels/activation/neon/qasymm8.cpp",
+        "src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp",
+        "src/core/cpu/kernels/activation/neon/qsymm16.cpp",
+        "src/core/cpu/kernels/activation/sve/fp16.cpp",
+        "src/core/cpu/kernels/activation/sve/fp32.cpp",
+        "src/core/cpu/kernels/activation/sve/qasymm8.cpp",
+        "src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp",
+        "src/core/cpu/kernels/activation/sve/qsymm16.cpp",
         "src/core/cpu/kernels/add/neon/integer.cpp",
         "src/core/cpu/kernels/add/neon/qasymm8.cpp",
         "src/core/cpu/kernels/add/neon/qasymm8_signed.cpp",
@@ -390,8 +391,10 @@
         "src/core/utils/misc/MMappedFile.cpp",
         "src/core/utils/quantization/AsymmHelpers.cpp",
         "src/cpu/CpuContext.cpp",
+        "src/cpu/CpuQueue.cpp",
         "src/cpu/CpuTensor.cpp",
         "src/gpu/cl/ClContext.cpp",
+        "src/gpu/cl/ClQueue.cpp",
         "src/gpu/cl/ClTensor.cpp",
         "src/runtime/Allocator.cpp",
         "src/runtime/BlobLifetimeManager.cpp",
diff --git a/SConscript b/SConscript
index b09551f..63c2a48 100644
--- a/SConscript
+++ b/SConscript
@@ -188,11 +188,25 @@
 runtime_files += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp')
 runtime_files += Glob('src/runtime/CPP/functions/*.cpp')
 
-runtime_files += Glob('src/c/*.cpp')
-runtime_files += Glob('src/common/*.cpp')
-runtime_files += Glob('src/common/utils/*.cpp')
-runtime_files += Glob('src/cpu/*.cpp')
+# C API files
+c_api_files = ['src/c/AclContext.cpp',
+               'src/c/AclQueue.cpp',
+               'src/c/AclTensor.cpp',
+               'src/c/AclTensorPack.cpp',
+               'src/c/AclVersion.cpp',
+               ]
+if env['opencl']:
+    c_api_files += ['src/c/cl/AclOpenClExt.cpp']
 
+# Common backend files
+common_backend_files = ['src/common/utils/LegacySupport.cpp',
+                        'src/common/AllocatorWrapper.cpp',
+                        'src/common/ITensorV2.cpp',
+                        'src/common/TensorPack.cpp',
+                        ]
+
+core_files += common_backend_files
+runtime_files += c_api_files
 # CLHarrisCorners uses the Scheduler to run CPP kernels
 runtime_files += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
 
@@ -225,7 +239,6 @@
     runtime_files += Glob('src/runtime/CL/gemm_auto_heuristics/*.cpp')
 
     runtime_files += Glob('src/gpu/cl/*.cpp')
-    runtime_files += Glob('src/c/cl/*.cpp')
 
     graph_files += Glob('src/graph/backends/CL/*.cpp')
 
@@ -278,8 +291,36 @@
     runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
     runtime_files += Glob('src/runtime/NEON/functions/assembly/*.cpp')
 
-    core_files += Glob('src/core/cpu/*.cpp')
-    core_files += Glob('src/core/cpu/kernels/*.cpp')
+    cpu_kernel_hp_files = ['src/core/cpu/kernels/CpuActivationKernel.cpp',
+                           'src/core/cpu/kernels/CpuDepthwiseConvolutionNativeKernel.cpp',
+                           'src/core/cpu/kernels/CpuDirectConvolutionKernel.cpp',
+                           'src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp',
+                           'src/core/cpu/kernels/CpuPermuteKernel.cpp',
+                           'src/core/cpu/kernels/CpuPoolingAssemblyWrapperKernel.cpp',
+                           'src/core/cpu/kernels/CpuPoolingKernel.cpp',
+                           'src/core/cpu/kernels/CpuReshapeKernel.cpp',
+                          ]
+    cpu_kernel_files = ['src/core/cpu/kernels/CpuAddKernel.cpp',
+                        'src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp',
+                        'src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp',
+                        'src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp',
+                        'src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp',
+                        'src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp',
+                        'src/core/cpu/kernels/CpuCopyKernel.cpp',
+                        'src/core/cpu/kernels/CpuDequantizationKernel.cpp',
+                        'src/core/cpu/kernels/CpuElementwiseKernel.cpp',
+                        'src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp',
+                        'src/core/cpu/kernels/CpuFillKernel.cpp',
+                        'src/core/cpu/kernels/CpuFloorKernel.cpp',
+                        'src/core/cpu/kernels/CpuPixelWiseMultiplicationKernel.cpp',
+                        'src/core/cpu/kernels/CpuQuantizationKernel.cpp',
+                        'src/core/cpu/kernels/CpuScaleKernel.cpp',
+                        'src/core/cpu/kernels/CpuSoftmaxKernel.cpp',
+                        'src/core/cpu/kernels/CpuSubKernel.cpp',
+                        'src/core/cpu/kernels/CpuTransposeKernel.cpp',
+                       ]
+    core_files += [cpu_kernel_hp_files, cpu_kernel_files]
+
     core_files += Glob('src/core/cpu/kernels/*/*.cpp')
     if any(i in env['data_type_support'] for i in ['all', 'fp16']):
         core_files += Glob('src/core/cpu/kernels/*/*/fp16.cpp')
@@ -293,12 +334,40 @@
         core_files += Glob('src/core/cpu/kernels/*/*/qsymm16.cpp')
     if any(i in env['data_type_support'] for i in ['all', 'integer']):
         core_files += Glob('src/core/cpu/kernels/*/*/integer.cpp')
-   
+
     if any(i in env['data_layout_support'] for i in ['all', 'nchw']):
         core_files += Glob('src/core/cpu/kernels/*/*/nchw/all.cpp')
 
-    runtime_files += Glob('src/runtime/cpu/*.cpp')
-    runtime_files += Glob('src/runtime/cpu/operators/*.cpp')
+    cpu_rt_files = ['src/cpu/CpuContext.cpp',
+                    'src/cpu/CpuQueue.cpp',
+                    'src/cpu/CpuTensor.cpp'
+                   ]
+    cpu_operator_hp_files = ['src/runtime/cpu/operators/CpuActivation.cpp',
+                             'src/runtime/cpu/operators/CpuDepthwiseConvolution.cpp',
+                             'src/runtime/cpu/operators/CpuDepthwiseConvolutionAssemblyDispatch.cpp',
+                             'src/runtime/cpu/operators/CpuDirectConvolution.cpp',
+                             'src/runtime/cpu/operators/CpuPermute.cpp',
+                             'src/runtime/cpu/operators/CpuPooling.cpp',
+                             'src/runtime/cpu/operators/CpuPoolingAssemblyDispatch.cpp',
+                            ]
+    cpu_operator_files = ['src/runtime/cpu/operators/CpuAdd.cpp',
+                          'src/runtime/cpu/operators/CpuConcatenate.cpp',
+                          'src/runtime/cpu/operators/CpuConvertFullyConnectedWeights.cpp',
+                          'src/runtime/cpu/operators/CpuCopy.cpp',
+                          'src/runtime/cpu/operators/CpuDequantization.cpp',
+                          'src/runtime/cpu/operators/CpuElementwise.cpp',
+                          'src/runtime/cpu/operators/CpuElementwiseUnary.cpp',
+                          'src/runtime/cpu/operators/CpuFill.cpp',
+                          'src/runtime/cpu/operators/CpuFloor.cpp',
+                          'src/runtime/cpu/operators/CpuPixelWiseMultiplication.cpp',
+                          'src/runtime/cpu/operators/CpuQuantization.cpp',
+                          'src/runtime/cpu/operators/CpuReshape.cpp',
+                          'src/runtime/cpu/operators/CpuScale.cpp',
+                          'src/runtime/cpu/operators/CpuSoftmax.cpp',
+                          'src/runtime/cpu/operators/CpuSub.cpp',
+                          'src/runtime/cpu/operators/CpuTranspose.cpp',
+                         ]
+    runtime_files += [ cpu_rt_files, cpu_operator_hp_files, cpu_operator_files ]
 
 bootcode_o = []
 if env['os'] == 'bare_metal':
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index 01f7179..93ac2d8 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -42,6 +42,7 @@
 {
 // Forward declarations
 class Context;
+class Queue;
 class Tensor;
 class TensorPack;
 
@@ -83,6 +84,7 @@
     };
 
 OBJECT_DELETER(AclContext, AclDestroyContext)
+OBJECT_DELETER(AclQueue, AclDestroyQueue)
 OBJECT_DELETER(AclTensor, AclDestroyTensor)
 OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
 
@@ -384,7 +386,7 @@
         AclContext ctx;
         const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
         reset(ctx);
-        report_status(st, "[Arm Compute Library] Failed to create context");
+        report_status(st, "[Compute Library] Failed to create context");
         if(status)
         {
             *status = st;
@@ -392,6 +394,92 @@
     }
 };
 
+/**< Available tuning modes */
+enum class TuningMode
+{
+    Rapid      = AclRapid,
+    Normal     = AclNormal,
+    Exhaustive = AclExhaustive
+};
+
+/** Queue class
+ *
+ * A queue is responsible for the execution-related aspects, its main responsibilities being
+ * the scheduling and tuning of operators.
+ *
+ * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
+ *
+ * @note An operator might depend on the maximum number of compute units provided by the context,
+ *       thus requesting more scheduling units from the queue than the context provides might lead to errors.
+ */
+class Queue : public detail::ObjectBase<AclQueue_>
+{
+public:
+    /**< Queue options */
+    struct Options
+    {
+        /** Default Constructor
+         *
+         * With the default options, no tuning will be performed, and the number of scheduling units will
+         * depend on the internal device-discovery functionality
+         */
+        Options()
+            : opts{ AclTuningModeNone, 0 } {};
+        /** Constructor
+         *
+         * @param[in] mode          Tuning mode to be used
+         * @param[in] compute_units Number of scheduling units to be used
+         */
+        Options(TuningMode mode, int32_t compute_units)
+            : opts{ detail::as_cenum<AclTuningMode>(mode), compute_units }
+        {
+        }
+
+        AclQueueOptions opts;
+    };
+
+public:
+    /** Constructor
+     *
+     * @note Serves as a simpler delegate constructor
+     * @note As queue options, default conservative options will be used
+     *
+     * @param[in]  ctx    Context to create queue for
+     * @param[out] status Status information if requested
+     */
+    explicit Queue(Context &ctx, StatusCode *status = nullptr)
+        : Queue(ctx, Options(), status)
+    {
+    }
+    /** Constructor
+     *
+     * @note As queue options, default conservative options will be used
+     *
+     * @param[in]  ctx     Context from which the queue will be created
+     * @param[in]  options Queue options to be used
+     * @param[out] status  Status information if requested
+     */
+    explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr)
+    {
+        AclQueue   queue;
+        const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
+        reset(queue);
+        report_status(st, "[Compute Library] Failed to create queue!");
+        if(status)
+        {
+            *status = st;
+        }
+    }
+    /** Block until all the tasks of the queue have been marked as finished
+     *
+     * @return Status code
+     */
+    StatusCode finish()
+    {
+        return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
+    }
+};
+
 /**< Data type enumeration */
 enum class DataType
 {
@@ -519,7 +607,7 @@
         AclTensor  tensor;
         const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
         reset(tensor);
-        report_status(st, "[Arm Compute Library] Failed to create tensor!");
+        report_status(st, "[Compute Library] Failed to create tensor!");
         if(status)
         {
             *status = st;
@@ -533,7 +621,7 @@
     {
         void      *handle = nullptr;
         const auto st     = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
-        report_status(st, "[Arm Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
+        report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
         return handle;
     }
     /** Unmaps tensor's memory
@@ -545,7 +633,7 @@
     StatusCode unmap(void *handle)
     {
         const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
-        report_status(st, "[Arm Compute Library] Failed to unmap the tensor!");
+        report_status(st, "[Compute Library] Failed to unmap the tensor!");
         return st;
     }
     /** Import external memory to a given tensor object
@@ -558,7 +646,7 @@
     StatusCode import(void *handle, ImportType type)
     {
         const auto st = detail::as_enum<StatusCode>(AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
-        report_status(st, "[Arm Compute Library] Failed to import external memory to tensor!");
+        report_status(st, "[Compute Library] Failed to import external memory to tensor!");
         return st;
     }
     /** Get the size of the tensor in byte
@@ -571,7 +659,7 @@
     {
         uint64_t   size{ 0 };
         const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
-        report_status(st, "[Arm Compute Library] Failed to get the size of the tensor");
+        report_status(st, "[Compute Library] Failed to get the size of the tensor");
         return size;
     }
     /** Get the descriptor of this tensor
@@ -582,7 +670,7 @@
     {
         AclTensorDescriptor desc;
         const auto          st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
-        report_status(st, "[Arm Compute Library] Failed to get the descriptor of the tensor");
+        report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
         return TensorDescriptor(desc);
     }
 };
@@ -623,7 +711,7 @@
         AclTensorPack pack;
         const auto    st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
         reset(pack);
-        report_status(st, "[Arm Compute Library] Failure during tensor pack creation");
+        report_status(st, "[Compute Library] Failure during tensor pack creation");
         if(status)
         {
             *status = st;
diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h
index cd97434..cf4a237 100644
--- a/arm_compute/AclEntrypoints.h
+++ b/arm_compute/AclEntrypoints.h
@@ -62,6 +62,49 @@
  */
 AclStatus AclDestroyContext(AclContext ctx);
 
+/** Create an operator queue
+ *
+ * Queue is responsible for any scheduling related activities
+ *
+ * @param[in, out] queue   A valid non-zero queue object if no failures occur
+ * @param[in]      ctx     Context to be used
+ * @param[in]      options Queue options to be used for the operators using the queue
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclOutOfMemory if there was a failure allocating memory resources
+ *  - @ref AclUnsupportedTarget if the requested target is unsupported
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options);
+
+/** Wait until all elements on the queue have been completed
+ *
+ * @param[in] queue Queue to wait on completion
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if the provided queue is invalid
+ *  - @ref AclRuntimeError on any other runtime related error
+ */
+AclStatus AclQueueFinish(AclQueue queue);
+
+/** Destroy a given queue object
+ *
+ * @param[in] queue A valid queue object to destroy
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if the provided queue is invalid
+ */
+AclStatus AclDestroyQueue(AclQueue queue);
+
 /** Create a Tensor object
  *
  * Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library)
diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h
index 15b233c..b9080da 100644
--- a/arm_compute/AclOpenClExt.h
+++ b/arm_compute/AclOpenClExt.h
@@ -43,7 +43,6 @@
 /** Extract the underlying OpenCL context used by a given Compute Library context object
  *
  * @note @ref AclContext should be of an OpenCL backend target
- * @note @ref AclContext refcount should be 0, meaning not used by other objects
  *
  * @param[in]  ctx            A valid non-zero context
  * @param[out] opencl_context Underlying OpenCL context used
@@ -52,7 +51,18 @@
  */
 AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
 
-/** Set the underlying OpenCL context used by a given Compute Library context object
+/** Extract the underlying OpenCL device id used by a given Compute Library context object
+ *
+ * @note @ref AclContext should be of an OpenCL backend target
+ *
+ * @param[in]  ctx           A valid non-zero context
+ * @param[out] opencl_device Underlying OpenCL device used
+ *
+ * @return Status code
+ */
+AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device);
+
+/** Set the underlying OpenCL context to be used by a given Compute Library context object
  *
  * @note @ref AclContext should be of an OpenCL backend target
  *
@@ -63,6 +73,30 @@
  */
 AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
 
+/** Extract the underlying OpenCL queue used by a given Compute Library queue object
+ *
+ * @note @ref AclQueue should be of an OpenCL backend target
+ * @note @ref AclQueue refcount should be 0, meaning not used by other objects
+ *
+ * @param[in]  queue        A valid non-zero queue
+ * @param[out] opencl_queue Underlying OpenCL queue used
+ *
+ * @return Status code
+ */
+AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue);
+
+/** Set the underlying OpenCL queue to be used by a given Compute Library queue object
+ *
+ * @note @ref AclQueue should be of an OpenCL backend target
+ * @note opencl_queue needs to be created from the same underlying cl_context that the queue's AclContext uses
+ *
+ * @param[in]  queue        A valid non-zero queue object
+ * @param[in]  opencl_queue Underlying OpenCL queue to be used
+ *
+ * @return Status code
+ */
+AclStatus AclSetClQueue(AclQueue queue, cl_command_queue opencl_queue);
+
 /** Extract the underlying OpenCL memory object by a given Compute Library tensor object
  *
  * @param[in]  tensor     A valid non-zero tensor
diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h
index 69717ec..902a508 100644
--- a/arm_compute/AclTypes.h
+++ b/arm_compute/AclTypes.h
@@ -33,6 +33,8 @@
 
 /**< Opaque Context object */
 typedef struct AclContext_ *AclContext;
+/**< Opaque Queue object */
+typedef struct AclQueue_ *AclQueue;
 /**< Opaque Tensor object */
 typedef struct AclTensor_ *AclTensor;
 /**< Opaque Tensor pack object */
@@ -138,6 +140,22 @@
     AclAllocator         *allocator;          /**< Allocator to be used by all the memory internally */
 } AclContextOptions;
 
+/**< Supported tuning modes */
+typedef enum
+{
+    AclTuningModeNone = 0, /**< No tuning */
+    AclRapid          = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
+    AclNormal         = 2, /**< Normal tuning mode, gives a good balance between tuning time and performance */
+    AclExhaustive     = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
+} AclTuningMode;
+
+/**< Queue options */
+typedef struct
+{
+    AclTuningMode mode;          /**< Tuning mode */
+    int32_t       compute_units; /**< Compute Units that the queue will deploy */
+} AclQueueOptions;
+
 /**< Supported data types */
 typedef enum AclDataType
 {
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 1e6b04c..bbe469f 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -92,6 +92,7 @@
     DECLARE_FUNCTION_PTR(clCreateContext);
     DECLARE_FUNCTION_PTR(clCreateContextFromType);
     DECLARE_FUNCTION_PTR(clCreateCommandQueue);
+    DECLARE_FUNCTION_PTR(clCreateCommandQueueWithProperties);
     DECLARE_FUNCTION_PTR(clGetContextInfo);
     DECLARE_FUNCTION_PTR(clBuildProgram);
     DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel);
diff --git a/docs/01_library.dox b/docs/01_library.dox
index 722a07f..25535d1 100644
--- a/docs/01_library.dox
+++ b/docs/01_library.dox
@@ -508,7 +508,27 @@
 
 But, when the @ref CLTuner is disabled ( Target = 1 for the graph examples), the @ref graph::Graph will try to reload the file containing the tuning parameters, then for each executed kernel the Compute Library will use the fine tuned LWS if it was present in the file or use a default LWS value if it's not.
 
-@section S4_10_weights_manager Weights Manager
+@section S4_10_cl_queue_prioritites OpenCL Queue Priorities
+
+OpenCL 2.1 exposes the `cl_khr_priority_hints` extension which, if supported by the underlying implementation, allows the user to specify priority hints for the created command queues.
+It is important to note that this does not guarantee any specific scheduling behavior; that is something each implementation needs to define.
+
+In some cases, priority queues can be used when there is an implicit internal priority between graphics and compute queues and thus allow some level of priority control between them.
+At the moment, three priority levels can be specified:
+- CL_QUEUE_PRIORITY_HIGH_KHR
+- CL_QUEUE_PRIORITY_MED_KHR
+- CL_QUEUE_PRIORITY_LOW_KHR
+
+Compute Library allows extraction of the internal OpenCL queue and direct injection of a user-defined queue into the @ref CLScheduler.
+This way the user can utilize this extension to define priorities between the queues and set up the OpenCL scheduling mechanism to use them.
+
+@code{.cpp}
+cl_queue_properties queue_properties[] = {CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0};
+cl_command_queue priority_queue = clCreateCommandQueueWithProperties(ctx, dev, queue_properties, &error);
+CLScheduler::get().set_queue(::cl::CommandQueue(priority_queue));
+@endcode
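+
+The same injection can be performed through the new runtime C API on an @ref AclQueue object. The snippet below is
+a minimal sketch (error handling omitted) that reuses priority_queue from above and assumes an OpenCL-backed
+@ref AclContext named acl_ctx has already been created:
+
+@code{.cpp}
+AclQueue queue;
+AclCreateQueue(&queue, acl_ctx, nullptr);
+AclSetClQueue(queue, priority_queue);
+@endcode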
+
+@section S4_11_weights_manager Weights Manager
 
 @ref IWeightsManager is a weights managing interface that can be used to reduce the memory requirements of a given pipeline by reusing transformed weights across multiple function executions.
 @ref IWeightsManager is responsible for managing weight tensors alongside with their transformations.
diff --git a/src/c/AclQueue.cpp b/src/c/AclQueue.cpp
new file mode 100644
index 0000000..020c6ed
--- /dev/null
+++ b/src/c/AclQueue.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/AclEntrypoints.h"
+
+#include "src/common/IQueue.h"
+#include "src/common/utils/Macros.h"
+#include "src/common/utils/Validate.h"
+
+namespace
+{
+/** Check if the tuning mode specified in the queue options is valid
+ *
+ * @param[in] options Queue options
+ *
+ * @return true in case of success else false
+ */
+bool is_mode_valid(const AclQueueOptions *options)
+{
+    ARM_COMPUTE_ASSERT_NOT_NULLPTR(options);
+    return arm_compute::utils::is_in(options->mode, { AclTuningModeNone, AclRapid, AclNormal, AclExhaustive });
+}
+} // namespace
+
+extern "C" AclStatus AclCreateQueue(AclQueue *external_queue, AclContext external_ctx, const AclQueueOptions *options)
+{
+    using namespace arm_compute;
+
+    auto ctx = get_internal(external_ctx);
+
+    StatusCode status = detail::validate_internal_context(ctx);
+    ARM_COMPUTE_RETURN_CENUM_ON_FAILURE(status);
+
+    if(options != nullptr && !is_mode_valid(options))
+    {
+        ARM_COMPUTE_LOG_ERROR_ACL("Queue options are invalid");
+        return AclInvalidArgument;
+    }
+
+    auto queue = ctx->create_queue(options);
+    if(queue == nullptr)
+    {
+        ARM_COMPUTE_LOG_ERROR_ACL("Couldn't allocate internal resources");
+        return AclOutOfMemory;
+    }
+
+    *external_queue = queue;
+
+    return AclSuccess;
+}
+
+extern "C" AclStatus AclQueueFinish(AclQueue external_queue)
+{
+    using namespace arm_compute;
+
+    auto queue = get_internal(external_queue);
+
+    StatusCode status = detail::validate_internal_queue(queue);
+    ARM_COMPUTE_RETURN_CENUM_ON_FAILURE(status);
+
+    status = queue->finish();
+    ARM_COMPUTE_RETURN_CENUM_ON_FAILURE(status);
+
+    return AclSuccess;
+}
+
+extern "C" AclStatus AclDestroyQueue(AclQueue external_queue)
+{
+    using namespace arm_compute;
+
+    auto queue = get_internal(external_queue);
+
+    StatusCode status = detail::validate_internal_queue(queue);
+    ARM_COMPUTE_RETURN_CENUM_ON_FAILURE(status);
+
+    delete queue;
+
+    return AclSuccess;
+}
diff --git a/src/c/cl/AclOpenClExt.cpp b/src/c/cl/AclOpenClExt.cpp
index ce6d296..e72babc 100644
--- a/src/c/cl/AclOpenClExt.cpp
+++ b/src/c/cl/AclOpenClExt.cpp
@@ -26,6 +26,7 @@
 #include "src/common/ITensorV2.h"
 #include "src/common/Types.h"
 #include "src/gpu/cl/ClContext.h"
+#include "src/gpu/cl/ClQueue.h"
 
 #include "arm_compute/core/CL/ICLTensor.h"
 
@@ -85,6 +86,80 @@
     return AclStatus::AclSuccess;
 }
 
+extern "C" AclStatus AclGetClDevice(AclContext external_ctx, cl_device_id *opencl_device)
+{
+    using namespace arm_compute;
+    IContext *ctx = get_internal(external_ctx);
+
+    if(detail::validate_internal_context(ctx) != StatusCode::Success)
+    {
+        return AclStatus::AclInvalidArgument;
+    }
+
+    if(ctx->type() != Target::GpuOcl)
+    {
+        return AclStatus::AclInvalidTarget;
+    }
+
+    if(opencl_device == nullptr)
+    {
+        return AclStatus::AclInvalidArgument;
+    }
+
+    *opencl_device = utils::cast::polymorphic_downcast<arm_compute::gpu::opencl::ClContext *>(ctx)->cl_dev().get();
+
+    return AclStatus::AclSuccess;
+}
+
+extern "C" AclStatus AclGetClQueue(AclQueue external_queue, cl_command_queue *opencl_queue)
+{
+    using namespace arm_compute;
+    IQueue *queue = get_internal(external_queue);
+
+    if(detail::validate_internal_queue(queue) != StatusCode::Success)
+    {
+        return AclStatus::AclInvalidArgument;
+    }
+
+    if(queue->header.ctx->type() != Target::GpuOcl)
+    {
+        return AclStatus::AclInvalidTarget;
+    }
+
+    if(opencl_queue == nullptr)
+    {
+        return AclStatus::AclInvalidArgument;
+    }
+
+    *opencl_queue = utils::cast::polymorphic_downcast<arm_compute::gpu::opencl::ClQueue *>(queue)->cl_queue().get();
+
+    return AclStatus::AclSuccess;
+}
+
+extern "C" AclStatus AclSetClQueue(AclQueue external_queue, cl_command_queue opencl_queue)
+{
+    using namespace arm_compute;
+    IQueue *queue = get_internal(external_queue);
+
+    if(detail::validate_internal_queue(queue) != StatusCode::Success)
+    {
+        return AclStatus::AclInvalidArgument;
+    }
+
+    if(queue->header.ctx->type() != Target::GpuOcl)
+    {
+        return AclStatus::AclInvalidTarget;
+    }
+
+    auto cl_queue = utils::cast::polymorphic_downcast<arm_compute::gpu::opencl::ClQueue *>(queue);
+    if(!cl_queue->set_cl_queue(::cl::CommandQueue(opencl_queue)))
+    {
+        return AclStatus::AclRuntimeError;
+    }
+
+    return AclStatus::AclSuccess;
+}
+
 extern "C" AclStatus AclGetClMem(AclTensor external_tensor, cl_mem *opencl_mem)
 {
     using namespace arm_compute;
diff --git a/src/common/IContext.h b/src/common/IContext.h
index ee23479..31f39da 100644
--- a/src/common/IContext.h
+++ b/src/common/IContext.h
@@ -43,6 +43,7 @@
 {
 // Forward declarations
 class ITensorV2;
+class IQueue;
 
 /**< Context interface */
 class IContext : public AclContext_
@@ -52,11 +53,13 @@
         : AclContext_(), _target(target), _refcount(0)
     {
     }
+
     /** Virtual Destructor */
     virtual ~IContext()
     {
         header.type = detail::ObjectType::Invalid;
     };
+
     /** Target type accessor
      *
      * @return Target that the context is associated with
@@ -65,16 +68,19 @@
     {
         return _target;
     }
+
     /** Increment context refcount */
     void inc_ref() const
     {
         ++_refcount;
     }
+
     /** Decrement context refcount */
     void dec_ref() const
     {
         --_refcount;
     }
+
     /** Reference counter accessor
      *
      * @return The number of references pointing to this object
@@ -83,6 +89,7 @@
     {
         return _refcount;
     }
+
     /** Checks if an object is valid
      *
      * @return True if sucessful otherwise false
@@ -91,6 +98,7 @@
     {
         return header.type == detail::ObjectType::Context;
     }
+
     /** Create a tensor object
      *
      * @param[in] desc     Descriptor to use
@@ -100,6 +108,14 @@
      */
     virtual ITensorV2 *create_tensor(const AclTensorDescriptor &desc, bool allocate) = 0;
 
+    /** Create a queue object
+     *
+     * @param[in] options Queue options to be used
+     *
+     * @return A pointer to the created queue object
+     */
+    virtual IQueue *create_queue(const AclQueueOptions *options) = 0;
+
 private:
     Target                   _target;   /**< Target type of context */
     mutable std::atomic<int> _refcount; /**< Reference counter */
diff --git a/src/common/IQueue.h b/src/common/IQueue.h
new file mode 100644
index 0000000..6a0cbc7
--- /dev/null
+++ b/src/common/IQueue.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_COMMON_IQUEUE_H_
+#define SRC_COMMON_IQUEUE_H_
+
+#include "src/common/IContext.h"
+
+struct AclQueue_
+{
+    arm_compute::detail::Header header{ arm_compute::detail::ObjectType::Queue, nullptr };
+
+protected:
+    AclQueue_()  = default;
+    ~AclQueue_() = default;
+};
+
+namespace arm_compute
+{
+/** Base class specifying the queue interface */
+class IQueue : public AclQueue_
+{
+public:
+    /** Explicit Queue Constructor
+     *
+     * @param[in] ctx Context to be used by the queue
+     */
+    explicit IQueue(IContext *ctx)
+    {
+        this->header.ctx = ctx;
+        this->header.ctx->inc_ref();
+    }
+    /** Destructor */
+    virtual ~IQueue()
+    {
+        this->header.ctx->dec_ref();
+        this->header.type = detail::ObjectType::Invalid;
+    };
+    /** Checks if a queue is valid
+     *
+     * @return True if successful otherwise false
+     */
+    bool is_valid() const
+    {
+        return this->header.type == detail::ObjectType::Queue;
+    };
+    virtual StatusCode finish() = 0;
+};
+
+/** Extract internal representation of a Queue
+ *
+ * @param[in] queue Opaque queue pointer
+ *
+ * @return The internal representation as an IQueue
+ */
+inline IQueue *get_internal(AclQueue queue)
+{
+    return static_cast<IQueue *>(queue);
+}
+
+namespace detail
+{
+/** Check if an internal queue is valid
+ *
+ * @param[in] queue Internal queue to check
+ *
+ * @return A status code
+ */
+inline StatusCode validate_internal_queue(const IQueue *queue)
+{
+    if(queue == nullptr || !queue->is_valid())
+    {
+        ARM_COMPUTE_LOG_ERROR_ACL("[IQueue]: Invalid queue object");
+        return StatusCode::InvalidArgument;
+    }
+    return StatusCode::Success;
+}
+} // namespace detail
+} // namespace arm_compute
+#endif /* SRC_COMMON_IQUEUE_H_ */
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index a7be534..d8c2736 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -91,6 +91,7 @@
     LOAD_FUNCTION_PTR(clCreateContext, handle);
     LOAD_FUNCTION_PTR(clCreateContextFromType, handle);
     LOAD_FUNCTION_PTR(clCreateCommandQueue, handle);
+    LOAD_FUNCTION_PTR(clCreateCommandQueueWithProperties, handle);
     LOAD_FUNCTION_PTR(clGetContextInfo, handle);
     LOAD_FUNCTION_PTR(clBuildProgram, handle);
     LOAD_FUNCTION_PTR(clEnqueueNDRangeKernel, handle);
@@ -293,6 +294,23 @@
     }
 }
 
+cl_command_queue clCreateCommandQueueWithProperties(cl_context                 context,
+                                                    cl_device_id               device,
+                                                    const cl_queue_properties *properties,
+                                                    cl_int                    *errcode_ret)
+{
+    arm_compute::CLSymbols::get().load_default();
+    auto func = arm_compute::CLSymbols::get().clCreateCommandQueueWithProperties_ptr;
+    if(func != nullptr)
+    {
+        return func(context, device, properties, errcode_ret);
+    }
+    else
+    {
+        return nullptr;
+    }
+}
+
 cl_context clCreateContext(
     const cl_context_properties *properties,
     cl_uint                      num_devices,
diff --git a/src/core/cpu/kernels/CpuDirectConvolutionStageKernel.cpp b/src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp
similarity index 100%
rename from src/core/cpu/kernels/CpuDirectConvolutionStageKernel.cpp
rename to src/core/cpu/kernels/CpuDirectConvolutionOutputStageKernel.cpp
diff --git a/src/core/cpu/kernels/activation/NEON/fp16.cpp b/src/core/cpu/kernels/activation/neon/fp16.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/NEON/fp16.cpp
rename to src/core/cpu/kernels/activation/neon/fp16.cpp
diff --git a/src/core/cpu/kernels/activation/NEON/fp32.cpp b/src/core/cpu/kernels/activation/neon/fp32.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/NEON/fp32.cpp
rename to src/core/cpu/kernels/activation/neon/fp32.cpp
diff --git a/src/core/cpu/kernels/activation/NEON/qasymm8.cpp b/src/core/cpu/kernels/activation/neon/qasymm8.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/NEON/qasymm8.cpp
rename to src/core/cpu/kernels/activation/neon/qasymm8.cpp
diff --git a/src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/NEON/qasymm8_signed.cpp
rename to src/core/cpu/kernels/activation/neon/qasymm8_signed.cpp
diff --git a/src/core/cpu/kernels/activation/NEON/qsymm16.cpp b/src/core/cpu/kernels/activation/neon/qsymm16.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/NEON/qsymm16.cpp
rename to src/core/cpu/kernels/activation/neon/qsymm16.cpp
diff --git a/src/core/cpu/kernels/activation/SVE/fp16.cpp b/src/core/cpu/kernels/activation/sve/fp16.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/SVE/fp16.cpp
rename to src/core/cpu/kernels/activation/sve/fp16.cpp
diff --git a/src/core/cpu/kernels/activation/SVE/fp32.cpp b/src/core/cpu/kernels/activation/sve/fp32.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/SVE/fp32.cpp
rename to src/core/cpu/kernels/activation/sve/fp32.cpp
diff --git a/src/core/cpu/kernels/activation/SVE/qasymm8.cpp b/src/core/cpu/kernels/activation/sve/qasymm8.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/SVE/qasymm8.cpp
rename to src/core/cpu/kernels/activation/sve/qasymm8.cpp
diff --git a/src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp b/src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/SVE/qasymm8_signed.cpp
rename to src/core/cpu/kernels/activation/sve/qasymm8_signed.cpp
diff --git a/src/core/cpu/kernels/activation/SVE/qsymm16.cpp b/src/core/cpu/kernels/activation/sve/qsymm16.cpp
similarity index 100%
rename from src/core/cpu/kernels/activation/SVE/qsymm16.cpp
rename to src/core/cpu/kernels/activation/sve/qsymm16.cpp
diff --git a/src/cpu/CpuContext.cpp b/src/cpu/CpuContext.cpp
index d62c1b6..b9a6999 100644
--- a/src/cpu/CpuContext.cpp
+++ b/src/cpu/CpuContext.cpp
@@ -24,6 +24,7 @@
 #include "src/cpu/CpuContext.h"
 
 #include "arm_compute/core/CPP/CPPTypes.h"
+#include "src/cpu/CpuQueue.h"
 #include "src/cpu/CpuTensor.h"
 #include "src/runtime/CPUUtils.h"
 
@@ -196,5 +197,10 @@
     }
     return tensor;
 }
+
+IQueue *CpuContext::create_queue(const AclQueueOptions *options)
+{
+    return new CpuQueue(this, options);
+}
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/cpu/CpuContext.h b/src/cpu/CpuContext.h
index d2062e4..e909767 100644
--- a/src/cpu/CpuContext.h
+++ b/src/cpu/CpuContext.h
@@ -69,6 +69,7 @@
 
     // Inherrited methods overridden
     ITensorV2 *create_tensor(const AclTensorDescriptor &desc, bool allocate) override;
+    IQueue *create_queue(const AclQueueOptions *options) override;
 
 private:
     AllocatorWrapper _allocator;
diff --git a/src/cpu/CpuQueue.cpp b/src/cpu/CpuQueue.cpp
new file mode 100644
index 0000000..0f0097b
--- /dev/null
+++ b/src/cpu/CpuQueue.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/cpu/CpuQueue.h"
+
+#include "arm_compute/runtime/Scheduler.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+CpuQueue::CpuQueue(IContext *ctx, const AclQueueOptions *options)
+    : IQueue(ctx)
+{
+    ARM_COMPUTE_UNUSED(options);
+}
+
+arm_compute::IScheduler &CpuQueue::scheduler()
+{
+    return arm_compute::Scheduler::get();
+}
+
+StatusCode CpuQueue::finish()
+{
+    return StatusCode::Success;
+}
+} // namespace cpu
+} // namespace arm_compute
diff --git a/src/cpu/CpuQueue.h b/src/cpu/CpuQueue.h
new file mode 100644
index 0000000..871a36c
--- /dev/null
+++ b/src/cpu/CpuQueue.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_CPU_CPUQUEUE_H
+#define SRC_CPU_CPUQUEUE_H
+
+#include "src/common/IQueue.h"
+
+#include "arm_compute/runtime/IScheduler.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+/** CPU queue implementation class */
+class CpuQueue final : public IQueue
+{
+public:
+    /** Construct a new CpuQueue object
+     *
+     * @param[in] ctx     Context to be used
+     * @param[in] options Command queue options
+     */
+    CpuQueue(IContext *ctx, const AclQueueOptions *options);
+    /** Return legacy scheduler
+     *
+     * @return arm_compute::IScheduler&
+     */
+    arm_compute::IScheduler &scheduler();
+
+    // Inherited functions overridden
+    StatusCode finish() override;
+};
+} // namespace cpu
+} // namespace arm_compute
+#endif /* SRC_CPU_CPUQUEUE_H */
diff --git a/src/gpu/cl/ClContext.cpp b/src/gpu/cl/ClContext.cpp
index 2e04e1d..d8ef18e 100644
--- a/src/gpu/cl/ClContext.cpp
+++ b/src/gpu/cl/ClContext.cpp
@@ -23,8 +23,11 @@
  */
 #include "src/gpu/cl/ClContext.h"
 
+#include "src/gpu/cl/ClQueue.h"
 #include "src/gpu/cl/ClTensor.h"
 
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+
 namespace arm_compute
 {
 namespace gpu
@@ -49,12 +52,15 @@
 ClContext::ClContext(const AclContextOptions *options)
     : IContext(Target::GpuOcl),
       _mlgo_heuristics(),
-      _cl_context()
+      _cl_ctx(),
+      _cl_dev()
 {
     if(options != nullptr)
     {
         _mlgo_heuristics = populate_mlgo(options->kernel_config_file);
     }
+    _cl_ctx = CLKernelLibrary::get().context();
+    _cl_dev = CLKernelLibrary::get().get_device();
 }
 
 const mlgo::MLGOHeuristics &ClContext::mlgo() const
@@ -64,14 +70,20 @@
 
 ::cl::Context ClContext::cl_ctx()
 {
-    return _cl_context;
+    return _cl_ctx;
+}
+
+::cl::Device ClContext::cl_dev()
+{
+    return _cl_dev;
 }
 
 bool ClContext::set_cl_ctx(::cl::Context ctx)
 {
     if(this->refcount() == 0)
     {
-        _cl_context = ctx;
+        _cl_ctx = ctx;
+        CLScheduler::get().set_context(ctx);
         return true;
     }
     return false;
@@ -86,6 +98,11 @@
     }
     return tensor;
 }
+
+IQueue *ClContext::create_queue(const AclQueueOptions *options)
+{
+    return new ClQueue(this, options);
+}
 } // namespace opencl
 } // namespace gpu
 } // namespace arm_compute
diff --git a/src/gpu/cl/ClContext.h b/src/gpu/cl/ClContext.h
index dd6699a..2a0d4ee 100644
--- a/src/gpu/cl/ClContext.h
+++ b/src/gpu/cl/ClContext.h
@@ -44,6 +44,7 @@
      * @param[in] options Creational options
      */
     explicit ClContext(const AclContextOptions *options);
+
     /** Extract MLGO heuristics
      *
      * @return Heuristics tree
@@ -55,6 +56,13 @@
      * @return the cl context used
      */
     ::cl::Context cl_ctx();
+
+    /** Underlying cl device accessor
+     *
+     * @return the cl device used
+     */
+    ::cl::Device cl_dev();
+
     /** Update/inject an underlying cl context object
      *
      * @warning Context will be able to set if the object doesn't have any pending reference to other objects
@@ -67,10 +75,12 @@
 
     // Inherrited methods overridden
     ITensorV2 *create_tensor(const AclTensorDescriptor &desc, bool allocate) override;
+    IQueue *create_queue(const AclQueueOptions *options) override;
 
 private:
     mlgo::MLGOHeuristics _mlgo_heuristics;
-    ::cl::Context        _cl_context;
+    ::cl::Context        _cl_ctx;
+    ::cl::Device         _cl_dev;
 };
 } // namespace opencl
 } // namespace gpu
diff --git a/src/gpu/cl/ClQueue.cpp b/src/gpu/cl/ClQueue.cpp
new file mode 100644
index 0000000..2123adc
--- /dev/null
+++ b/src/gpu/cl/ClQueue.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/gpu/cl/ClQueue.h"
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/CLTuner.h"
+
+namespace arm_compute
+{
+namespace gpu
+{
+namespace opencl
+{
+namespace
+{
+CLTunerMode map_tuner_mode(AclTuningMode mode)
+{
+    switch(mode)
+    {
+        case AclRapid:
+            return CLTunerMode::RAPID;
+            break;
+        case AclNormal:
+            return CLTunerMode::NORMAL;
+            break;
+        case AclExhaustive:
+            return CLTunerMode::EXHAUSTIVE;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Invalid tuner mode");
+            break;
+    }
+}
+
+std::unique_ptr<CLTuner> populate_tuner(const AclQueueOptions *options)
+{
+    if(options == nullptr || options->mode == AclTuningModeNone)
+    {
+        return nullptr;
+    }
+
+    CLTuningInfo tune_info;
+    tune_info.tuner_mode = map_tuner_mode(options->mode);
+    tune_info.tune_wbsm  = false;
+
+    return std::make_unique<CLTuner>(true /* tune_new_kernels */, tune_info);
+}
+} // namespace
+
+ClQueue::ClQueue(IContext *ctx, const AclQueueOptions *options)
+    : IQueue(ctx), _tuner(nullptr)
+{
+    _tuner = populate_tuner(options);
+}
+
+arm_compute::CLScheduler &ClQueue::scheduler()
+{
+    return arm_compute::CLScheduler::get();
+}
+
+::cl::CommandQueue ClQueue::cl_queue()
+{
+    return arm_compute::CLScheduler::get().queue();
+}
+
+bool ClQueue::set_cl_queue(::cl::CommandQueue queue)
+{
+    // TODO: Check queue is from the same context
+    arm_compute::CLScheduler::get().set_queue(queue);
+    return true;
+}
+
+StatusCode ClQueue::finish()
+{
+    arm_compute::CLScheduler::get().queue().finish();
+    return StatusCode::Success;
+}
+
+} // namespace opencl
+} // namespace gpu
+} // namespace arm_compute
diff --git a/src/gpu/cl/ClQueue.h b/src/gpu/cl/ClQueue.h
new file mode 100644
index 0000000..b16a0f4
--- /dev/null
+++ b/src/gpu/cl/ClQueue.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_GPU_CLQUEUE_H
+#define SRC_GPU_CLQUEUE_H
+
+#include "src/common/IQueue.h"
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+// Forward declarations
+class CLTuner;
+
+namespace gpu
+{
+namespace opencl
+{
+/** OpenCL queue implementation class */
+class ClQueue final : public IQueue
+{
+public:
+    /** Construct a new ClQueue object
+     *
+     * @param[in] ctx     Context to be used
+     * @param[in] options Command queue options
+     */
+    ClQueue(IContext *ctx, const AclQueueOptions *options);
+
+    /** Return legacy scheduler
+     *
+     * @return arm_compute::CLScheduler&
+     */
+    arm_compute::CLScheduler &scheduler();
+
+    /** Underlying cl command queue accessor
+     *
+     * @return the cl command queue used
+     */
+    ::cl::CommandQueue cl_queue();
+
+    /** Update/inject an underlying cl command queue object
+     *
+     * @warning Command queue needs to come from the same context as the AclQueue
+     *
+     * @param[in] queue Underlying cl command queue to be used
+     *
+     * @return true if the queue was set successfully else false
+     */
+    bool set_cl_queue(::cl::CommandQueue queue);
+
+    // Inherited functions overridden
+    StatusCode finish() override;
+
+private:
+    std::unique_ptr<CLTuner> _tuner;
+};
+} // namespace opencl
+} // namespace gpu
+} // namespace arm_compute
+#endif /* SRC_GPU_CLQUEUE_H */
diff --git a/tests/framework/Macros.h b/tests/framework/Macros.h
index 23c8266..a6ba137 100644
--- a/tests/framework/Macros.h
+++ b/tests/framework/Macros.h
@@ -224,6 +224,11 @@
 #define DISABLED_FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE) \
     FIXTURE_TEST_CASE_IMPL(TEST_NAME, FIXTURE, MODE, arm_compute::test::framework::TestCaseFactory::Status::DISABLED)
 
+#define EMPTY_BODY_FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE) \
+    FIXTURE_TEST_CASE(TEST_NAME, FIXTURE, MODE)                \
+    {                                                          \
+    }
+
 #define FIXTURE_DATA_TEST_CASE_IMPL(TEST_NAME, FIXTURE, MODE, STATUS, DATASET)                                                      \
     template <typename T>                                                                                                           \
     class TEST_NAME;                                                                                                                \
diff --git a/tests/validation/cpu/unit/Context.cpp b/tests/validation/cpu/unit/Context.cpp
index 519a7be..57ca866 100644
--- a/tests/validation/cpu/unit/Context.cpp
+++ b/tests/validation/cpu/unit/Context.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/Context.h"
+#include "tests/validation/fixtures/UNIT/ContextFixture.h"
 
 #include "src/cpu/CpuContext.h"
 
@@ -74,18 +74,10 @@
     ARM_COMPUTE_ASSERT(ctx == nullptr);
 }
 
-FIXTURE_TEST_CASE(DestroyInvalidContext, DestroyInvalidContextFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
-{
-}
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidContext, DestroyInvalidContextFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclCpu>, framework::DatasetMode::ALL)
 
 /** Test-case for CpuCapabilities
  *
diff --git a/tests/validation/cpu/unit/Queue.cpp b/tests/validation/cpu/unit/Queue.cpp
new file mode 100644
index 0000000..7d977cc
--- /dev/null
+++ b/tests/validation/cpu/unit/Queue.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "tests/validation/fixtures/UNIT/QueueFixture.h"
+
+#include "src/cpu/CpuQueue.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CPU)
+TEST_SUITE(UNIT)
+TEST_SUITE(Queue)
+
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidContext, CreateQueueWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidOptions, CreateQueueWithInvalidOptionsFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidQueue, DestroyInvalidQueueFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleQueue, SimpleQueueFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+
+TEST_SUITE_END() // Queue
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CPU
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/cpu/unit/Tensor.cpp b/tests/validation/cpu/unit/Tensor.cpp
index aa2e3ab..cc0c557 100644
--- a/tests/validation/cpu/unit/Tensor.cpp
+++ b/tests/validation/cpu/unit/Tensor.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/Tensor.h"
+#include "tests/validation/fixtures/UNIT/TensorFixture.h"
 
 namespace arm_compute
 {
@@ -33,26 +33,19 @@
 TEST_SUITE(UNIT)
 TEST_SUITE(Tensor)
 
-#define TENSOR_TESE_CASE(name, fixture)                           \
-    FIXTURE_TEST_CASE(name, fixture, framework::DatasetMode::ALL) \
-    {                                                             \
-    }
-
-TENSOR_TESE_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture)
-TENSOR_TESE_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(TensorStress, TensorStressFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(MapNotAllocatedTensor, MapNotAllocatedTensorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(ImportMemory, ImportMemoryFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(GetSize, TensorSizeFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::Cpu>)
-TENSOR_TESE_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::Cpu>)
-
-#undef TENSOR_TEST_CASE
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(TensorStress, TensorStressFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MapNotAllocatedTensor, MapNotAllocatedTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(ImportMemory, ImportMemoryFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetSize, TensorSizeFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
 
 TEST_SUITE_END() // Tensor
 TEST_SUITE_END() // UNIT
diff --git a/tests/validation/cpu/unit/TensorPack.cpp b/tests/validation/cpu/unit/TensorPack.cpp
index 5436ceb..f019e8e 100644
--- a/tests/validation/cpu/unit/TensorPack.cpp
+++ b/tests/validation/cpu/unit/TensorPack.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/TensorPack.h"
+#include "tests/validation/fixtures/UNIT/TensorPackFixture.h"
 
 namespace arm_compute
 {
@@ -33,21 +33,11 @@
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorPack)
 
-FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
-{
-}
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::Cpu>, framework::DatasetMode::ALL)
 
 TEST_SUITE_END() // Tensor
 TEST_SUITE_END() // UNIT
diff --git a/tests/validation/fixtures/UNIT/Context.h b/tests/validation/fixtures/UNIT/ContextFixture.h
similarity index 96%
rename from tests/validation/fixtures/UNIT/Context.h
rename to tests/validation/fixtures/UNIT/ContextFixture.h
index afa49e00..77cbc12 100644
--- a/tests/validation/fixtures/UNIT/Context.h
+++ b/tests/validation/fixtures/UNIT/ContextFixture.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_UNIT_CONTEXT
-#define ARM_COMPUTE_TEST_UNIT_CONTEXT
+#ifndef ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE
+#define ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE
 
 #include "arm_compute/Acl.hpp"
 #include "tests/framework/Asserts.h"
@@ -145,4 +145,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_UNIT_CONTEXT */
+#endif /* ARM_COMPUTE_TEST_UNIT_CONTEXT_FIXTURE */
diff --git a/tests/validation/fixtures/UNIT/QueueFixture.h b/tests/validation/fixtures/UNIT/QueueFixture.h
new file mode 100644
index 0000000..bc93f5f
--- /dev/null
+++ b/tests/validation/fixtures/UNIT/QueueFixture.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE
+#define ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE
+
+#include "arm_compute/Acl.hpp"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Test case for AclCreateQueue
+ *
+ * Validate that AclCreateQueue behaves as expected with invalid context
+ *
+ * Test Steps:
+ *  - Call AclCreateQueue with an invalid context
+ *  - Confirm that AclInvalidArgument is reported
+ *  - Confirm that the queue is still nullptr
+ */
+class CreateQueueWithInvalidContextFixture : public framework::Fixture
+{
+public:
+    void setup()
+    {
+        AclQueue queue = nullptr;
+        ARM_COMPUTE_ASSERT(AclCreateQueue(&queue, nullptr, nullptr) == AclStatus::AclInvalidArgument);
+        ARM_COMPUTE_ASSERT(queue == nullptr);
+    }
+};
+
+/** Test case for AclCreateQueue
+ *
+ * Validate that AclCreateQueue behaves as expected with invalid options
+ *
+ * Test Steps:
+ *  - Call AclCreateQueue with valid context but invalid options
+ *  - Confirm that AclInvalidArgument is reported
+ *  - Confirm that queue is still nullptr
+ */
+template <acl::Target Target>
+class CreateQueueWithInvalidOptionsFixture : public framework::Fixture
+{
+public:
+    void setup()
+    {
+        acl::Context ctx(Target);
+
+        // Check invalid tuning mode
+        AclQueueOptions invalid_queue_opts;
+        invalid_queue_opts.mode = static_cast<AclTuningMode>(-1);
+
+        AclQueue queue = nullptr;
+        ARM_COMPUTE_ASSERT(AclCreateQueue(&queue, ctx.get(), &invalid_queue_opts) == AclStatus::AclInvalidArgument);
+        ARM_COMPUTE_ASSERT(queue == nullptr);
+    }
+};
+
+/** Test case for AclDestroyQueue
+ *
+ * Validate that AclDestroyQueue behaves as expected when an invalid queue is given
+ *
+ * Test Steps:
+ *  - Call AclDestroyQueue with null queue
+ *  - Confirm that AclInvalidArgument is reported
+ *  - Call AclDestroyQueue on empty array
+ *  - Confirm that AclInvalidArgument is reported
+ *  - Call AclDestroyQueue on an ACL object other than AclQueue
+ *  - Confirm that AclInvalidArgument is reported
+ *  - Confirm that queue is still nullptr
+ */
+template <acl::Target Target>
+class DestroyInvalidQueueFixture : public framework::Fixture
+{
+public:
+    void setup()
+    {
+        acl::Context ctx(Target);
+
+        std::array<char, 256> empty_array{};
+        AclQueue queue = nullptr;
+
+        ARM_COMPUTE_ASSERT(AclDestroyQueue(queue) == AclStatus::AclInvalidArgument);
+        ARM_COMPUTE_ASSERT(AclDestroyQueue(reinterpret_cast<AclQueue>(ctx.get())) == AclStatus::AclInvalidArgument);
+        ARM_COMPUTE_ASSERT(AclDestroyQueue(reinterpret_cast<AclQueue>(empty_array.data())) == AclStatus::AclInvalidArgument);
+        ARM_COMPUTE_ASSERT(queue == nullptr);
+    }
+};
+
+/** Test case for AclCreateQueue
+ *
+ * Validate that a queue can be created successfully
+ *
+ * Test Steps:
+ *  - Create a valid context
+ *  - Create a valid queue
+ *  - Confirm that AclSuccess is returned
+ */
+template <acl::Target Target>
+class SimpleQueueFixture : public framework::Fixture
+{
+public:
+    void setup()
+    {
+        acl::StatusCode err = acl::StatusCode::Success;
+
+        acl::Context ctx(Target, &err);
+        ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+        acl::Queue queue(ctx, &err);
+        ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_UNIT_QUEUE_FIXTURE */
diff --git a/tests/validation/fixtures/UNIT/Tensor.h b/tests/validation/fixtures/UNIT/TensorFixture.h
similarity index 98%
rename from tests/validation/fixtures/UNIT/Tensor.h
rename to tests/validation/fixtures/UNIT/TensorFixture.h
index 32260cb..bfe115b 100644
--- a/tests/validation/fixtures/UNIT/Tensor.h
+++ b/tests/validation/fixtures/UNIT/TensorFixture.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_UNIT_TENSOR
-#define ARM_COMPUTE_TEST_UNIT_TENSOR
+#ifndef ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE
+#define ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE
 
 #include "arm_compute/Acl.hpp"
 #include "tests/framework/Asserts.h"
@@ -149,7 +149,7 @@
 /** Test case for AclTensor
  *
  * Validate that multiple tensors can be created successfully
- * Possibly stress the possibility of memory leaks
+ * Stress test to surface possible memory leaks
  *
  * Test Steps:
  *  - Create a valid context
@@ -421,4 +421,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_UNIT_TENSOR */
+#endif /* ARM_COMPUTE_TEST_UNIT_TENSOR_FIXTURE */
diff --git a/tests/validation/fixtures/UNIT/TensorPack.h b/tests/validation/fixtures/UNIT/TensorPackFixture.h
similarity index 97%
rename from tests/validation/fixtures/UNIT/TensorPack.h
rename to tests/validation/fixtures/UNIT/TensorPackFixture.h
index 98bffb1..bc14631 100644
--- a/tests/validation/fixtures/UNIT/TensorPack.h
+++ b/tests/validation/fixtures/UNIT/TensorPackFixture.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_UNIT_TENSORPACK
-#define ARM_COMPUTE_TEST_UNIT_TENSORPACK
+#ifndef ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE
+#define ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE
 
 #include "arm_compute/Acl.hpp"
 #include "tests/framework/Asserts.h"
@@ -181,4 +181,4 @@
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_UNIT_TENSORPACK */
+#endif /* ARM_COMPUTE_TEST_UNIT_TENSORPACK_FIXTURE */
diff --git a/tests/validation/gpu/unit/Context.cpp b/tests/validation/gpu/unit/Context.cpp
index 523a028..598e219 100644
--- a/tests/validation/gpu/unit/Context.cpp
+++ b/tests/validation/gpu/unit/Context.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/Context.h"
+#include "tests/validation/fixtures/UNIT/ContextFixture.h"
 
 #include "src/gpu/cl/ClContext.h"
 
@@ -37,15 +37,9 @@
 TEST_SUITE(UNIT)
 TEST_SUITE(Context)
 
-FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL)
-{
-}
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCApi, SimpleContextCApiFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleContextCppApi, SimpleContextCppApiFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MultipleContexts, MultipleContextsFixture<AclTarget::AclGpuOcl>, framework::DatasetMode::ALL)
 
 /** Test-case for MLGO kernel configuration file
  *
diff --git a/tests/validation/gpu/unit/Queue.cpp b/tests/validation/gpu/unit/Queue.cpp
new file mode 100644
index 0000000..8154a79
--- /dev/null
+++ b/tests/validation/gpu/unit/Queue.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "tests/validation/fixtures/UNIT/QueueFixture.h"
+
+#include "arm_compute/AclOpenClExt.h"
+#include "src/gpu/cl/ClQueue.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(Queue)
+
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidContext, CreateQueueWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateQueueWithInvalidOptions, CreateQueueWithInvalidOptionsFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidQueue, DestroyInvalidQueueFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleQueue, SimpleQueueFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+
+TEST_CASE(KhrQueuePriorities, framework::DatasetMode::ALL)
+{
+    acl::StatusCode err = acl::StatusCode::Success;
+
+    acl::Context ctx(acl::Target::GpuOcl, &err);
+    ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+    acl::Queue queue(ctx, &err);
+    ARM_COMPUTE_ASSERT(err == acl::StatusCode::Success);
+
+    cl_device_id cl_dev;
+    auto         status = AclGetClDevice(ctx.get(), &cl_dev);
+    ARM_COMPUTE_ASSERT(status == AclSuccess);
+
+    std::string extensions = cl::Device(cl_dev).getInfo<CL_DEVICE_EXTENSIONS>();
+    if(extensions.find("cl_khr_priority_hints") != std::string::npos)
+    {
+        cl_int error = CL_SUCCESS;
+
+        cl_context cl_ctx;
+        status = AclGetClContext(ctx.get(), &cl_ctx);
+        ARM_COMPUTE_ASSERT(status == AclSuccess);
+
+        /* Check a queue with high priority */
+        cl_queue_properties queue_properties[] = { CL_QUEUE_PRIORITY_KHR, CL_QUEUE_PRIORITY_HIGH_KHR, 0 };
+        cl_command_queue    priority_queue     = clCreateCommandQueueWithProperties(cl_ctx, cl_dev, queue_properties, &error);
+        ARM_COMPUTE_ASSERT(error == CL_SUCCESS);
+
+        clReleaseCommandQueue(priority_queue);
+    }
+}
+
+TEST_SUITE_END() // Queue
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
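Note: taken together with the fixtures above, the user-facing path these GPU queue tests exercise looks roughly as follows; a minimal sketch using only the public headers touched by this change (the wrapper function name is illustrative and error handling is trimmed):

    #include "arm_compute/Acl.hpp"
    #include "arm_compute/AclOpenClExt.h"

    void run_queue_on_opencl_example()
    {
        // Create a context and a queue on the OpenCL backend via the C++ wrapper.
        acl::StatusCode err = acl::StatusCode::Success;
        acl::Context    ctx(acl::Target::GpuOcl, &err);
        acl::Queue      queue(ctx, &err);

        // The OpenCL extension exposes the native handles for interop,
        // e.g. to create a priority command queue as KhrQueuePriorities does.
        cl_device_id cl_dev = nullptr;
        cl_context   cl_ctx = nullptr;
        if(AclGetClDevice(ctx.get(), &cl_dev) == AclSuccess && AclGetClContext(ctx.get(), &cl_ctx) == AclSuccess)
        {
            // cl_dev / cl_ctx can now be combined with user-managed OpenCL calls.
        }
    }
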
diff --git a/tests/validation/gpu/unit/Tensor.cpp b/tests/validation/gpu/unit/Tensor.cpp
index b40d626..1810273 100644
--- a/tests/validation/gpu/unit/Tensor.cpp
+++ b/tests/validation/gpu/unit/Tensor.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/Tensor.h"
+#include "tests/validation/fixtures/UNIT/TensorFixture.h"
 
 namespace arm_compute
 {
@@ -33,24 +33,17 @@
 TEST_SUITE(UNIT)
 TEST_SUITE(Tensor)
 
-#define TENSOR_TESE_CASE(name, fixture)                           \
-    FIXTURE_TEST_CASE(name, fixture, framework::DatasetMode::ALL) \
-    {                                                             \
-    }
-
-TENSOR_TESE_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture)
-TENSOR_TESE_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(TensorStress, TensorStressFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(GetSize, TensorSizeFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::GpuOcl>)
-TENSOR_TESE_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::GpuOcl>)
-
-#undef TENSOR_TEST_CASE
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidContext, CreateTensorWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorWithInvalidDescriptor, CreateTensorWithInvalidDescriptorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensor, DestroyInvalidTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensor, SimpleTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(TensorStress, TensorStressFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MapInvalidTensor, MapInvalidTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MapAllocatedTensor, MapAllocatedTensorFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetSize, TensorSizeFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidSize, InvalidTensorSizeFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetDescriptor, DescriptorConversionFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(GetInvalidDescriptor, InvalidDescriptorConversionFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
 
 TEST_SUITE_END() // Tensor
 TEST_SUITE_END() // UNIT
diff --git a/tests/validation/gpu/unit/TensorPack.cpp b/tests/validation/gpu/unit/TensorPack.cpp
index b057db4..b62426d 100644
--- a/tests/validation/gpu/unit/TensorPack.cpp
+++ b/tests/validation/gpu/unit/TensorPack.cpp
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "tests/validation/fixtures/UNIT/TensorPack.h"
+#include "tests/validation/fixtures/UNIT/TensorPackFixture.h"
 
 namespace arm_compute
 {
@@ -33,21 +33,11 @@
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorPack)
 
-FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
-{
-}
-FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
-{
-}
+EMPTY_BODY_FIXTURE_TEST_CASE(CreateTensorPackWithInvalidContext, CreateTensorPackWithInvalidContextFixture, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(DestroyInvalidTensorPack, DestroyInvalidTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(AddInvalidObjectToTensorPack, AddInvalidObjectToTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(SimpleTensorPack, SimpleTensorPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
+EMPTY_BODY_FIXTURE_TEST_CASE(MultipleTensorsInPack, MultipleTensorsInPackFixture<acl::Target::GpuOcl>, framework::DatasetMode::ALL)
 
 TEST_SUITE_END() // Tensor
 TEST_SUITE_END() // UNIT