Add CL command buffer class

* Two implementations of the command buffer are added:
  - CLMutableCommandBuffer uses mutable dispatch command buffer
    extension.
  - CLCompatCommandBuffer is the compatibility class for platform
    without the CL extension.

Resolves: COMPMID-6454
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I15b370a50168ca940bd8fb2b5fae26230da3f472
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10298
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index a81bf87..1603739 100644
--- a/Android.bp
+++ b/Android.bp
@@ -213,9 +213,12 @@
         "src/core/AccessWindowAutoPadding.cpp",
         "src/core/AccessWindowStatic.cpp",
         "src/core/AccessWindowTranspose.cpp",
+        "src/core/CL/CLCommandBuffer.cpp",
+        "src/core/CL/CLCompatCommandBuffer.cpp",
         "src/core/CL/CLCompileContext.cpp",
         "src/core/CL/CLHelpers.cpp",
         "src/core/CL/CLKernelLibrary.cpp",
+        "src/core/CL/CLMutableCommandBuffer.cpp",
         "src/core/CL/CLUtils.cpp",
         "src/core/CL/DefaultLWSHeuristics.cpp",
         "src/core/CL/ICLKernel.cpp",
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 1e1f529..f86d55a 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -141,6 +141,16 @@
     DECLARE_FUNCTION_PTR(clCreateImage);
     DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
 
+    // Command buffer and mutable dispatch command buffer extensions
+    DECLARE_FUNCTION_PTR(clCreateCommandBufferKHR);
+    DECLARE_FUNCTION_PTR(clRetainCommandBufferKHR);
+    DECLARE_FUNCTION_PTR(clReleaseCommandBufferKHR);
+    DECLARE_FUNCTION_PTR(clFinalizeCommandBufferKHR);
+    DECLARE_FUNCTION_PTR(clEnqueueCommandBufferKHR);
+    DECLARE_FUNCTION_PTR(clCommandNDRangeKernelKHR);
+
+    DECLARE_FUNCTION_PTR(clUpdateMutableCommandsKHR);
+
     // Third-party extensions
     DECLARE_FUNCTION_PTR(clImportMemoryARM);
 
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 05a18c0..5d8ca2b 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -47,6 +47,7 @@
    - Add support for output data type S64 in NEArgMinMaxLayer and CLArgMinMaxLayer
    - Port the following kernels in the experimental Dynamic Fusion interface to use the new Compute Kernel Writer interface:
      - @ref experimental::dynamic_fusion::GpuCkwResize
+   - Add support for OpenCL™ comand buffer with mutable dispatch extension.
  - Update OpenCL™ API headers to v2023.04.17.
  - Remove legacy PostOps interface. PostOps was the experimental interface for kernel fusion and is replaced by the new Dynamic Fusion interface.
  - Performance optimizations:
diff --git a/filelist.json b/filelist.json
index 23ee9ca..e4627f8 100644
--- a/filelist.json
+++ b/filelist.json
@@ -118,7 +118,10 @@
   ],
   "gpu": {
     "common": [
+      "src/core/CL/CLCommandBuffer.cpp",
+      "src/core/CL/CLCompatCommandBuffer.cpp",
       "src/core/CL/CLCompileContext.cpp",
+      "src/core/CL/CLMutableCommandBuffer.cpp",
       "src/core/CL/DefaultLWSHeuristics.cpp",
       "src/core/CL/CLHelpers.cpp",
       "src/core/CL/CLKernelLibrary.cpp",
diff --git a/src/core/CL/CLCommandBuffer.cpp b/src/core/CL/CLCommandBuffer.cpp
new file mode 100644
index 0000000..7fcfdf2
--- /dev/null
+++ b/src/core/CL/CLCommandBuffer.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/core/CL/CLCommandBuffer.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+
+#include "src/core/CL/CLCompatCommandBuffer.h"
+#include "src/core/CL/CLMutableCommandBuffer.h"
+
+namespace arm_compute
+{
+
+std::unique_ptr<CLCommandBuffer> CLCommandBuffer::create(cl_command_queue queue)
+{
+    const auto &cl_device            = CLKernelLibrary::get().get_device();
+    const auto  has_mutable_dispatch = command_buffer_mutable_dispatch_supported(cl_device);
+
+    if(has_mutable_dispatch)
+    {
+        return std::make_unique<CLMutableCommandBuffer>(queue);
+    }
+    else
+    {
+        return std::make_unique<CLCompatCommandBuffer>(queue);
+    }
+}
+
+CLCommandBuffer::CLCommandBuffer()  = default;
+CLCommandBuffer::~CLCommandBuffer() = default;
+
+CLCommandBuffer::State CLCommandBuffer::state() const
+{
+    return _state;
+}
+
+CLCommandBuffer &CLCommandBuffer::state(CLCommandBuffer::State state)
+{
+    _state = state;
+
+    return *this;
+}
+
+} // namespace arm_compute
diff --git a/src/core/CL/CLCommandBuffer.h b/src/core/CL/CLCommandBuffer.h
new file mode 100644
index 0000000..8a94e38
--- /dev/null
+++ b/src/core/CL/CLCommandBuffer.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_CORE_CL_CLCOMMANDBUFFER_H
+#define ACL_SRC_CORE_CL_CLCOMMANDBUFFER_H
+
+#include "arm_compute/core/CL/OpenCL.h"
+
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+
+namespace arm_compute
+{
+
+/** Command buffer contains a list of commands that is constructed once and later enqueued multiple times.
+ *
+ * To prepare a command buffer:
+ *   - Construct a new command buffer targeting a command queue using @ref CLCommandBuffer::create.
+ *   - Add kernel enqueue command to the buffer using @ref CLCommandBuffer::add_kernel.
+ *     The kernel must be ready to be enqueued with all the arguments set.
+ *   - Specify which kernel argument is mutable after the command buffer has been finalized.
+ *   - When all the kernel enqueue commands have been added, call @ref CLCommandBuffer::finalize.
+ *     After this point the command buffer is ready to be executed.
+ *
+ * To execute the command buffer:
+ *   - Make any changes in the value which the mutable arguments are pointing to.
+ *   - Call @ref CLCommandBuffer::update to apply the argument value changes.
+ *   - Call @ref CLCommandBuffer::enqueue to enqueue the command buffer to execute.
+ */
+class CLCommandBuffer
+{
+public:
+    /** Create a new command buffer targeting the specified command queue.
+     *
+     * @param[in] queue The command queue to execute the command buffer.
+     *
+     * @return A unique pointer to the newly created command buffer.
+     */
+    static std::unique_ptr<CLCommandBuffer> create(cl_command_queue queue);
+
+    /** Constructor. */
+    CLCommandBuffer();
+
+    /** Destructor. */
+    virtual ~CLCommandBuffer();
+
+    /** Disallow copy constructor. */
+    CLCommandBuffer(const CLCommandBuffer &) = delete;
+
+    /** Disallow copy assignment. */
+    CLCommandBuffer &operator=(const CLCommandBuffer &) = delete;
+
+    /** Disallow move constructor. */
+    CLCommandBuffer(CLCommandBuffer &&other) = delete;
+
+    /** Disallow move assignment. */
+    CLCommandBuffer &operator=(CLCommandBuffer &&other) = delete;
+
+    /** Add a kernel enqueue command to the command queue.
+     *
+     * This function must be called before the command buffer has been finalized.
+     *
+     * @param[in] kernel The CL kernel.
+     * @param[in] offset The global work offset.
+     * @param[in] global The global work size.
+     * @param[in] local  The local work size.
+     */
+    virtual void add_kernel(cl_kernel kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local) = 0;
+
+    /** Add the mutable argument to the current kernel enqueue command.
+     *
+     * This function must be called after @ref CLCommandBuffer::add_kernel but before the command buffer
+     * has been finalized.
+     *
+     * The pointer must be valid and it must point to the correct value at the time
+     * @ref CLCommandBuffer::update is called so that the value of the argument
+     * can be applied successfully to the kernel enqueue command.
+     *
+     * @param[in] arg_idx The index of the argument in the current kernel program.
+     * @param[in] value   The pointer to the value of the argument.
+     */
+    template <typename T, typename = std::enable_if_t<std::is_arithmetic<T>::value || std::is_pointer<T>::value>>
+    void add_mutable_argument(cl_uint arg_idx, const T *value)
+    {
+        add_mutable_argument_generic(arg_idx, value, sizeof(T));
+    }
+
+    /** Finalize the command buffer. */
+    virtual void finalize() = 0;
+
+    /** Update the command buffer with new kernel argument values.
+     *
+     * This function must be called after the command buffer has been finalized.
+     *
+     * All the value pointed by the mutable argument will be applied to the command buffer.
+     */
+    virtual void update() = 0;
+
+    /** Enqueue the command buffer.
+     *
+     * This function must be called after the command buffer has been finalized.
+     */
+    virtual void enqueue() = 0;
+
+    /** Check if the command buffer has been finalized.
+     *
+     * @return true if the command buffer has been finalized.
+     */
+    virtual bool is_finalized() const = 0;
+
+protected:
+    /** Add the mutable argument to the current kernel enqueue command.
+     *
+     * @see CLCommandBuffer::add_mutable_argument for more information.
+     */
+    virtual void add_mutable_argument_generic(cl_uint arg_idx, const void *value, size_t size) = 0;
+
+    /** The state of the command buffer. */
+    enum class State : int32_t
+    {
+        /** The command buffer has been created and is being specified. */
+        Created,
+
+        /** The command buffer has been finalized and is ready to be executed. */
+        Finalized,
+    };
+
+    /** Get the state of the command buffer. */
+    State state() const;
+
+    /** Set the state of the command buffer. */
+    CLCommandBuffer &state(State state);
+
+private:
+    State _state{ State::Created };
+};
+
+} // namespace arm_compute
+
+#endif // ACL_SRC_CORE_CL_CLCOMMANDBUFFER_H
diff --git a/src/core/CL/CLCompatCommandBuffer.cpp b/src/core/CL/CLCompatCommandBuffer.cpp
new file mode 100644
index 0000000..f1a902c
--- /dev/null
+++ b/src/core/CL/CLCompatCommandBuffer.cpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/core/CL/CLCompatCommandBuffer.h"
+
+#include "arm_compute/core/Error.h"
+
+#include "src/core/CL/CLUtils.h"
+
+namespace arm_compute
+{
+
+CLCompatCommandBuffer::CLCompatCommandBuffer(cl_command_queue queue)
+    : _queue(queue)
+{
+}
+
+CLCompatCommandBuffer::~CLCompatCommandBuffer()
+{
+}
+
+void CLCompatCommandBuffer::add_kernel(cl_kernel kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local)
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+
+    _kernel_cmds.push_back(KernelCommand{ kernel, offset, global, local, {} });
+}
+
+void CLCompatCommandBuffer::add_mutable_argument_generic(cl_uint arg_idx, const void *value, size_t size)
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+    ARM_COMPUTE_ERROR_ON(_kernel_cmds.empty());
+
+    _kernel_cmds.back().mutable_args.push_back(cl_mutable_dispatch_arg_khr{ arg_idx, size, value });
+}
+
+void CLCompatCommandBuffer::finalize()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+
+    _kernel_cmds.shrink_to_fit();
+
+    for(auto &cmd : _kernel_cmds)
+    {
+        cmd.mutable_args.shrink_to_fit();
+    }
+
+    state(State::Finalized);
+}
+
+void CLCompatCommandBuffer::update()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Finalized);
+
+    // Nothing to do here - The kernel arguments will be updated when each command is enqueued.
+}
+
+void CLCompatCommandBuffer::enqueue()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Finalized);
+
+    for(const auto &cmd : _kernel_cmds)
+    {
+        for(const auto &arg : cmd.mutable_args)
+        {
+            const auto error = clSetKernelArg(cmd.kernel, arg.arg_index, arg.arg_size, arg.arg_value);
+
+            handle_cl_error("clSetKernelArg", error);
+        }
+
+        const auto error = clEnqueueNDRangeKernel(
+            _queue,
+            cmd.kernel,
+            static_cast<cl_uint>(cmd.global.dimensions()),
+            cmd.offset.dimensions() != 0 ? cmd.offset.get() : nullptr,
+            cmd.global.get(),
+            cmd.local.dimensions() != 0 ? cmd.local.get() : nullptr,
+            0,
+            nullptr,
+            nullptr);
+
+        handle_cl_error("clEnqueueNDRangeKernel", error);
+    }
+}
+
+bool CLCompatCommandBuffer::is_finalized() const
+{
+    return state() == State::Finalized;
+}
+
+} // namespace arm_compute
diff --git a/src/core/CL/CLCompatCommandBuffer.h b/src/core/CL/CLCompatCommandBuffer.h
new file mode 100644
index 0000000..e91d52d
--- /dev/null
+++ b/src/core/CL/CLCompatCommandBuffer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_CORE_CL_CLCOMPATCOMMANDBUFFER_H
+#define ACL_SRC_CORE_CL_CLCOMPATCOMMANDBUFFER_H
+
+#include "src/core/CL/CLCommandBuffer.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+
+/** Command buffer implementation for platform without mutable dispatch command buffer extension. */
+class CLCompatCommandBuffer final : public CLCommandBuffer
+{
+public:
+    /** Create a new command buffer targeting the specified command queue.
+     *
+     * @param[in] queue The command queue to execute the command buffer.
+     */
+    CLCompatCommandBuffer(cl_command_queue queue);
+
+    /** Destructor. */
+    virtual ~CLCompatCommandBuffer();
+
+    /** Disallow copy constructor. */
+    CLCompatCommandBuffer(const CLCompatCommandBuffer &) = delete;
+
+    /** Disallow copy assignment. */
+    CLCompatCommandBuffer &operator=(const CLCompatCommandBuffer &) = delete;
+
+    /** Disallow move constructor. */
+    CLCompatCommandBuffer(CLCompatCommandBuffer &&) = delete;
+
+    /** Disallow move assignment. */
+    CLCompatCommandBuffer &operator=(CLCompatCommandBuffer &&) = delete;
+
+    void add_kernel(cl_kernel kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local) override;
+
+    void finalize() override;
+
+    void update() override;
+
+    void enqueue() override;
+
+    bool is_finalized() const override;
+
+protected:
+    void add_mutable_argument_generic(cl_uint arg_idx, const void *value, size_t size) override;
+
+private:
+    struct KernelCommand
+    {
+        cl_kernel   kernel;
+        cl::NDRange offset;
+        cl::NDRange global;
+        cl::NDRange local;
+
+        std::vector<cl_mutable_dispatch_arg_khr> mutable_args;
+    };
+
+private:
+    cl_command_queue           _queue{};
+    std::vector<KernelCommand> _kernel_cmds{};
+};
+
+} // namespace arm_compute
+
+#endif // ACL_SRC_CORE_CL_CLCOMPATCOMMANDBUFFER_H
diff --git a/src/core/CL/CLMutableCommandBuffer.cpp b/src/core/CL/CLMutableCommandBuffer.cpp
new file mode 100644
index 0000000..b9c59ac
--- /dev/null
+++ b/src/core/CL/CLMutableCommandBuffer.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/core/CL/CLMutableCommandBuffer.h"
+
+#include "arm_compute/core/Error.h"
+
+#include "src/core/CL/CLUtils.h"
+
+namespace arm_compute
+{
+
+CLMutableCommandBuffer::CLMutableCommandBuffer(cl_command_queue queue)
+    : CLCommandBuffer()
+{
+    cl_int status = CL_SUCCESS;
+
+    cl_command_buffer_properties_khr properties[] = {
+        CL_COMMAND_BUFFER_FLAGS_KHR,
+        CL_COMMAND_BUFFER_MUTABLE_KHR,
+        0,
+    };
+
+    _cb = clCreateCommandBufferKHR(1, &queue, properties, &status);
+    handle_cl_error("clCreateCommandBufferKHR", status);
+}
+
+CLMutableCommandBuffer::~CLMutableCommandBuffer()
+{
+    const auto status = clReleaseCommandBufferKHR(_cb);
+    handle_cl_error("clReleaseCommandBufferKHR", status);
+}
+
+void CLMutableCommandBuffer::add_kernel(cl_kernel kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local)
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+
+    cl_mutable_command_khr mutable_handle = nullptr;
+
+    cl_ndrange_kernel_command_properties_khr properties[] = {
+        CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+        CL_MUTABLE_DISPATCH_ARGUMENTS_KHR,
+        0,
+    };
+
+    const auto error = clCommandNDRangeKernelKHR(
+        _cb,
+        nullptr,
+        properties,
+        kernel,
+        global.dimensions(),
+        offset.dimensions() != 0 ? offset.get() : nullptr,
+        global.get(),
+        local.dimensions() != 0 ? local.get() : nullptr,
+        0,
+        nullptr,
+        nullptr,
+        &mutable_handle);
+
+    handle_cl_error("clCommandNDRangeKernelKHR", error);
+
+    cl_mutable_dispatch_config_khr mut_dispatch_cfg{};
+    mut_dispatch_cfg.type    = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR;
+    mut_dispatch_cfg.command = mutable_handle;
+
+    _mut_dispatch_cfgs.emplace_back(mut_dispatch_cfg);
+}
+
+void CLMutableCommandBuffer::add_mutable_argument_generic(cl_uint arg_idx, const void *value, size_t size)
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+
+    cl_mutable_dispatch_arg_khr cfg{};
+    cfg.arg_index = arg_idx;
+    cfg.arg_size  = size;
+    cfg.arg_value = value;
+
+    _mut_arg_cfgs.emplace_back(cfg);
+    ++_mut_dispatch_cfgs.back().num_args;
+}
+
+void CLMutableCommandBuffer::finalize()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Created);
+
+    const auto error = clFinalizeCommandBufferKHR(_cb);
+    handle_cl_error("clFinalizeCommandBufferKHR", error);
+
+    state(State::Finalized);
+
+    _mut_dispatch_cfgs.shrink_to_fit();
+    _mut_arg_cfgs.shrink_to_fit();
+
+    size_t arg_no = 0;
+
+    for(auto &mut_dispatch_cfg : _mut_dispatch_cfgs)
+    {
+        ARM_COMPUTE_ERROR_ON(arg_no >= _mut_arg_cfgs.size());
+        mut_dispatch_cfg.arg_list = &_mut_arg_cfgs[arg_no];
+
+        arg_no += mut_dispatch_cfg.num_args;
+    }
+
+    _mut_cfg.type                  = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR;
+    _mut_cfg.next                  = nullptr;
+    _mut_cfg.num_mutable_dispatch  = _mut_dispatch_cfgs.size();
+    _mut_cfg.mutable_dispatch_list = &_mut_dispatch_cfgs[0];
+}
+
+void CLMutableCommandBuffer::update()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Finalized);
+
+    const auto error = clUpdateMutableCommandsKHR(
+        _cb,
+        &_mut_cfg);
+
+    handle_cl_error("clUpdateMutableCommandsKHR", error);
+}
+
+void CLMutableCommandBuffer::enqueue()
+{
+    ARM_COMPUTE_ERROR_ON(state() != State::Finalized);
+
+    const auto error = clEnqueueCommandBufferKHR(
+        0,
+        nullptr,
+        _cb,
+        0,
+        nullptr,
+        nullptr);
+
+    handle_cl_error("clEnqueueCommandBufferKHR", error);
+}
+
+bool CLMutableCommandBuffer::is_finalized() const
+{
+    return state() == State::Finalized;
+}
+
+} // namespace arm_compute
diff --git a/src/core/CL/CLMutableCommandBuffer.h b/src/core/CL/CLMutableCommandBuffer.h
new file mode 100644
index 0000000..04e94b0
--- /dev/null
+++ b/src/core/CL/CLMutableCommandBuffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_CORE_CL_CLMUTABLECOMMANDBUFFER_H
+#define ACL_SRC_CORE_CL_CLMUTABLECOMMANDBUFFER_H
+
+#include "src/core/CL/CLCommandBuffer.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+
+/** Command buffer implementaton based on CL mutable dispatch command buffer extension. */
+class CLMutableCommandBuffer : public CLCommandBuffer
+{
+public:
+    /** Create a new mutable dispatch command buffer targeting the specified command queue.
+     *
+     * @param[in] queue The command queue to execute the command buffer.
+     */
+    CLMutableCommandBuffer(cl_command_queue queue);
+
+    /** Destructor. */
+    virtual ~CLMutableCommandBuffer();
+
+    /** Disallow copy constructor. */
+    CLMutableCommandBuffer(const CLMutableCommandBuffer &) = delete;
+
+    /** Disallow copy assignment. */
+    CLMutableCommandBuffer &operator=(const CLMutableCommandBuffer &) = delete;
+
+    /** Disallow move constructor. */
+    CLMutableCommandBuffer(CLMutableCommandBuffer &&) = delete;
+
+    /** Disallow move assignment. */
+    CLMutableCommandBuffer &operator=(CLMutableCommandBuffer &&) = delete;
+
+    void add_kernel(cl_kernel kernel, const cl::NDRange &offset, const cl::NDRange &global, const cl::NDRange &local) override;
+
+    void finalize() override;
+
+    void update() override;
+
+    void enqueue() override;
+
+    bool is_finalized() const override;
+
+protected:
+    void add_mutable_argument_generic(cl_uint arg_idx, const void *value, size_t size) override;
+
+private:
+    cl_command_buffer_khr                       _cb{};
+    cl_mutable_base_config_khr                  _mut_cfg{};
+    std::vector<cl_mutable_dispatch_config_khr> _mut_dispatch_cfgs{};
+    std::vector<cl_mutable_dispatch_arg_khr>    _mut_arg_cfgs{};
+};
+
+} // namespace arm_compute
+
+#endif // ACL_SRC_CORE_CL_CLMUTABLECOMMANDBUFFER_H
diff --git a/src/core/CL/CLUtils.cpp b/src/core/CL/CLUtils.cpp
index 7e56a3b..289300b 100644
--- a/src/core/CL/CLUtils.cpp
+++ b/src/core/CL/CLUtils.cpp
@@ -111,4 +111,14 @@
 
     return cl::Image2D(cl_image);
 }
+
+void handle_cl_error(const std::string &function_name, cl_int error_code)
+{
+    if(error_code != CL_SUCCESS)
+    {
+        std::string error_message = function_name + " - Error code: " + std::to_string(error_code);
+        ARM_COMPUTE_ERROR(error_message.c_str());
+    }
+}
+
 } // namespace arm_compute
diff --git a/src/core/CL/CLUtils.h b/src/core/CL/CLUtils.h
index f0e79bc..de9c1b3 100644
--- a/src/core/CL/CLUtils.h
+++ b/src/core/CL/CLUtils.h
@@ -73,6 +73,14 @@
  * @return cl::Image2D object
  */
 cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType image_type);
+
+/** Check for CL error code and throw exception accordingly.
+ *
+ * @param[in] function_name The name of the CL function being called.
+ * @param[in] error_code    The error returned by the CL function.
+ */
+void handle_cl_error(const std::string &function_name, cl_int error_code);
+
 } // namespace arm_compute
 
 #endif // ACL_SRC_CORE_CL_CLUTILS_H
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 8aa9b2b..b092dfb 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -187,6 +187,16 @@
     LOAD_FUNCTION_PTR(clCreateImage, handle);
     LOAD_FUNCTION_PTR(clSetKernelExecInfo, handle);
 
+    // Command buffer and mutable dispatch command buffer extensions
+    LOAD_FUNCTION_PTR(clCreateCommandBufferKHR, handle);
+    LOAD_FUNCTION_PTR(clRetainCommandBufferKHR, handle);
+    LOAD_FUNCTION_PTR(clReleaseCommandBufferKHR, handle);
+    LOAD_FUNCTION_PTR(clFinalizeCommandBufferKHR, handle);
+    LOAD_FUNCTION_PTR(clEnqueueCommandBufferKHR, handle);
+    LOAD_FUNCTION_PTR(clCommandNDRangeKernelKHR, handle);
+
+    LOAD_FUNCTION_PTR(clUpdateMutableCommandsKHR, handle);
+
     // Third-party extensions
     LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
 
@@ -1083,6 +1093,141 @@
     }
 }
 
+cl_command_buffer_khr clCreateCommandBufferKHR(
+    cl_uint num_queues,
+    const cl_command_queue* queues,
+    const cl_command_buffer_properties_khr* properties,
+    cl_int* errcode_ret)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clCreateCommandBufferKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(num_queues, queues, properties, errcode_ret);
+    }
+    else
+    {
+        if(errcode_ret != nullptr)
+        {
+            *errcode_ret = CL_INVALID_OPERATION;
+        }
+
+        return {};
+    }
+}
+
+cl_int clFinalizeCommandBufferKHR(cl_command_buffer_khr command_buffer)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clFinalizeCommandBufferKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(command_buffer);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
+cl_int clRetainCommandBufferKHR(cl_command_buffer_khr command_buffer)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clRetainCommandBufferKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(command_buffer);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
+cl_int clReleaseCommandBufferKHR(cl_command_buffer_khr command_buffer)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clReleaseCommandBufferKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(command_buffer);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
+cl_int clEnqueueCommandBufferKHR(
+    cl_uint num_queues,
+    cl_command_queue* queues,
+    cl_command_buffer_khr command_buffer,
+    cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list,
+    cl_event* event)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clEnqueueCommandBufferKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(num_queues, queues, command_buffer, num_events_in_wait_list, event_wait_list, event);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
+
+cl_int clCommandNDRangeKernelKHR(
+    cl_command_buffer_khr command_buffer,
+    cl_command_queue command_queue,
+    const cl_ndrange_kernel_command_properties_khr* properties,
+    cl_kernel kernel,
+    cl_uint work_dim,
+    const size_t* global_work_offset,
+    const size_t* global_work_size,
+    const size_t* local_work_size,
+    cl_uint num_sync_points_in_wait_list,
+    const cl_sync_point_khr* sync_point_wait_list,
+    cl_sync_point_khr* sync_point,
+    cl_mutable_command_khr* mutable_handle)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clCommandNDRangeKernelKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(command_buffer, command_queue, properties, kernel, work_dim, global_work_offset, global_work_size, local_work_size, num_sync_points_in_wait_list, sync_point_wait_list, sync_point, mutable_handle);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
+cl_int clUpdateMutableCommandsKHR(
+    cl_command_buffer_khr command_buffer,
+    const cl_mutable_base_config_khr* mutable_config)
+{
+    arm_compute::CLSymbols::get().load_default();
+    const auto func = arm_compute::CLSymbols::get().clUpdateMutableCommandsKHR_ptr;
+
+    if(func != nullptr)
+    {
+        return func(command_buffer, mutable_config);
+    }
+    else
+    {
+        return CL_INVALID_OPERATION;
+    }
+}
+
 cl_mem
 clImportMemoryARM(cl_context                      context,
                   cl_mem_flags                    flags,