COMPMID-3391: Implement Async interfaces

Change-Id: I8168cea5056ff48a0253ebb8c88ea549a3ea69a2
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3335
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index ec05af2..21f6ab7 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -26,10 +26,13 @@
 
 #include "arm_compute/core/CPP/CPPTypes.h"
 #include "arm_compute/core/IKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/experimental/Types.h"
 
 namespace arm_compute
 {
 class Window;
+class ITensor;
 
 /** Common interface for all kernels implemented in C++ */
 class ICPPKernel : public IKernel
@@ -51,8 +54,7 @@
      */
     virtual void run(const Window &window, const ThreadInfo &info)
     {
-        ARM_COMPUTE_UNUSED(window);
-        ARM_COMPUTE_UNUSED(info);
+        ARM_COMPUTE_UNUSED(window, info);
         ARM_COMPUTE_ERROR("default implementation of legacy run() virtual member function invoked");
     }
 
@@ -69,6 +71,24 @@
         run(window, info);
     }
 
+    /** Execute the kernel on the passed window
+     *
+     * @warning If is_parallelisable() returns false then the passed window must be equal to window()
+     *
+     * @note The window has to be a region within the window returned by the window() method
+     *
+     * @note The width of the window has to be a multiple of num_elems_processed_per_iteration().
+     *
+     * @param[in] inputs  A vector containing the input tensors.
+     * @param[out] outputs A vector containing the output tensors.
+     * @param[in] window  Region on which to execute the kernel. (Must be a region of the window returned by window())
+     * @param[in] info    Info about executing thread and CPU.
+     */
+    virtual void run_op(const std::vector<InputOperatorTensors *> &inputs, std::vector<OutputOperatorTensors *> &outputs, const Window &window, const ThreadInfo &info)
+    {
+        ARM_COMPUTE_UNUSED(inputs, outputs, window, info); // Default implementation performs no work; the arguments are only referenced here (presumably to silence unused-parameter warnings).
+    }
+
     /** Name of the kernel
      *
      * @return Kernel name