COMPMID-903: Implements NEPermute for NHWC conversions

Change-Id: I4083e8d16bb23933634f229a1408dfd0e8f2922a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/120069
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index f65e7ef..f80f67d 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -146,6 +146,14 @@
      *                     except for input of QASYMM8 type where output should be of S32 type.
      */
     void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+    /** Prepare the appropriate assembly-optimized kernel
+     *
+     * @param[in] ci CPU information
+     * @param[in] M  M parameter of matrix multiplication
+     * @param[in] N  N parameter of matrix multiplication
+     * @param[in] K  K parameter of matrix multiplication
+     */
+    void configure_asm_mm(const struct CPUInfo &ci, int M, int N, int K);
 
 private:
     MemoryGroup                                         _memory_group;