COMPMID-719: NEWinogradLayer reordering using CPPPermute.

The input is reordered from NCHW to NHWC.
The output is reordered from NHWC back to NCHW.
The weights are reordered from [Ofm x Ifm x Height x Width] to [Height x Width x Ifm x Ofm].

Change-Id: I85aabedb1f9c13700bc4919eb3130f4d4bd0b465
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/113631
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
index 7770706..6fecf08 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h
@@ -28,6 +28,7 @@
 
 #include "arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h"
 #include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CPP/functions/CPPPermute.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/Tensor.h"
 
@@ -69,9 +70,14 @@
 private:
     MemoryGroup                     _memory_group;
     NEWinogradLayerKernel           _winograd_kernel;
-    Tensor                          _weights_workspace;
+    CPPPermute                      _permute_input;
+    CPPPermute                      _permute_weights;
+    CPPPermute                      _permute_output;
     Tensor                          _workspace;
     Tensor                          _kernel_storage;
+    Tensor                          _input_nhwc;
+    Tensor                          _output_nhwc;
+    Tensor                          _weights_hwio;
     const ITensor                  *_input;
     const ITensor                  *_weights;
     ITensor                        *_output;