COMPMID-3752: NEPermuteKernel does not support 2D permutations

Also resolves:
- COMPMID-3766: CTS Failures in Transpose Neon + FP16

Change-Id: I9d323f45f49cc0bce9e6329790bcf2f0eeec8572
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3949
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/NEON/kernels/NEPermuteKernel.cpp b/src/core/NEON/kernels/NEPermuteKernel.cpp
index 737b10b..3f447f9 100644
--- a/src/core/NEON/kernels/NEPermuteKernel.cpp
+++ b/src/core/NEON/kernels/NEPermuteKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,15 +36,19 @@
 #include "arm_compute/core/NEON/kernels/convolution/common/shims.hpp"
 } // namespace
 
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 inline bool is_permutation_supported(const PermutationVector &v)
 {
+    static const std::array<PermutationVector, 2> permutations2 =
+    {
+        {
+            PermutationVector(0U, 1U),
+            PermutationVector(1U, 0U),
+        }
+    };
     static const std::array<PermutationVector, 6> permutations3 =
     {
         {
@@ -86,7 +90,8 @@
         }
     };
 
-    return (permutations3.end() != std::find(permutations3.begin(), permutations3.end(), v)) || (permutations4.end() != std::find(permutations4.begin(), permutations4.end(), v));
+    return (permutations2.end() != std::find(permutations2.begin(), permutations2.end(), v)) || (permutations3.end() != std::find(permutations3.begin(), permutations3.end(), v))
+           || (permutations4.end() != std::find(permutations4.begin(), permutations4.end(), v));
 }
 
 Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm)
@@ -129,7 +134,7 @@
     // Output window
     Window                  window_out(window);
     const Window::Dimension zero_window = Window::Dimension(0, 0, 0);
-    for(size_t d = 0; d <= _perm.num_dimensions(); ++d)
+    for(size_t d = 0; d <= _output->info()->num_dimensions(); ++d)
     {
         window_out.set(d, zero_window);
     }
@@ -292,3 +297,4 @@
         (this->*_func)(window);
     }
 }
+} // namespace arm_compute