IVGCVSW-1913: Fix for ValidationTest.concat_float_3_relaxed

* Added RefPermuteFloat16Workload to serve as a fallback when CL
  does not support the required permute configuration for FP16
* Moved Half.hpp to armnnUtils, as the utils library should not
  include private header files from the armnn library

Change-Id: Ibf0f698451e8406f7ed7cce470dab60b6d16361d
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
index e148bf6..b01246b 100644
--- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.cpp
@@ -4,10 +4,12 @@
 //
 
 #include "RefConvertFp16ToFp32Workload.hpp"
-#include "Half.hpp"
+
 #include "RefWorkloadUtils.hpp"
 #include "FloatingPointConverter.hpp"
 
+#include <armnnUtils/Half.hpp>
+
 namespace armnn
 {
 
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
index efaaf8e..99e3541 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.cpp
@@ -5,12 +5,12 @@
 
 #include "RefConvertFp32ToFp16Workload.hpp"
 
-#include "Half.hpp"
 #include "FloatingPointConverter.hpp"
 #include "RefWorkloadUtils.hpp"
-
 #include "Profiling.hpp"
 
+#include "armnnUtils/Half.hpp"
+
 namespace armnn
 {
 
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
index 4093ff3..df50156 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.cpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -26,6 +26,7 @@
     armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst));
 }
 
+template class RefPermuteWorkload<DataType::Float16>;
 template class RefPermuteWorkload<DataType::Float32>;
 template class RefPermuteWorkload<DataType::QuantisedAsymm8>;
 
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp
index 2cc176d..841a080 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.hpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp
@@ -27,6 +27,7 @@
     void Execute() const override;
 };
 
+using RefPermuteFloat16Workload = RefPermuteWorkload<DataType::Float16>;
 using RefPermuteFloat32Workload = RefPermuteWorkload<DataType::Float32>;
 using RefPermuteUint8Workload   = RefPermuteWorkload<DataType::QuantisedAsymm8>;
 
diff --git a/src/backends/reference/workloads/RefWorkloadUtils.hpp b/src/backends/reference/workloads/RefWorkloadUtils.hpp
index 153c519..67a1f5e 100644
--- a/src/backends/reference/workloads/RefWorkloadUtils.hpp
+++ b/src/backends/reference/workloads/RefWorkloadUtils.hpp
@@ -9,7 +9,7 @@
 
 #include <armnn/Tensor.hpp>
 #include <armnn/Types.hpp>
-#include <Half.hpp>
+#include <armnnUtils/Half.hpp>
 
 #include <boost/polymorphic_cast.hpp>