Clean up header usage a bit in NEON backend

Including NEFunctions.h is unnecessary and adds about a second
to the compile time of each translation unit in which it appears,
so we should include only the header files that declare the
Arm Compute functions we actually need.

Signed-off-by: Matthew Bentham <matthew.bentham@arm.com>
Change-Id: I605d0eb82ccf2aafa35381a5d9d54337d3fe17a7
diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
index aa454c9..8b229a1 100644
--- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
@@ -7,6 +7,8 @@
 
 #include "NeonWorkloadUtils.hpp"
 
+#include <arm_compute/runtime/NEON/functions/NEDequantizationLayer.h>
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/NeonTensorHandle.hpp>
@@ -33,9 +35,10 @@
     arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.reset(new arm_compute::NEDequantizationLayer());
-    m_Layer->configure(&input, &output);
-    m_Layer->prepare();
+    std::unique_ptr<arm_compute::NEDequantizationLayer> layer(new arm_compute::NEDequantizationLayer());
+    layer->configure(&input, &output);
+    layer->prepare();
+    m_Layer.reset(layer.release());
 }
 
 void NeonDequantizeWorkload::Execute() const