IVGCVSW-2102: Fix Pooling2D CpuRef indexing bug

* Fixes bug when calculating indexes for NHWC in
  Pooling2D CpuRef implementation; it now uses
  TensorBufferArrayView.
* Adds 2-Channel unit tests for Pooling2d on CpuRef,
  Cl and Neon. The single channel tests were not
  properly exercising Pooling2d using NHWC data layout.
* Refactors Pooling2D NHWC tests so that the input and
  output data are permuted to NHWC when necessary,
  instead of hard coding the data in NHWC format.

Change-Id: I5b9d41ed425ff283ea8c8ef6b1266ae0bc80f43b
diff --git a/src/backends/reference/workloads/Pooling2d.cpp b/src/backends/reference/workloads/Pooling2d.cpp
index 9890920..d2fd0da 100644
--- a/src/backends/reference/workloads/Pooling2d.cpp
+++ b/src/backends/reference/workloads/Pooling2d.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "Pooling2d.hpp"
+#include "TensorBufferArrayView.hpp"
 
 #include <armnn/Exceptions.hpp>
 #include <armnn/Types.hpp>
@@ -143,9 +144,10 @@
                const TensorInfo& outputInfo,
                const Pooling2dDescriptor& params)
 {
-    const unsigned int channelsIndex = params.m_DataLayout.GetChannelsIndex();
-    const unsigned int heightIndex   = params.m_DataLayout.GetHeightIndex();
-    const unsigned int widthIndex    = params.m_DataLayout.GetWidthIndex();
+    const armnn::DataLayoutIndexed dataLayout = params.m_DataLayout;
+    auto channelsIndex = dataLayout.GetChannelsIndex();
+    auto heightIndex = dataLayout.GetHeightIndex();
+    auto widthIndex = dataLayout.GetWidthIndex();
 
     const int batchSize    = boost::numeric_cast<int>(outputInfo.GetShape()[0]);
     const int channels     = boost::numeric_cast<int>(outputInfo.GetShape()[channelsIndex]);
@@ -167,6 +169,9 @@
     Accumulator accumulate = GetAccumulator(params.m_PoolType);
     Executor execute       = GetExecutor(params.m_PoolType);
 
+    TensorBufferArrayView<const float> input(inputInfo.GetShape(), in, dataLayout);
+    TensorBufferArrayView<float> output(outputInfo.GetShape(), out, dataLayout);
+
     // Check supported padding methods outside the loop to simplify
     // the inner loop.
     if (params.m_PaddingMethod != PaddingMethod::Exclude &&
@@ -221,10 +226,10 @@
                     {
                         for (auto xInput = wstart; xInput < wend; xInput++)
                         {
-                            float inval = in[n * widthInput * heightInput * channels +
-                                             c * widthInput * heightInput +
-                                             yInput * widthInput +
-                                             xInput];
+                            float inval = input.Get(boost::numeric_cast<unsigned int>(n),
+                                                    boost::numeric_cast<unsigned int>(c),
+                                                    boost::numeric_cast<unsigned int>(yInput),
+                                                    boost::numeric_cast<unsigned int>(xInput));
 
                             accumulate(result, inval);
                         }
@@ -232,10 +237,10 @@
 
                     execute(result, poolAreaSize);
 
-                    out[n * widthOutput * heightOutput * channels +
-                        c * widthOutput * heightOutput +
-                        yOutput * widthOutput +
-                        xOutput] = result;
+                    output.Get(boost::numeric_cast<unsigned int>(n),
+                               boost::numeric_cast<unsigned int>(c),
+                               boost::numeric_cast<unsigned int>(yOutput),
+                               boost::numeric_cast<unsigned int>(xOutput)) = result;
                 }
             }
         }