IVGCVSW-2316 Add reference implementation and unit tests for Debug

Change-Id: Ib2e5de2a057da57ef77a9f5c4367d699d4773294
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 3b49fa0..2c8f9cb 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -160,6 +160,19 @@
                                      &TrueFunc<>);
 }
 
+bool RefLayerSupport::IsDebugSupported(const TensorInfo& input,
+                                       const TensorInfo& output,
+                                       const DebugDescriptor& descriptor,
+                                       Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(output); // Debug copies its input straight through, so only the input type matters
+    ignore_unused(descriptor); // descriptor only carries the layer name/slot used when logging
+    return IsSupportedForDataTypeRef(reasonIfUnsupported,
+                                     input.GetDataType(),
+                                     &TrueFunc<>,
+                                     &TrueFunc<>); // TrueFunc for each checked data type => always supported
+}
+
 bool RefLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                                       const TensorInfo& output,
                                                       const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 0d34c08..9dc64cb 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -54,6 +54,11 @@
                                   const Optional<TensorInfo>& biases,
                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsDebugSupported(const TensorInfo& input,
+                          const TensorInfo& output,
+                          const DebugDescriptor& descriptor,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 43651cf..ac837d3 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -297,7 +297,7 @@
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
                                                            const WorkloadInfo& info) const
 {
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return MakeWorkload<RefDebugFloat32Workload, RefDebugUint8Workload>(descriptor, info); // real Float32/Uint8 debug workloads replace the Null stubs
 }
 
 } // namespace armnn
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 66675bd..d868069 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -15,6 +15,7 @@
         workloads/BatchToSpaceNd.cpp \
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
+        workloads/Debug.cpp \
         workloads/ElementwiseFunction.cpp \
         workloads/FullyConnected.cpp \
         workloads/Mean.cpp \
@@ -33,6 +34,7 @@
         workloads/RefConvertFp32ToFp16Workload.cpp \
         workloads/RefConvolution2dFloat32Workload.cpp \
         workloads/RefConvolution2dUint8Workload.cpp \
+        workloads/RefDebugWorkload.cpp \
         workloads/RefDepthwiseConvolution2dFloat32Workload.cpp \
         workloads/RefDepthwiseConvolution2dUint8Workload.cpp \
         workloads/RefElementwiseWorkload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index fa4af96..d3c2231 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -454,4 +454,15 @@
 ARMNN_AUTO_TEST_CASE(StridedSlice2DUint8, StridedSlice2DUint8Test)
 ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseUint8, StridedSlice2DReverseUint8Test)
 
+// Debug
+ARMNN_AUTO_TEST_CASE(Debug4DFloat32, Debug4DFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug3DFloat32, Debug3DFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug2DFloat32, Debug2DFloat32Test)
+ARMNN_AUTO_TEST_CASE(Debug1DFloat32, Debug1DFloat32Test)
+
+ARMNN_AUTO_TEST_CASE(Debug4DUint8, Debug4DUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug3DUint8, Debug3DUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug2DUint8, Debug2DUint8Test)
+ARMNN_AUTO_TEST_CASE(Debug1DUint8, Debug1DUint8Test)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 7028f18..14d6ca9 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -13,6 +13,8 @@
     Broadcast.hpp
     ConvImpl.cpp
     ConvImpl.hpp
+    Debug.cpp
+    Debug.hpp
     ElementwiseFunction.cpp
     ElementwiseFunction.hpp
     FullyConnected.cpp
@@ -52,6 +54,8 @@
     RefConvolution2dUint8Workload.hpp
     RefElementwiseWorkload.cpp
     RefElementwiseWorkload.hpp
+    RefDebugWorkload.cpp
+    RefDebugWorkload.hpp
     RefDepthwiseConvolution2dFloat32Workload.cpp
     RefDepthwiseConvolution2dFloat32Workload.hpp
     RefDepthwiseConvolution2dUint8Workload.cpp
diff --git a/src/backends/reference/workloads/Debug.cpp b/src/backends/reference/workloads/Debug.cpp
new file mode 100644
index 0000000..dfcbbd8
--- /dev/null
+++ b/src/backends/reference/workloads/Debug.cpp
@@ -0,0 +1,110 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "Debug.hpp"
+
+#include <boost/numeric/conversion/cast.hpp>
+
+#include <algorithm>
+#include <cstring>
+#include <iostream>
+#include <vector>
+
+namespace armnn
+{
+
+// Prints the input tensor (layer name, output slot, shape, min/max and nested
+// data) as a single JSON-like line on stdout, then copies the input through to
+// the output unchanged.
+template <typename T>
+void Debug(const TensorInfo& inputInfo,
+           const TensorInfo& outputInfo,
+           const DebugDescriptor& descriptor,
+           const T* inputData,
+           T* outputData)
+{
+    const unsigned int numDims = inputInfo.GetNumDimensions();
+    const unsigned int numElements = inputInfo.GetNumElements();
+    const TensorShape& inputShape = inputInfo.GetShape();
+
+    // strides[i] is the element count of the sub-tensor starting at dimension i,
+    // built right-to-left. A variable-length array is non-standard C++, so use
+    // std::vector instead of "unsigned int strides[numDims]".
+    std::vector<unsigned int> strides(numDims);
+    strides[numDims - 1] = inputShape[numDims - 1];
+
+    for (unsigned int i = 2; i <= numDims; i++)
+    {
+        strides[numDims - i] = strides[numDims - i + 1] * inputShape[numDims - i];
+    }
+
+    std::cout << "{ ";
+    std::cout << "\"layer\": \"" << descriptor.m_LayerName << "\", ";
+    std::cout << "\"outputSlot\": " << descriptor.m_SlotIndex << ", ";
+    std::cout << "\"shape\": ";
+
+    std::cout << "[";
+    for (unsigned int i = 0; i < numDims; i++)
+    {
+        std::cout << inputShape[i];
+        if (i != numDims - 1)
+        {
+            std::cout << ", ";
+        }
+    }
+    std::cout << "], ";
+
+    std::cout << "\"min\": "
+        << boost::numeric_cast<float>(*std::min_element(inputData, inputData + numElements)) << ", ";
+
+    std::cout << "\"max\": "
+        << boost::numeric_cast<float>(*std::max_element(inputData, inputData + numElements)) << ", ";
+
+    std::cout << "\"data\": ";
+
+    // Emit the data with nested brackets: open/close one bracket for every
+    // dimension boundary that element i falls on.
+    for (unsigned int i = 0; i < numElements; i++)
+    {
+        for (unsigned int j = 0; j < numDims; j++)
+        {
+            if (i % strides[j] == 0)
+            {
+                std::cout << "[" ;
+            }
+        }
+
+        // Cast to float so uint8 data prints as numbers rather than characters.
+        std::cout << boost::numeric_cast<float>(inputData[i]);
+
+        for (unsigned int j = 0; j < numDims; j++)
+        {
+            if ((i + 1) % strides[j] == 0)
+            {
+                std::cout << "]" ;
+            }
+        }
+
+        if (i != numElements - 1)
+        {
+            std::cout << ", ";
+        }
+    }
+
+    std::cout << " }" << std::endl;
+
+    // Debug is a pass-through layer: forward the input untouched.
+    std::memcpy(outputData, inputData, numElements * sizeof(T));
+}
+
+template void Debug<float>(const TensorInfo& inputInfo,
+                           const TensorInfo& outputInfo,
+                           const DebugDescriptor& descriptor,
+                           const float* inputData,
+                           float* outputData);
+
+template void Debug<uint8_t>(const TensorInfo& inputInfo,
+                             const TensorInfo& outputInfo,
+                             const DebugDescriptor& descriptor,
+                             const uint8_t* inputData,
+                             uint8_t* outputData);
+} //namespace armnn
diff --git a/src/backends/reference/workloads/Debug.hpp b/src/backends/reference/workloads/Debug.hpp
new file mode 100644
index 0000000..682f0bd
--- /dev/null
+++ b/src/backends/reference/workloads/Debug.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+// Logs the input tensor (shape, min/max and data) to stdout, then copies the
+// input through to the output unchanged. Explicitly instantiated for float
+// and uint8_t in Debug.cpp.
+template <typename T>
+void Debug(const TensorInfo& inputInfo,
+           const TensorInfo& outputInfo,
+           const DebugDescriptor& descriptor,
+           const T* inputData,
+           T* outputData);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDebugWorkload.cpp b/src/backends/reference/workloads/RefDebugWorkload.cpp
new file mode 100644
index 0000000..17eb8fc
--- /dev/null
+++ b/src/backends/reference/workloads/RefDebugWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "RefDebugWorkload.hpp"
+#include "Debug.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TypeUtils.hpp"
+
+namespace armnn
+{
+
+template<armnn::DataType DataType>
+void RefDebugWorkload<DataType>::Execute() const
+{
+    using T = ResolveType<DataType>; // map the DataType enum to its C++ element type
+
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute"); // e.g. "RefDebugFloat32Workload_Execute"
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+
+    const T* inputData = GetInputTensorData<T>(0, m_Data);
+    T* outputData = GetOutputTensorData<T>(0, m_Data);
+
+    Debug(inputInfo, outputInfo, m_Data.m_Parameters, inputData, outputData); // prints the tensor and copies input -> output (see Debug.cpp)
+}
+
+template class RefDebugWorkload<DataType::Float32>; // explicit instantiations for the two
+template class RefDebugWorkload<DataType::QuantisedAsymm8>; // reference-backend data types
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp
new file mode 100644
index 0000000..a1231f9
--- /dev/null
+++ b/src/backends/reference/workloads/RefDebugWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <armnn/TypesUtils.hpp>
+
+namespace armnn
+{
+
+template <armnn::DataType DataType>
+class RefDebugWorkload : public TypedWorkload<DebugQueueDescriptor, DataType> // pass-through workload that logs its input tensor
+{
+public:
+    static const std::string& GetName() // e.g. "RefDebugFloat32Workload"; used to tag profiling events
+    {
+        static const std::string name = std::string("RefDebug") + GetDataTypeName(DataType) + "Workload";
+        return name;
+    }
+
+    using TypedWorkload<DebugQueueDescriptor, DataType>::m_Data;
+    using TypedWorkload<DebugQueueDescriptor, DataType>::TypedWorkload; // inherit the base constructor
+
+    void Execute() const override;
+};
+
+using RefDebugFloat32Workload = RefDebugWorkload<DataType::Float32>; // aliases used by
+using RefDebugUint8Workload = RefDebugWorkload<DataType::QuantisedAsymm8>; // RefWorkloadFactory::CreateDebug
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 86d8624..ddce68e 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -58,3 +58,4 @@
 #include "RefPadWorkload.hpp"
 #include "RefBatchToSpaceNdUint8Workload.hpp"
 #include "RefBatchToSpaceNdFloat32Workload.hpp"
+#include "RefDebugWorkload.hpp"