IVGCVSW-6163 Add Conv3d FrontEnd and Ref Implementation

 * Added front-end
 * Added Reference workload
 * Added Serializer & Deserializer support
 * Added unit tests
 * Added NDHWC DataLayout

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Iec4d39e7433b5334d52fa44cf8efc6bcd39319d8
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 0ab8c6b..e169c03 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -18,6 +18,8 @@
     Broadcast.hpp
     ConvImpl.cpp
     ConvImpl.hpp
+    Conv3dImpl.cpp
+    Conv3dImpl.hpp
     Debug.cpp
     Debug.hpp
     Decoders.hpp
@@ -87,6 +89,8 @@
     RefConvertFp32ToFp16Workload.hpp
     RefConvolution2dWorkload.cpp
     RefConvolution2dWorkload.hpp
+    RefConvolution3dWorkload.cpp
+    RefConvolution3dWorkload.hpp
     RefElementwiseWorkload.cpp
     RefElementwiseWorkload.hpp
     RefDebugWorkload.cpp
diff --git a/src/backends/reference/workloads/Conv3dImpl.cpp b/src/backends/reference/workloads/Conv3dImpl.cpp
new file mode 100644
index 0000000..484d887
--- /dev/null
+++ b/src/backends/reference/workloads/Conv3dImpl.cpp
@@ -0,0 +1,151 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "Conv3dImpl.hpp"
+
+namespace armnn
+{
+
+// Reference 3D convolution on float-decoded data. Decodes the whole input, filter and (if enabled) bias, then
+// for each output element sums filter * input over the filter window and input channels, adds the bias and
+// stores the result through rOutputEncoder. Window positions that fall in the padding contribute zero.
+// Flat offsets are computed for N,D,H,W,C input/output and D,H,W,I,O weights; dataLayout only selects which
+// shape dimensions are read. Throws InvalidArgumentException if biasEnabled is set but pBiasDecoder is null.
+void Convolve3d(const TensorShape& rInputShape,
+                Decoder<float>& rInputDecoder,
+                const TensorShape& rOutputShape,
+                Encoder<float>& rOutputEncoder,
+                const TensorShape& rFilterShape,
+                Decoder<float>& rFilterDecoder,
+                bool biasEnabled,
+                Decoder<float>* pBiasDecoder,
+                DataLayout dataLayout,
+                unsigned int paddingTop,
+                unsigned int paddingLeft,
+                unsigned int paddingFront,
+                unsigned int xStride,
+                unsigned int yStride,
+                unsigned int zStride,
+                unsigned int xDilation,
+                unsigned int yDilation,
+                unsigned int zDilation)
+{
+    if (biasEnabled && pBiasDecoder == nullptr)
+    {
+        throw InvalidArgumentException("Bias is enabled but the bias data is invalid");
+    }
+
+    const armnnUtils::DataLayoutIndexed layout(dataLayout);
+    const unsigned int cDim = layout.GetChannelsIndex();
+    const unsigned int hDim = layout.GetHeightIndex();
+    const unsigned int wDim = layout.GetWidthIndex();
+    const unsigned int dDim = layout.GetDepthIndex();
+
+    const unsigned int numInChannels  = rInputShape[cDim];
+    const unsigned int numOutChannels = rOutputShape[cDim];
+    const unsigned int numBatches     = rOutputShape[0];
+    const unsigned int outHeight      = rOutputShape[hDim];
+    const unsigned int outWidth       = rOutputShape[wDim];
+    const unsigned int outDepth       = rOutputShape[dDim];
+    const unsigned int inHeight       = rInputShape[hDim];
+    const unsigned int inWidth        = rInputShape[wDim];
+    const unsigned int inDepth        = rInputShape[dDim];
+
+    // Conv3d weights layout: [D,H,W,I,O]
+    const unsigned int kernelDepth  = rFilterShape[0];
+    const unsigned int kernelHeight = rFilterShape[1];
+    const unsigned int kernelWidth  = rFilterShape[2];
+
+    const std::vector<float> inputData  = rInputDecoder.DecodeTensor(rInputShape);
+    const std::vector<float> filterData = rFilterDecoder.DecodeTensor(rFilterShape);
+
+    const TensorShape biasShape{numOutChannels};
+    std::vector<float> biasData;
+    if (biasEnabled)
+    {
+        biasData = pBiasDecoder->DecodeTensor(biasShape);
+    }
+
+    // Element strides of the flattened tensors. Offsets stay hand-computed because using
+    // DataLayoutIndexed::GetIndex causes large performance regression.
+    const unsigned int filterStrideI = numOutChannels;
+    const unsigned int filterStrideW = numInChannels * filterStrideI;
+    const unsigned int filterStrideH = kernelWidth * filterStrideW;
+    const unsigned int filterStrideD = kernelHeight * filterStrideH;
+
+    const unsigned int inStrideW = numInChannels;
+    const unsigned int inStrideH = inWidth * inStrideW;
+    const unsigned int inStrideD = inHeight * inStrideH;
+    const unsigned int inStrideN = inDepth * inStrideD;
+
+    const unsigned int outStrideW = numOutChannels;
+    const unsigned int outStrideH = outWidth * outStrideW;
+    const unsigned int outStrideD = outHeight * outStrideH;
+    const unsigned int outStrideN = outDepth * outStrideD;
+
+    for (unsigned int n = 0; n < numBatches; ++n)
+    {
+        for (unsigned int zOut = 0; zOut < outDepth; ++zOut)
+        {
+            for (unsigned int xOut = 0; xOut < outWidth; ++xOut)
+            {
+                for (unsigned int yOut = 0; yOut < outHeight; ++yOut)
+                {
+                    for (unsigned int cOut = 0; cOut < numOutChannels; ++cOut)
+                    {
+                        // One output element: accumulate over the filter window and the input channels.
+                        float acc = 0.0f;
+
+                        for (unsigned int zK = 0; zK < kernelDepth; ++zK)
+                        {
+                            const unsigned int zIn = zOut * zStride + zK * zDilation;
+                            const bool zPad = zIn < paddingFront || zIn >= inDepth + paddingFront;
+
+                            for (unsigned int yK = 0; yK < kernelHeight; ++yK)
+                            {
+                                const unsigned int yIn = yOut * yStride + yK * yDilation;
+                                const bool yPad = yIn < paddingTop || yIn >= inHeight + paddingTop;
+
+                                for (unsigned int xK = 0; xK < kernelWidth; ++xK)
+                                {
+                                    const unsigned int xIn = xOut * xStride + xK * xDilation;
+                                    const bool xPad = xIn < paddingLeft || xIn >= inWidth + paddingLeft;
+                                    // zIn/yIn/xIn are padded coordinates; outside the real input the value is 0.
+                                    const bool inPadding = yPad || xPad || zPad;
+
+                                    const unsigned int filterBase =
+                                        zK * filterStrideD + yK * filterStrideH + xK * filterStrideW + cOut;
+                                    const unsigned int inputBase = inPadding ? 0u :
+                                        (n * inStrideN + (zIn - paddingFront) * inStrideD +
+                                         (yIn - paddingTop) * inStrideH + (xIn - paddingLeft) * inStrideW);
+
+                                    for (unsigned int cIn = 0; cIn < numInChannels; ++cIn)
+                                    {
+                                        const float inputValue = inPadding ? 0.0f : inputData[inputBase + cIn];
+                                        acc += filterData[filterBase + cIn * filterStrideI] * inputValue;
+                                    }
+                                }
+                            }
+                        }
+
+                        if (biasEnabled)
+                        {
+                            acc += biasData[cOut];
+                        }
+
+                        const unsigned int outIdx =
+                            n * outStrideN + zOut * outStrideD + yOut * outStrideH + xOut * outStrideW + cOut;
+
+                        // operator[] is called only for its effect (presumably selecting element outIdx for Set).
+                        rOutputEncoder[outIdx];
+                        rOutputEncoder.Set(acc);
+                    }
+                }
+            }
+        }
+    }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/Conv3dImpl.hpp b/src/backends/reference/workloads/Conv3dImpl.hpp
new file mode 100644
index 0000000..5cf2ed9
--- /dev/null
+++ b/src/backends/reference/workloads/Conv3dImpl.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <armnnUtils/DataLayoutIndexed.hpp>
+
+namespace armnn
+{
+// Reference 3D convolution, defined in Conv3dImpl.cpp. Throws if biasEnabled is set but pBiasDecoder is null.
+void Convolve3d(const TensorShape& rInputShape,    // Shape of the input tensor.
+                Decoder<float>& rInputDecoder,     // Supplies the input values as floats.
+                const TensorShape& rOutputShape,   // Shape of the output tensor; dimension 0 is the batch count.
+                Encoder<float>& rOutputEncoder,    // Receives one float per output element.
+                const TensorShape& rFilterShape,   // Shape of the weights, laid out as [D,H,W,I,O].
+                Decoder<float>& rFilterDecoder,    // Supplies the weight values as floats.
+                bool biasEnabled,                  // When true, a bias is added per output channel.
+                Decoder<float>* pBiasDecoder,      // Supplies the bias values; must be non-null if biasEnabled.
+                DataLayout dataLayout,             // Selects which shape dimensions hold C, H, W and D.
+                unsigned int paddingTop,           // Zero padding before the input along height.
+                unsigned int paddingLeft,          // Zero padding before the input along width.
+                unsigned int paddingFront,         // Zero padding before the input along depth.
+                unsigned int xStride,              // Input step per output position along width.
+                unsigned int yStride,              // Input step per output position along height.
+                unsigned int zStride,              // Input step per output position along depth.
+                unsigned int xDilation,            // Spacing between filter taps along width.
+                unsigned int yDilation,            // Spacing between filter taps along height.
+                unsigned int zDilation);           // Spacing between filter taps along depth.
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution3dWorkload.cpp b/src/backends/reference/workloads/RefConvolution3dWorkload.cpp
new file mode 100644
index 0000000..ea425da
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution3dWorkload.cpp
@@ -0,0 +1,76 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefConvolution3dWorkload.hpp"
+
+#include "Conv3dImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+// Constructs the workload: reports the layer description to the profiler, then creates ScopedTensorHandle
+// members from the descriptor's weight (and, if enabled, bias) handles and float decoders over them.
+RefConvolution3dWorkload::RefConvolution3dWorkload(const Convolution3dQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : BaseWorkload<Convolution3dQueueDescriptor>(descriptor, info)
+{
+    const bool hasBias = descriptor.m_Parameters.m_BiasEnabled;
+    // Tensor infos that accompany the layer parameters in the profiling report.
+    WorkloadInfo detailsInfo;
+    detailsInfo.m_InputTensorInfos  = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (hasBias)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("RefConvolution3dWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
+    m_Weight = std::make_unique<ScopedTensorHandle>(*descriptor.m_Weight);
+    const TensorInfo& weightInfo = m_Weight->GetTensorInfo();
+    m_FilterShape   = weightInfo.GetShape();
+    m_FilterDecoder = MakeDecoder<float>(weightInfo, m_Weight->Map(true));
+    if (hasBias)
+    {
+        m_Bias = std::make_unique<ScopedTensorHandle>(*descriptor.m_Bias);
+        const TensorInfo& biasTensorInfo = m_Bias->GetTensorInfo();
+        m_BiasDecoder = MakeDecoder<float>(biasTensorInfo, m_Bias->Map(true));
+    }
+}
+
+void RefConvolution3dWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs); // Run the convolution on the tensor handles stored in m_Data.
+}
+
+void RefConvolution3dWorkload::ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)
+{
+    Execute(workingMemDescriptor.m_Inputs, workingMemDescriptor.m_Outputs); // Use the caller-supplied handles.
+}
+
+// Decodes inputs[0], runs Convolve3d with the stored filter/bias decoders and encodes into outputs[0].
+void RefConvolution3dWorkload::Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_GUID(Compute::CpuRef, "RefConvolution3dWorkload_Execute", this->GetGuid());
+
+    const TensorInfo& inputInfo  = GetTensorInfo(inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+    std::unique_ptr<Decoder<float>> inputDecoder  = MakeDecoder<float>(inputInfo, inputs[0]->Map());
+    std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(outputInfo, outputs[0]->Map());
+
+    const auto& params = m_Data.m_Parameters;
+    Convolve3d(inputInfo.GetShape(), *inputDecoder, outputInfo.GetShape(), *outputEncoder, m_FilterShape,
+               *m_FilterDecoder, params.m_BiasEnabled, m_BiasDecoder.get(), params.m_DataLayout,
+               params.m_PadTop, params.m_PadLeft, params.m_PadFront,
+               params.m_StrideX, params.m_StrideY, params.m_StrideZ,
+               params.m_DilationX, params.m_DilationY, params.m_DilationZ);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution3dWorkload.hpp b/src/backends/reference/workloads/RefConvolution3dWorkload.hpp
new file mode 100644
index 0000000..0373a8b
--- /dev/null
+++ b/src/backends/reference/workloads/RefConvolution3dWorkload.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+namespace armnn
+{
+
+// Reference (CpuRef) workload for Convolution3d: holds the weight/bias handles and float decoders over them.
+class RefConvolution3dWorkload : public BaseWorkload<Convolution3dQueueDescriptor>
+{
+public:
+    explicit RefConvolution3dWorkload(const Convolution3dQueueDescriptor& descriptor,
+                                      const WorkloadInfo& info);
+
+    void Execute() const override;                                            // Runs on the handles in m_Data.
+    void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor)  override;  // Runs on the supplied handles.
+
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; // Shared worker.
+    std::unique_ptr<ScopedTensorHandle> m_Weight;  // Weights handle created in the constructor.
+    std::unique_ptr<ScopedTensorHandle> m_Bias;    // Bias handle; created only when bias is enabled.
+
+    std::unique_ptr<Decoder<float>> m_FilterDecoder;  // Float decoder over m_Weight.
+    std::unique_ptr<Decoder<float>> m_BiasDecoder;    // Float decoder over m_Bias; unset when bias is disabled.
+
+    TensorShape m_FilterShape;  // Shape of the weights tensor, passed to Convolve3d.
+};
+
+} //namespace armnn
+
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 1cf84ee..ed3aa90 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -22,6 +22,7 @@
 #include "RefChannelShuffleWorkload.hpp"
 #include "RefComparisonWorkload.hpp"
 #include "RefConvolution2dWorkload.hpp"
+#include "RefConvolution3dWorkload.hpp"
 #include "RefConstantWorkload.hpp"
 #include "RefConcatWorkload.hpp"
 #include "RefConvertBf16ToFp32Workload.hpp"