IVGCVSW-6262 Add support for Reduce Prod

* Tflite parser
* Tflite delegate
* Serializer
* Deserializer
* Ref, CpuAcc and GpuAcc workloads

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I601a9ee1680b372c7955d9a628857d08c3cfd377
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 5c99496..074429b 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -522,6 +522,8 @@
             return armnn::ReduceOperation::Mean;
         case armnnSerializer::ReduceOperation::ReduceOperation_Min:
             return armnn::ReduceOperation::Min;
+        case armnnSerializer::ReduceOperation::ReduceOperation_Prod:
+            return armnn::ReduceOperation::Prod;
         default:
             return armnn::ReduceOperation::Sum;
     }
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index 85435a3..a285a11 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -52,7 +52,8 @@
     Sum  = 0,
     Max  = 1,
     Mean = 2,
-    Min  = 3
+    Min  = 3,
+    Prod  = 4
 }
 
 enum ResizeMethod: byte {
diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h
index ca2bf0c..cf28a7a 100644
--- a/src/armnnSerializer/ArmnnSchema_generated.h
+++ b/src/armnnSerializer/ArmnnSchema_generated.h
@@ -559,26 +559,29 @@
   ReduceOperation_Max = 1,
   ReduceOperation_Mean = 2,
   ReduceOperation_Min = 3,
+  ReduceOperation_Prod = 4,
   ReduceOperation_MIN = ReduceOperation_Sum,
-  ReduceOperation_MAX = ReduceOperation_Min
+  ReduceOperation_MAX = ReduceOperation_Prod
 };
 
-inline const ReduceOperation (&EnumValuesReduceOperation())[4] {
+inline const ReduceOperation (&EnumValuesReduceOperation())[5] {
   static const ReduceOperation values[] = {
     ReduceOperation_Sum,
     ReduceOperation_Max,
     ReduceOperation_Mean,
-    ReduceOperation_Min
+    ReduceOperation_Min,
+    ReduceOperation_Prod
   };
   return values;
 }
 
 inline const char * const *EnumNamesReduceOperation() {
-  static const char * const names[5] = {
+  static const char * const names[6] = {
     "Sum",
     "Max",
     "Mean",
     "Min",
+    "Prod",
     nullptr
   };
   return names;
diff --git a/src/armnnSerializer/SerializerUtils.cpp b/src/armnnSerializer/SerializerUtils.cpp
index 1df8d4e..85ce01d 100644
--- a/src/armnnSerializer/SerializerUtils.cpp
+++ b/src/armnnSerializer/SerializerUtils.cpp
@@ -219,6 +219,8 @@
             return armnnSerializer::ReduceOperation::ReduceOperation_Mean;
         case armnn::ReduceOperation::Min:
             return armnnSerializer::ReduceOperation::ReduceOperation_Min;
+        case armnn::ReduceOperation::Prod:
+            return armnnSerializer::ReduceOperation::ReduceOperation_Prod;
         default:
             return armnnSerializer::ReduceOperation::ReduceOperation_Sum;
     }
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 5c7cb9b..305f769 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -677,6 +677,7 @@
     m_ParserFunctions[tflite::BuiltinOperator_RELU6]                   = &TfLiteParserImpl::ParseRelu6;
     m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX]              = &TfLiteParserImpl::ParseReduceMax;
     m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MIN]              = &TfLiteParserImpl::ParseReduceMin;
+    m_ParserFunctions[tflite::BuiltinOperator_REDUCE_PROD]             = &TfLiteParserImpl::ParseReduceProd;
     m_ParserFunctions[tflite::BuiltinOperator_RESHAPE]                 = &TfLiteParserImpl::ParseReshape;
     m_ParserFunctions[tflite::BuiltinOperator_RESIZE_BILINEAR]         = &TfLiteParserImpl::ParseResizeBilinear;
     m_ParserFunctions[tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] = &TfLiteParserImpl::ParseResizeNearestNeighbor;
@@ -3321,6 +3322,11 @@
     ParseReduce(subgraphIndex, operatorIndex, armnn::ReduceOperation::Sum);
 }
 
+void TfLiteParserImpl::ParseReduceProd(size_t subgraphIndex, size_t operatorIndex)
+{
+    ParseReduce(subgraphIndex, operatorIndex, armnn::ReduceOperation::Prod);
+}
+
 void TfLiteParserImpl::ParseReduceMax(size_t subgraphIndex, size_t operatorIndex)
 {
     ParseReduce(subgraphIndex, operatorIndex, armnn::ReduceOperation::Max);
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index dcd00d8..5645503 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -150,6 +150,7 @@
     void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation);
     void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex);
     void ParseReduceMin(size_t subgraphIndex, size_t operatorIndex);
+    void ParseReduceProd(size_t subgraphIndex, size_t operatorIndex);
     void ParseRelu(size_t subgraphIndex, size_t operatorIndex);
     void ParseRelu6(size_t subgraphIndex, size_t operatorIndex);
     void ParseReshape(size_t subgraphIndex, size_t operatorIndex);
diff --git a/src/armnnTfLiteParser/test/Prod.cpp b/src/armnnTfLiteParser/test/Prod.cpp
new file mode 100644
index 0000000..33ef6e3
--- /dev/null
+++ b/src/armnnTfLiteParser/test/Prod.cpp
@@ -0,0 +1,106 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersFixture.hpp"
+
+
+TEST_SUITE("TensorflowLiteParser_Prod")
+{
+struct ProdFixture : public ParserFlatbuffersFixture // Single-operator REDUCE_PROD model described as TfLite JSON.
+{
+    explicit ProdFixture(const std::string& inputShape,   // JSON array text for the input tensor shape
+                        const std::string& outputShape,  // JSON array text for the output tensor shape
+                        const std::string& axisShape,    // JSON array text for the axis tensor shape
+                        const std::string& axisData)     // JSON array text stored as the axis buffer's "data"
+    {
+        m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": "REDUCE_PROD" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": )" + inputShape + R"(,
+                            "type": "FLOAT32",
+                            "buffer": 0,
+                            "name": "inputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + outputShape + R"( ,
+                            "type": "FLOAT32",
+                            "buffer": 1,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + axisShape + R"( ,
+                            "type": "INT32",
+                            "buffer": 2,
+                            "name": "axis",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0 ],
+                    "outputs": [ 1 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0 , 2 ],
+                            "outputs": [ 1 ],
+                            "builtin_options_type": "ReducerOptions",
+                            "builtin_options": {
+                              "keep_dims": true,
+                            },
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { },
+                    { "data": )" + axisData + R"(, },
+                ]
+            }
+        )";
+        SetupSingleInputSingleOutput("inputTensor", "outputTensor"); // names match the tensors declared in the JSON
+    }
+};
+
+struct SimpleProdFixture : public ProdFixture
+{
+    SimpleProdFixture() : ProdFixture("[ 1, 3, 2, 4 ]", "[ 1, 1, 1, 4 ]", "[ 2 ]", "[ 1, 0, 0, 0,  2, 0, 0, 0 ]") {}
+};
+
+TEST_CASE_FIXTURE(SimpleProdFixture, "ParseProd")
+{
+    RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32>
+        (0, {{ "inputTensor", { 1.0f,   2.0f,   3.0f,   4.0f,
+                                5.0f,   6.0f,   7.0f,   8.0f,
+
+                                10.0f,  20.0f,  30.0f,  40.0f,
+                                50.0f,  60.0f,  70.0f,  80.0f,
+
+                                11.0f, 22.0f, 33.0f, 44.0f,
+                                55.0f, 66.0f, 77.0f, 88.0f  } } },
+            {{ "outputTensor", { 1512500.f,  20908800.f, 112058100.f, 396492800.f } } });
+}
+
+}
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index 624ce5d..059518a 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -272,7 +272,8 @@
         case ReduceOperation::Mean:   return arm_compute::ReductionOperation::MEAN_SUM;
         case ReduceOperation::Max:    return arm_compute::ReductionOperation::MAX;
         case ReduceOperation::Min:    return arm_compute::ReductionOperation::MIN;
-        default:                         throw InvalidArgumentException("Unsupported Reduction operation");
+        case ReduceOperation::Prod:   return arm_compute::ReductionOperation::PROD;
+        default:                      throw InvalidArgumentException("Unsupported Reduction operation");
     }
 }
 
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 5d33947..47ceffe 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -78,6 +78,7 @@
     test/layerTests/Pooling2dTestImpl.cpp \
     test/layerTests/RankTestImpl.cpp \
     test/layerTests/ReductionTestImpl.cpp \
+    test/layerTests/ReduceProdTestImpl.cpp \
     test/layerTests/ReduceSumTestImpl.cpp \
     test/layerTests/ReshapeTestImpl.cpp \
     test/layerTests/ResizeTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 4561fd7..c9bc5e7 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -139,6 +139,8 @@
     layerTests/RankTestImpl.hpp
     layerTests/ReductionTestImpl.cpp
     layerTests/ReductionTestImpl.hpp
+    layerTests/ReduceProdTestImpl.cpp
+    layerTests/ReduceProdTestImpl.hpp
     layerTests/ReduceSumTestImpl.cpp
     layerTests/ReduceSumTestImpl.hpp
     layerTests/ReshapeTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index fcb1f71..0690637 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -51,6 +51,7 @@
 #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RankTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ReductionTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ReduceProdTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.cpp
new file mode 100644
index 0000000..4fb0732
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.cpp
@@ -0,0 +1,345 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReduceProdTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceTestCommon(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory,
+        const armnn::TensorInfo inputTensorInfo,
+        const armnn::TensorInfo outputTensorInfo,
+        const std::vector<float>& inputData,
+        const std::vector<float>& outputData,
+        const std::vector<int32_t> vAxis,
+        const armnn::ReduceOperation reduceOperation,
+        bool keepDims = false)
+{
+    IgnoreUnused(memoryManager);
+    auto inputTensor = ConvertToDataType<ArmnnType>(inputData, inputTensorInfo);
+
+    std::vector<float> actualOutput(outputTensorInfo.GetNumElements());
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ReduceQueueDescriptor descriptor;
+    std::vector<uint32_t> updated_idx;
+    uint32_t resolvedAxis = 0;
+    for (uint32_t i = 0; i < vAxis.size(); ++i)
+    {
+        if (vAxis[i] <  0)
+        {
+            resolvedAxis = inputTensorInfo.GetNumDimensions() + static_cast<uint32_t>(vAxis[i]);
+        } else
+        {
+            resolvedAxis = static_cast<uint32_t>(vAxis[i]);
+        }
+
+        updated_idx.push_back(resolvedAxis);
+    }
+
+    descriptor.m_Parameters.m_vAxis = updated_idx;
+    descriptor.m_Parameters.m_ReduceOperation = reduceOperation;
+    descriptor.m_Parameters.m_KeepDims = keepDims;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduce(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputTensor.data());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
+
+    return LayerTestResult<float, 4>(actualOutput,
+                                     outputData,
+                                     outputHandle->GetShape(),
+                                     outputTensorInfo.GetShape());
+}
+
+} // namespace
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceProdSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 1, 1, 5 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 1 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({ 5.0f, 2.0f, 8.0f, 10.0f, 9.0f });
+    std::vector<float> outputValues({ 7200.0f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { -1 },
+                                       armnn::ReduceOperation::Prod);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 2, 4 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({   1.0f,    2.0f,      3.0f,     4.0f,      5.0f,    6.0f,      7.0f,      8.0f,
+                                      10.0f,   20.0f,     30.0f,    40.0f,     50.0f,   60.0f,     70.0f,     80.0f,
+                                     100.0f,  200.0f,   300.0f,   400.0f,    500.0f,  600.0f,     700.0f,    800.0f
+                                   });
+    std::vector<float> outputValues({ 1000.0f, 8000.0f, 27000.0f, 64000.0f, 125000.0f, 216000.0f, 343000.0f, 512000.0f
+                                     });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Prod);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 3, 4};
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues( {7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8});
+    std::vector<float> outputValues({  2940.f,  18432.f,   9408.f,   1568.f,
+                                       2520.f,   4608.f,  10080.f,   1512.f,
+                                      30240.f,   8064.f,   3584.f, 150528.f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1 },
+                                       armnn::ReduceOperation::Prod);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 6, 3, 4 };
+    const armnn::TensorShape outputShape{ 1, 6, 3, 1 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({ 7, 8, 6, 1,
+                                     1, 1, 8, 7,
+                                     3, 7, 7, 7,
+
+                                     6, 8, 4, 7,
+                                     3, 8, 7, 3,
+                                     5, 8, 8, 8,
+
+
+                                     7, 8, 2, 7,
+                                     3, 8, 5, 6,
+                                     8, 4, 2, 7,
+
+                                     1, 6, 7, 2,
+                                     8, 3, 3, 1,
+                                     7, 6, 2, 6,
+
+
+                                     5, 3, 4, 8,
+                                     7, 8, 2, 4,
+                                     6, 6, 2, 8,
+
+                                     2, 2, 7, 2,
+                                     5, 3, 6, 3,
+                                     6, 1, 8, 8 });
+    std::vector<float> outputValues({ 336.f,  56.f, 1029.f,
+                                     1344.f, 504.f, 2560.f,
+
+                                      784.f, 720.f,  448.f,
+                                       84.f,  72.f,  504.f,
+
+                                      480.f, 448.f,  576.f,
+                                       56.f, 270.f,  384.f  });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 3 },
+                                       armnn::ReduceOperation::Prod,
+                                       true);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceProdMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape inputShape{ 1, 3, 2, 4 };
+    const armnn::TensorShape outputShape{ 1, 1, 1, 4 };
+
+    armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType);
+
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32);
+
+    std::vector<float> inputValues({  1.0f,   2.0f,   3.0f,   4.0f,
+                                      5.0f,   6.0f,   7.0f,   8.0f,
+
+                                     10.0f,  20.0f,  30.0f,  40.0f,
+                                     50.0f,  60.0f,  70.0f,  80.0f,
+
+                                     11.0f, 22.0f, 33.0f, 44.0f,
+                                     55.0f, 66.0f, 77.0f, 88.0f });
+    std::vector<float> outputValues({ 1512500.f,  20908800.f, 112058100.f, 396492800.f });
+
+    return ReduceTestCommon<ArmnnType>(workloadFactory,
+                                       memoryManager,
+                                       tensorHandleFactory,
+                                       inputTensorInfo,
+                                       outputTensorInfo,
+                                       inputValues,
+                                       outputValues,
+                                       { 1, 2 },
+                                       armnn::ReduceOperation::Prod);
+}
+
+// Explicit template instantiations
+
+template LayerTestResult<float, 4>
+ReduceProdSimpleTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceProdSingleAxisTest1<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceProdSingleAxisTest2<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceProdSingleAxisTest3<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceProdMultipleAxisTest<armnn::DataType::Float32>(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.hpp
new file mode 100644
index 0000000..97e9497
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReduceProdTestImpl.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceProdSimpleTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest1(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest2(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceProdSingleAxisTest3(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceProdMultipleAxisTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index f5b26d3..2bb63b5 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -1894,6 +1894,15 @@
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
     ReduceSumSingleAxisFloat32_3, ClContextControlFixture, ReduceSumSingleAxisTest3<DataType::Float32>)
 
+// ReduceProd
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(ReduceProdFloat32, ClContextControlFixture, ReduceProdSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
+    ReduceProdSingleAxisFloat32_1, ClContextControlFixture, ReduceProdSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
+    ReduceProdSingleAxisFloat32_2, ClContextControlFixture, ReduceProdSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
+    ReduceProdSingleAxisFloat32_3, ClContextControlFixture, ReduceProdSingleAxisTest3<DataType::Float32>)
+
 // ReduceMax
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(ReduceMaxFloat32, ClContextControlFixture, ReduceMaxSimpleTest<DataType::Float32>)
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 6985776..75f9648 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1399,6 +1399,10 @@
 // Moved to NeonLayerTests_NDK_Bug.cpp
 //ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
 
+// ReduceProd
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdSingleAxisFloat32_1, ReduceProdSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdSingleAxisFloat32_2, ReduceProdSingleAxisTest2<DataType::Float32>)
+
 // ReduceMax
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index e906b29..18490e2 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -2321,6 +2321,13 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest<DataType::Float32>)
 
+// ReduceProd
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdFloat32, ReduceProdSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdSingleAxisFloat32_1, ReduceProdSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdSingleAxisFloat32_2, ReduceProdSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdSingleAxisFloat32_3, ReduceProdSingleAxisTest3<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceProdMultipleAxisFloat32, ReduceProdMultipleAxisTest<DataType::Float32>)
+
 // ReduceMax
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
diff --git a/src/backends/reference/workloads/Reduce.cpp b/src/backends/reference/workloads/Reduce.cpp
index 8bf422a..3f929c4 100644
--- a/src/backends/reference/workloads/Reduce.cpp
+++ b/src/backends/reference/workloads/Reduce.cpp
@@ -9,7 +9,6 @@
 
 #include <backendsCommon/WorkloadData.hpp>
 
-#include <cmath>
 #include <cstddef>
 #include <functional>
 #include <limits>
@@ -87,6 +86,9 @@
         case ReduceOperation::Sum:
             std::fill(tempOut.begin(), tempOut.end(), 0.0f);
             break;
+        case ReduceOperation::Prod:
+            std::fill(tempOut.begin(), tempOut.end(), 1.0f);
+            break;
         case ReduceOperation::Max:
             std::fill(tempOut.begin(), tempOut.end(), -1 * std::numeric_limits<float>::max());
             break;
@@ -119,23 +121,30 @@
                                                         numResolvedAxis, resolvedAxis);
         input[inputOffset];
         auto inputValue = input.Get();
-        if (reduceOperation == ReduceOperation::Max)
+        switch(reduceOperation)
         {
-            if (inputValue > tempOut[outputOffset])
-            {
-                tempOut[outputOffset] = inputValue;
-            }
-        }
-        else if (reduceOperation == ReduceOperation::Min)
-        {
-            if (inputValue < tempOut[outputOffset])
-            {
-                tempOut[outputOffset] = inputValue;
-            }
-        }
-        else
-        {
-            tempOut[outputOffset] += inputValue;
+            case ReduceOperation::Mean:
+            case ReduceOperation::Sum:
+                tempOut[outputOffset] += inputValue;
+                break;
+            case ReduceOperation::Prod:
+                tempOut[outputOffset] *= inputValue;
+                break;
+            case ReduceOperation::Max:
+                if (inputValue > tempOut[outputOffset])
+                {
+                    tempOut[outputOffset] = inputValue;
+                }
+                break;
+            case ReduceOperation::Min:
+                if (inputValue < tempOut[outputOffset])
+                {
+                    tempOut[outputOffset] = inputValue;
+                }
+                break;
+            default:
+                throw armnn::InvalidArgumentException("Unknown reduce method: " +
+                    std::to_string(static_cast<int>(reduceOperation)));
         }
     }