IVGCVSW-2087 Reference implementation and unit tests for StridedSlice

Change-Id: Ifeacc0adb4547c72537b9ea7a61bf3c4ec3673fa
diff --git a/src/armnn/layers/StridedSliceLayer.cpp b/src/armnn/layers/StridedSliceLayer.cpp
index a3dca25..4ad9b89 100644
--- a/src/armnn/layers/StridedSliceLayer.cpp
+++ b/src/armnn/layers/StridedSliceLayer.cpp
@@ -65,7 +65,7 @@
         int newSize = stride > 0 ? ((stop - start) + stride - 1) / stride :
                                    ((start - stop) - stride - 1) / -stride;
 
-        newSize = std::min(0, newSize);
+        newSize = std::max(0, newSize);
 
         outputShape.push_back(boost::numeric_cast<unsigned int>(newSize));
     }
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 485241f..f1a4289 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -32,6 +32,7 @@
     RuntimeTestImpl.hpp
     SoftmaxTestImpl.hpp
     SplitterTestImpl.hpp
+    StridedSliceTestImpl.hpp
     TensorCopyUtils.cpp
     TensorCopyUtils.hpp
     WorkloadDataValidation.cpp
@@ -42,4 +43,4 @@
 add_library(armnnBackendsCommonUnitTests OBJECT ${armnnBackendsCommonUnitTests_sources})
 target_include_directories(armnnBackendsCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
 target_include_directories(armnnBackendsCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
-target_include_directories(armnnBackendsCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/backends)
\ No newline at end of file
+target_include_directories(armnnBackendsCommonUnitTests PRIVATE ${PROJECT_SOURCE_DIR}/src/backends)
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index f10d14e..648bee6 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -32,6 +32,7 @@
 #include "SpaceToBatchNdTestImpl.hpp"
 #include "SplitterTestImpl.hpp"
 #include "SoftmaxTestImpl.hpp"
+#include "StridedSliceTestImpl.hpp"
 #include "NormTestImpl.hpp"
 #include "PermuteTestImpl.hpp"
 #include "LstmTestImpl.hpp"
@@ -7297,3 +7298,129 @@
     return BatchToSpaceNdHelper<uint8_t, 4, 4>(workloadFactory, memoryManager, armnn::DataLayout::NHWC, inputShape,
                                                input, blockShape, crops, outputShape, expectedOutput);
 }
+
+LayerTestResult<float, 4> StridedSlice4DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice4DTest template
+    return StridedSlice4DTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice4DReverseTest template
+    return StridedSlice4DReverseTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSliceSimpleStrideTest template
+    return StridedSliceSimpleStrideTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSliceSimpleRangeMaskTest template
+    return StridedSliceSimpleRangeMaskTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point (rank-2 result); all work is done by the StridedSliceShrinkAxisMaskTest template
+    return StridedSliceShrinkAxisMaskTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 3> StridedSlice3DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice3DTest template
+    return StridedSlice3DTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 3> StridedSlice3DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice3DReverseTest template
+    return StridedSlice3DReverseTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSlice2DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice2DTest template
+    return StridedSlice2DTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<float, 2> StridedSlice2DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // float entry point; all work is done by the StridedSlice2DReverseTest template
+    return StridedSlice2DReverseTest<float>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSlice4DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice4DTest template
+    return StridedSlice4DTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSlice4DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice4DReverseTest template
+    return StridedSlice4DReverseTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSliceSimpleStrideTest template
+    return StridedSliceSimpleStrideTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSliceSimpleRangeMaskTest template
+    return StridedSliceSimpleRangeMaskTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point (rank-2 result); all work is done by the StridedSliceShrinkAxisMaskTest template
+    return StridedSliceShrinkAxisMaskTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 3> StridedSlice3DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice3DTest template
+    return StridedSlice3DTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 3> StridedSlice3DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice3DReverseTest template
+    return StridedSlice3DReverseTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSlice2DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice2DTest template
+    return StridedSlice2DTest<uint8_t>(workloadFactory, memoryManager);
+}
+
+LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{ // uint8_t entry point; all work is done by the StridedSlice2DReverseTest template
+    return StridedSlice2DReverseTest<uint8_t>(workloadFactory, memoryManager);
+}
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 498cfb7..f851b36 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1071,3 +1071,75 @@
 LayerTestResult<uint8_t, 4> BatchToSpaceNdNhwcUintTest1(
     armnn::IWorkloadFactory &workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSlice4DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSlice4DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSliceSimpleStrideFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> StridedSliceSimpleRangeMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSliceShrinkAxisMaskFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> StridedSlice3DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 3> StridedSlice3DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSlice2DFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 2> StridedSlice2DReverseFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSlice4DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSlice4DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleStrideUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> StridedSliceSimpleRangeMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSliceShrinkAxisMaskUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> StridedSlice3DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 3> StridedSlice3DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSlice2DUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 2> StridedSlice2DReverseUint8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/StridedSliceTestImpl.hpp b/src/backends/backendsCommon/test/StridedSliceTestImpl.hpp
new file mode 100644
index 0000000..1633151
--- /dev/null
+++ b/src/backends/backendsCommon/test/StridedSliceTestImpl.hpp
@@ -0,0 +1,429 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "WorkloadTestUtils.hpp"
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/Tensor.hpp>
+#include <armnn/TypesUtils.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<typename T, std::size_t InDim, std::size_t OutDim> // InDim/OutDim: ranks of the input/output tensors
+LayerTestResult<T, OutDim> StridedSliceTestImpl( // shared driver: creates, feeds and runs one StridedSlice workload
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::TensorInfo& inputTensorInfo,  // modified here: quantization scale/offset are set when T is quantized
+    armnn::TensorInfo& outputTensorInfo, // modified here in the same way
+    std::vector<float>& inputData,          // given as float; converted with QuantizedVector<T>
+    std::vector<float>& outputExpectedData, // given as float; converted with QuantizedVector<T>
+    armnn::StridedSliceQueueDescriptor descriptor, // by value: inputs/outputs are attached to this local copy
+    const float qScale = 1.0f,
+    const int32_t qOffset = 0)
+{
+    if(armnn::IsQuantizedType<T>()) // scale/offset are only applied for quantized element types
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+
+        outputTensorInfo.SetQuantizationScale(qScale); // output uses the same scale/offset as the input
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    boost::multi_array<T, InDim> input = // input values converted to T
+        MakeTensor<T, InDim>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    LayerTestResult<T, OutDim> ret(outputTensorInfo); // carries both `output` and `outputExpected`
+    ret.outputExpected =
+        MakeTensor<T, OutDim>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, outputExpectedData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle =
+        workloadFactory.CreateTensorHandle(inputTensorInfo);
+
+    std::unique_ptr<armnn::ITensorHandle> outputHandle =
+        workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::WorkloadInfo info; // populated by the two Add*ToWorkload calls that follow
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateStridedSlice(descriptor, info);
+
+    inputHandle->Allocate(); // handles are allocated after the workload has been created
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.data()); // copy the input into the tensor handle
+
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(ret.output.data(), outputHandle.get()); // copy the actual result back out
+
+    return ret; // output is not compared with outputExpected here; presumably the caller does that
+}
+
+template <typename T> // 4-D slice with unit strides; expects the middle third of the input back
+LayerTestResult<T, 4> StridedSlice4DTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 2, 3, 1}; // 18 elements
+    unsigned int outputShape[] = {1, 2, 3, 1}; // 6 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin  = {1, 0, 0, 0}; // axis 0 starts at index 1, the others at 0
+    desc.m_Parameters.m_End    = {2, 2, 3, 1}; // axis 0 stops at 2; the other values equal the input extents
+    desc.m_Parameters.m_Stride = {1, 1, 1, 1}; // step of 1 on every axis
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f // the second group of six input values
+    });
+
+    return StridedSliceTestImpl<T, 4, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 4-D slice with a negative stride on axis 1; expects the middle third with its two rows swapped
+LayerTestResult<T, 4> StridedSlice4DReverseTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 2, 3, 1}; // 18 elements
+    unsigned int outputShape[] = {1, 2, 3, 1}; // 6 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin  = {1, -1, 0, 0}; // NOTE(review): -1 presumably means the last index of axis 1 - confirm
+    desc.m_Parameters.m_End    = {2, -3, 3, 1}; // NOTE(review): -3 on the size-2 axis 1 is presumably clamped - confirm
+    desc.m_Parameters.m_Stride = {1, -1, 1, 1}; // only axis 1 uses a negative step
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        4.0f, 4.0f, 4.0f, 3.0f, 3.0f, 3.0f // second input group, 4s before 3s
+    });
+
+    return StridedSliceTestImpl<T, 4, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 4-D slice over the full extents with a step of 2 on the first three axes
+LayerTestResult<T, 4> StridedSliceSimpleStrideTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 2, 3, 1}; // 18 elements
+    unsigned int outputShape[] = {2, 1, 2, 1}; // 4 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin  = {0, 0, 0, 0}; // start at index 0 on every axis
+    desc.m_Parameters.m_End    = {3, 2, 3, 1}; // identical to inputShape
+    desc.m_Parameters.m_Stride = {2, 2, 2, 1}; // step of 2 on axes 0-2, step of 1 on axis 3
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 1.0f, // taken from the first input group
+
+        5.0f, 5.0f // taken from the third input group
+    });
+
+    return StridedSliceTestImpl<T, 4, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // all begin/end mask bits set; expects the whole input back although begin == end
+LayerTestResult<T, 4> StridedSliceSimpleRangeMaskTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 2, 3, 1};
+    unsigned int outputShape[] = {3, 2, 3, 1}; // same shape as the input
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin     = {1, 1, 1, 1}; // begin and end are equal on every axis
+    desc.m_Parameters.m_End       = {1, 1, 1, 1};
+    desc.m_Parameters.m_Stride    = {1, 1, 1, 1};
+    desc.m_Parameters.m_BeginMask = (1 << 4) - 1; // 0b1111: one bit for each of the 4 axes
+    desc.m_Parameters.m_EndMask   = (1 << 4) - 1; // 0b1111: one bit for each of the 4 axes
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>( // element-for-element identical to `input`
+    {
+        1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f,
+
+        3.0f, 3.0f, 3.0f, 4.0f, 4.0f, 4.0f,
+
+        5.0f, 5.0f, 5.0f, 6.0f, 6.0f, 6.0f
+    });
+
+    return StridedSliceTestImpl<T, 4, 4>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // rank-4 input, rank-2 output: shrink-axis bits are set for axes 1 and 2
+LayerTestResult<T, 2> StridedSliceShrinkAxisMaskTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 2, 3, 1}; // 18 elements
+    unsigned int outputShape[] = {3, 1};       // two fewer dimensions than the input
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin          = {0, 0, 1, 0}; // axis 2 starts at index 1
+    desc.m_Parameters.m_End            = {1, 1, 1, 1};
+    desc.m_Parameters.m_Stride         = {1, 1, 1, 1};
+    desc.m_Parameters.m_EndMask        = (1 << 4) - 1; // 0b1111: one bit for each of the 4 axes
+    desc.m_Parameters.m_ShrinkAxisMask = (1 << 1) | (1 << 2); // 0b0110: bits 1 and 2
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
+
+        7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
+
+        13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        2.0f, 8.0f, 14.0f // the second value of each group of six in `input`
+    });
+
+    return StridedSliceTestImpl<T, 4, 2>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 3-D slice, step of 2 with every end-mask bit set; expects the eight corners of the input
+LayerTestResult<T, 3> StridedSlice3DTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 3, 3}; // 27 elements
+    unsigned int outputShape[] = {2, 2, 2}; // 8 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin   = {0, 0, 0};
+    desc.m_Parameters.m_End     = {1, 1, 1}; // the expected output reaches index 2, i.e. beyond these values
+    desc.m_Parameters.m_Stride  = {2, 2, 2}; // every other element on each axis
+    desc.m_Parameters.m_EndMask = (1 << 3) - 1; // 0b111: one bit for each of the 3 axes
+
+    inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(3, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
+
+        19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 3.0f, 7.0f, 9.0f, // corners of the first 3x3 plane
+
+        19.0f, 21.0f, 25.0f, 27.0f // corners of the third 3x3 plane
+    });
+
+    return StridedSliceTestImpl<T, 3, 3>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 3-D slice with negative begin/end/stride; expects the eight corners in descending order
+LayerTestResult<T, 3> StridedSlice3DReverseTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 3, 3}; // 27 elements
+    unsigned int outputShape[] = {2, 2, 2}; // 8 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin  = {-1, -1, -1}; // NOTE(review): presumably the last index of each axis - confirm
+    desc.m_Parameters.m_End    = {-4, -4, -4}; // NOTE(review): -4 on size-3 axes is presumably clamped - confirm
+    desc.m_Parameters.m_Stride = {-2, -2, -2}; // negative step of 2 on every axis
+
+    inputTensorInfo = armnn::TensorInfo(3, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(3, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f,
+
+        10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f,
+
+        19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        27.0f, 25.0f, 21.0f, 19.0f, // corners of the third 3x3 plane, last to first
+
+        9.0f, 7.0f, 3.0f, 1.0f // corners of the first 3x3 plane, last to first
+    });
+
+    return StridedSliceTestImpl<T, 3, 3>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 2-D slice, step of 2 with both end-mask bits set; expects the four corners of the input
+LayerTestResult<T, 2> StridedSlice2DTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 3}; // 9 elements
+    unsigned int outputShape[] = {2, 2}; // 4 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin   = {0, 0};
+    desc.m_Parameters.m_End     = {1, 1}; // the expected output reaches index 2, i.e. beyond these values
+    desc.m_Parameters.m_Stride  = {2, 2}; // every other element on each axis
+    desc.m_Parameters.m_EndMask = (1 << 2) - 1; // 0b11: one bit for each of the 2 axes
+
+    inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f,
+
+        4.0f, 5.0f, 6.0f,
+
+        7.0f, 8.0f, 9.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 3.0f, // first and last value of the first row
+
+        7.0f, 9.0f // first and last value of the third row
+    });
+
+    return StridedSliceTestImpl<T, 2, 2>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template <typename T> // 2-D slice, step of -2 with all begin/end mask bits set; expects the corners in descending order
+LayerTestResult<T, 2> StridedSlice2DReverseTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = {3, 3}; // 9 elements
+    unsigned int outputShape[] = {2, 2}; // 4 elements
+
+    armnn::StridedSliceQueueDescriptor desc;
+    desc.m_Parameters.m_Begin     = {0, 0}; // the expected output starts at 9.0f, not at index 0
+    desc.m_Parameters.m_End       = {1, 1};
+    desc.m_Parameters.m_Stride    = {-2, -2}; // negative step of 2 on both axes
+    desc.m_Parameters.m_BeginMask = (1 << 2) - 1; // 0b11: one bit for each of the 2 axes
+    desc.m_Parameters.m_EndMask   = (1 << 2) - 1; // 0b11: one bit for each of the 2 axes
+
+    inputTensorInfo = armnn::TensorInfo(2, inputShape, armnn::GetDataType<T>());
+    outputTensorInfo = armnn::TensorInfo(2, outputShape, armnn::GetDataType<T>());
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f,
+
+        4.0f, 5.0f, 6.0f,
+
+        7.0f, 8.0f, 9.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        9.0f, 7.0f, // last and first value of the third row
+
+        3.0f, 1.0f // last and first value of the first row
+    });
+
+    return StridedSliceTestImpl<T, 2, 2>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+} // anonymous namespace
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 43a2fa2..00e4c5c 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -462,6 +462,19 @@
                                      &TrueFunc<>);
 }
 
+bool RefLayerSupport::IsStridedSliceSupported(const TensorInfo& input, // only input's data type is examined
+                                              const TensorInfo& output,
+                                              const StridedSliceDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(output);     // not consulted by this check
+    ignore_unused(descriptor); // not consulted by this check
+    return IsSupportedForDataTypeRef(reasonIfUnsupported,
+                                     input.GetDataType(),
+                                     &TrueFunc<>,  // NOTE(review): presumably the Float32 case - confirm
+                                     &TrueFunc<>); // NOTE(review): presumably the Uint8 case - confirm
+}
+
 bool RefLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
                                              const TensorInfo& input1,
                                              const TensorInfo& output,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index a03c89c..defa962 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -174,6 +174,11 @@
                              const ViewsDescriptor& descriptor,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsStridedSliceSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const StridedSliceDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsSubtractionSupported(const TensorInfo& input0,
                                 const TensorInfo& input1,
                                 const TensorInfo& output,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 6d51b3d..da8669c 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -279,7 +279,7 @@
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
 {
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return MakeWorkload<RefStridedSliceFloat32Workload, RefStridedSliceUint8Workload>(descriptor, info);
 }
 
 } // namespace armnn
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 7d56144..7162d4a 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -58,10 +58,12 @@
         workloads/RefSoftmaxFloat32Workload.cpp \
         workloads/RefSoftmaxUint8Workload.cpp \
         workloads/RefSpaceToBatchNdWorkload.cpp \
+        workloads/RefStridedSliceWorkload.cpp \
         workloads/RefSplitterFloat32Workload.cpp \
         workloads/RefSplitterUint8Workload.cpp \
         workloads/ResizeBilinear.cpp \
         workloads/SpaceToBatchNd.cpp \
+        workloads/StridedSlice.cpp \
         workloads/Softmax.cpp
 
 # BACKEND_TEST_SOURCES contains the list of files to be included
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index aba9f3e..b168499 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -412,4 +412,25 @@
 
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcUint1, BatchToSpaceNdNhwcUintTest1)
 
+// Strided Slice: nine Float32 cases followed by the same nine cases on Uint8
+ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideFloat32, StridedSliceSimpleStrideFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskFloat32, StridedSliceSimpleRangeMaskFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskFloat32, StridedSliceShrinkAxisMaskFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3DFloat32, StridedSlice3DFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseFloat32, StridedSlice3DReverseFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2DFloat32, StridedSlice2DFloat32Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseFloat32, StridedSlice2DReverseFloat32Test)
+
+ARMNN_AUTO_TEST_CASE(StridedSlice4DUint8, StridedSlice4DUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseUint8, StridedSlice4DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceSimpleStrideUint8, StridedSliceSimpleStrideUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceSimpleRangeMaskUint8, StridedSliceSimpleRangeMaskUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSliceShrinkAxisMaskUint8, StridedSliceShrinkAxisMaskUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3DUint8, StridedSlice3DUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice3DReverseUint8, StridedSlice3DReverseUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2DUint8, StridedSlice2DUint8Test)
+ARMNN_AUTO_TEST_CASE(StridedSlice2DReverseUint8, StridedSlice2DReverseUint8Test)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 1c38509..2d9ad92 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -98,6 +98,8 @@
     RefSplitterFloat32Workload.hpp
     RefSplitterUint8Workload.cpp
     RefSplitterUint8Workload.hpp
+    RefStridedSliceWorkload.cpp
+    RefStridedSliceWorkload.hpp
     RefWorkloads.hpp
     RefWorkloadUtils.hpp
     ResizeBilinear.cpp
@@ -107,6 +109,8 @@
     SpaceToBatchNd.hpp
     SpaceToBatchNd.cpp
     Splitter.hpp
+    StridedSlice.hpp
+    StridedSlice.cpp
     TensorBufferArrayView.hpp
     Mean.cpp
     Mean.hpp
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
new file mode 100644
index 0000000..26a878e
--- /dev/null
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefStridedSliceWorkload.hpp"
+#include "StridedSlice.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include "TypeUtils.hpp"
+
+namespace armnn
+{
+
+// Runs the reference StridedSlice kernel on input 0 and writes the result to output 0.
+template<armnn::DataType DataType>
+void RefStridedSliceWorkload<DataType>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, GetName() + "_Execute");
+
+    using ElementType = ResolveType<DataType>;
+
+    StridedSlice(GetTensorInfo(m_Data.m_Inputs[0]),
+                 GetTensorInfo(m_Data.m_Outputs[0]),
+                 m_Data.m_Parameters,
+                 GetInputTensorData<ElementType>(0, m_Data),
+                 GetOutputTensorData<ElementType>(0, m_Data));
+}
+
+// Explicit instantiations: Float32 and QuantisedAsymm8 are the only variants emitted here.
+template class RefStridedSliceWorkload<DataType::Float32>;
+template class RefStridedSliceWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
new file mode 100644
index 0000000..b3586ad
--- /dev/null
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <armnn/TypesUtils.hpp>
+
+namespace armnn
+{
+
+// Reference StridedSlice workload, parameterised on the armnn::DataType of its tensors.
+template <armnn::DataType DataType>
+class RefStridedSliceWorkload : public TypedWorkload<StridedSliceQueueDescriptor, DataType>
+{
+public:
+    using TypedWorkload<StridedSliceQueueDescriptor, DataType>::TypedWorkload;
+    using TypedWorkload<StridedSliceQueueDescriptor, DataType>::m_Data;
+    void Execute() const override;
+    // Returns "RefStridedSlice" + GetDataTypeName(DataType) + "Workload", built on first use.
+    static const std::string& GetName()
+    {
+        static const std::string name = "RefStridedSlice" + std::string(GetDataTypeName(DataType)) + "Workload";
+        return name;
+    }
+};
+
+using RefStridedSliceFloat32Workload = RefStridedSliceWorkload<DataType::Float32>;
+using RefStridedSliceUint8Workload = RefStridedSliceWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 5ea7fe4..20e9a9f 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -43,6 +43,7 @@
 #include "Merger.hpp"
 #include "RefSpaceToBatchNdWorkload.hpp"
 #include "RefSplitterFloat32Workload.hpp"
+#include "RefStridedSliceWorkload.hpp"
 #include "RefConstantFloat32Workload.hpp"
 #include "RefActivationFloat32Workload.hpp"
 #include "RefConvolution2dFloat32Workload.hpp"
diff --git a/src/backends/reference/workloads/StridedSlice.cpp b/src/backends/reference/workloads/StridedSlice.cpp
new file mode 100644
index 0000000..71903e4
--- /dev/null
+++ b/src/backends/reference/workloads/StridedSlice.cpp
@@ -0,0 +1,158 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "StridedSlice.hpp"
+#include <boost/assert.hpp>
+#include <boost/numeric/conversion/cast.hpp>
+#include <vector>
+
+namespace armnn
+{
+
+// Pads the slicing parameters in p to dimCount dimensions. Existing begin/end/stride entries
+// move to the trailing positions; each new leading entry gets begin 0, end 0 and stride 1.
+void PadParams(StridedSliceDescriptor& p, unsigned int dimCount)
+{
+    BOOST_ASSERT_MSG(dimCount <= 4, "Expected input with at most 4 dimensions");
+    const unsigned int originalCount = boost::numeric_cast<unsigned int>(p.m_Begin.size());
+    BOOST_ASSERT(dimCount >= originalCount);
+    const unsigned int shift = dimCount - originalCount;
+
+    p.m_Stride.resize(dimCount);
+    p.m_Begin.resize(dimCount);
+    p.m_End.resize(dimCount);
+
+    for (unsigned int src = originalCount; src-- > 0;) // back to front: nothing is overwritten early
+    {
+        const unsigned int dst = src + shift;
+        p.m_Begin[dst] = p.m_Begin[src];
+        p.m_End[dst] = p.m_End[src];
+        p.m_Stride[dst] = p.m_Stride[src];
+    }
+
+    for (unsigned int lead = 0; lead < shift; ++lead)
+    {
+        p.m_Begin[lead] = 0;
+        p.m_End[lead] = 0;
+        p.m_Stride[lead] = 1;
+    }
+
+    // All masks move up by shift bits; only the begin and end masks get the vacated low bits set.
+    const int leadingBits = (1 << shift) - 1;
+    p.m_ShrinkAxisMask <<= shift;
+    p.m_EllipsisMask <<= shift;
+    p.m_NewAxisMask <<= shift;
+    p.m_BeginMask = (p.m_BeginMask << shift) | leadingBits;
+    p.m_EndMask = (p.m_EndMask << shift) | leadingBits;
+}
+
+bool LoopCondition(int index, int stop, int stride)
+{   // True once the traversal is finished: index has reached or passed stop in stride's direction.
+    return (stride > 0) ? !(index < stop) : !(index > stop);
+}
+
+// Returns inputShape left-padded with dimensions of size 1 up to newNumDimensions;
+// a shape that already has at least that many dimensions is returned unchanged.
+TensorShape ExtendShape(const TensorShape& inputShape,
+                        unsigned int newNumDimensions)
+{
+    const unsigned int oldNumDimensions = inputShape.GetNumDimensions();
+    if (oldNumDimensions >= newNumDimensions)
+    {
+        return inputShape;
+    }
+
+    // std::vector rather than a variable-length array: VLAs are a compiler extension,
+    // not standard C++. Every entry starts at 1, which is the value of the padding.
+    std::vector<unsigned int> newSizes(newNumDimensions, 1);
+
+    // The original dimensions occupy the trailing slots.
+    const unsigned int diff = newNumDimensions - oldNumDimensions;
+    for (unsigned int i = diff; i < newNumDimensions; ++i)
+    {
+        newSizes[i] = inputShape[i - diff];
+    }
+
+    return TensorShape(newNumDimensions, newSizes.data());
+}
+
+// Copies the elements of inputData selected by params into outputData.
+// The input shape and the slicing parameters are first padded to 4 dimensions; the selected
+// elements are then written contiguously, with the last dimension varying fastest.
+// outputInfo is not read: outputData must have room for every selected element.
+template<typename T>
+void StridedSlice(const TensorInfo& inputInfo,
+                  const TensorInfo& outputInfo,
+                  const StridedSliceDescriptor& params,
+                  const T* inputData,
+                  T* outputData)
+{
+    const TensorShape inputShape = ExtendShape(inputInfo.GetShape(), 4);
+
+    // Pad parameters to 4 dimensions
+    StridedSliceDescriptor paddedParams = params;
+    PadParams(paddedParams, 4);
+
+    const int stride0 = paddedParams.m_Stride[0];
+    const int stride1 = paddedParams.m_Stride[1];
+    const int stride2 = paddedParams.m_Stride[2];
+    const int stride3 = paddedParams.m_Stride[3];
+
+    // A zero stride would never advance its loop index, so the copy could not terminate.
+    BOOST_ASSERT_MSG(stride0 != 0 && stride1 != 0 && stride2 != 0 && stride3 != 0,
+                     "StridedSlice: strides must be non-zero");
+
+    // Per-axis first index and stop bound, as resolved by the descriptor.
+    const int start0 = paddedParams.GetStartForAxis(inputShape, 0);
+    const int stop0 = paddedParams.GetStopForAxis(inputShape, 0, start0);
+
+    const int start1 = paddedParams.GetStartForAxis(inputShape, 1);
+    const int stop1 = paddedParams.GetStopForAxis(inputShape, 1, start1);
+
+    const int start2 = paddedParams.GetStartForAxis(inputShape, 2);
+    const int stop2 = paddedParams.GetStopForAxis(inputShape, 2, start2);
+
+    const int start3 = paddedParams.GetStartForAxis(inputShape, 3);
+    const int stop3 = paddedParams.GetStopForAxis(inputShape, 3, start3);
+
+    // The input extents are loop-invariant, so convert them once instead of per element.
+    const int dim1 = boost::numeric_cast<int>(inputShape[1]);
+    const int dim2 = boost::numeric_cast<int>(inputShape[2]);
+    const int dim3 = boost::numeric_cast<int>(inputShape[3]);
+
+    T* outPtr = outputData;
+
+    // LoopCondition() reports that an axis is finished, hence the negation.
+    for (int in0 = start0; !LoopCondition(in0, stop0, stride0); in0 += stride0)
+    {
+        for (int in1 = start1; !LoopCondition(in1, stop1, stride1); in1 += stride1)
+        {
+            for (int in2 = start2; !LoopCondition(in2, stop2, stride2); in2 += stride2)
+            {
+                for (int in3 = start3; !LoopCondition(in3, stop3, stride3); in3 += stride3)
+                {
+                    // Row-major offset of element (in0, in1, in2, in3) in the 4D input.
+                    const int inputOffset = ((in0 * dim1 + in1) * dim2 + in2) * dim3 + in3;
+                    *(outPtr++) = inputData[inputOffset];
+                }
+            }
+        }
+    }
+}
+
+// Explicit instantiations: float and uint8_t are the only element types this file provides.
+template void StridedSlice<float>(const TensorInfo& inputInfo,
+                                  const TensorInfo& outputInfo,
+                                  const StridedSliceDescriptor& params,
+                                  const float* inputData,
+                                  float* outputData);
+
+template void StridedSlice<uint8_t>(const TensorInfo& inputInfo,
+                                    const TensorInfo& outputInfo,
+                                    const StridedSliceDescriptor& params,
+                                    const uint8_t* inputData,
+                                    uint8_t* outputData);
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/StridedSlice.hpp b/src/backends/reference/workloads/StridedSlice.hpp
new file mode 100644
index 0000000..8eed870
--- /dev/null
+++ b/src/backends/reference/workloads/StridedSlice.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+template <typename T>                                    // T: element type of both data buffers
+void StridedSlice(const TensorInfo& inputInfo,           // its shape gives the extents of inputData
+                  const TensorInfo& outputInfo,          // not read by the definition in StridedSlice.cpp
+                  const StridedSliceDescriptor& params,  // begin/end/stride values and masks of the slice
+                  const T* inputData,                    // source elements
+                  T* outputData);                        // destination, filled contiguously
+
+} //namespace armnn