IVGCVSW-7400 POW IVGCVSW-7278 SQUARED_DIFFERENCE.

* Added 2 new operators as ElementWiseBinary ops
* Ref End to End and unit tests
* Serialize and Deserialize tests
* Delegate and Opaque Delegate tests
* TfLite Parser tests

Signed-off-by: John Mcloughlin <john.mcloughlin@arm.com>
Change-Id: I537158127f602f0c41ca0402aa31655cd3bd4281
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 986d253..c868cbe 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -84,6 +84,7 @@
     test/layerTests/PadTestImpl.cpp \
     test/layerTests/Pooling2dTestImpl.cpp \
     test/layerTests/Pooling3dTestImpl.cpp \
+    test/layerTests/PowerTestImpl.cpp \
     test/layerTests/RankTestImpl.cpp \
     test/layerTests/ReductionTestImpl.cpp \
     test/layerTests/ReduceProdTestImpl.cpp \
@@ -92,6 +93,7 @@
     test/layerTests/ResizeTestImpl.cpp \
     test/layerTests/RsqrtTestImpl.cpp \
     test/layerTests/SliceTestImpl.cpp \
+    test/layerTests/SquaredDifferenceTestImpl.cpp \
     test/layerTests/QuantizeTestImpl.cpp \
     test/layerTests/SinTestImpl.cpp \
     test/layerTests/ShapeTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 95065df..aba9c72 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -153,6 +153,8 @@
     layerTests/Pooling2dTestImpl.hpp
     layerTests/Pooling3dTestImpl.cpp
     layerTests/Pooling3dTestImpl.hpp
+    layerTests/PowerTestImpl.cpp
+    layerTests/PowerTestImpl.hpp
     layerTests/PreluTestImpl.hpp
     layerTests/QuantizeTestImpl.cpp
     layerTests/QuantizeTestImpl.hpp
@@ -186,6 +188,8 @@
     layerTests/SplitterTestImpl.hpp
     layerTests/SqrtTestImpl.cpp
     layerTests/SqrtTestImpl.hpp
+    layerTests/SquaredDifferenceTestImpl.cpp
+    layerTests/SquaredDifferenceTestImpl.hpp
     layerTests/StackTestImpl.cpp
     layerTests/StackTestImpl.hpp
     layerTests/StridedSliceTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
index 6546a6a..0d47fd6 100644
--- a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
@@ -89,6 +89,12 @@
         case armnn::BinaryOperation::Sub:
             expectedOutput = { -1, -3, -1, -1,  3, -7, 3, 3,  -5, 1, 1, 1,  2, 2, -6, 2 };
             break;
+        case armnn::BinaryOperation::SqDiff:
+            expectedOutput = { 1, 9, 1, 1,  9, 49, 9, 9, 25, 1, 1, 1,  4, 4, 36, 4  };
+            break;
+        case armnn::BinaryOperation::Power:
+            expectedOutput = { 1, 1, 1, 1, 25, 25, 25, 25,  9, 9, 9, 9,  16, 16, 16, 16 };
+            break;
         default:
             throw("Invalid Elementwise Binary operation");
     }
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 00bfea5..b6ddb31 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -51,6 +51,7 @@
 #include <backendsCommon/test/layerTests/PermuteTestImpl.hpp>
 #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp>
 #include <backendsCommon/test/layerTests/Pooling3dTestImpl.hpp>
+#include <backendsCommon/test/layerTests/PowerTestImpl.hpp>
 #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
 #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RankTestImpl.hpp>
@@ -68,6 +69,7 @@
 #include <backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SplitterTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SqrtTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/StackTestImpl.hpp>
 #include <backendsCommon/test/layerTests/StridedSliceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SubtractionTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
index da6e11f..ded53cb 100644
--- a/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -206,4 +206,67 @@
             tensorHandleFactory,
             quantScale,
             quantOffset);
-}
\ No newline at end of file
+}
+
+// Elementwise Binary Operations: runs one ElementwiseBinary workload for `op` and returns actual vs expected.
+template<std::size_t NumDims,
+        armnn::DataType ArmnnTypeInput,
+        armnn::DataType ArmnnTypeOutput,
+        typename TInput  = armnn::ResolveType<ArmnnTypeInput>,
+        typename TOutput = armnn::ResolveType<ArmnnTypeOutput>>
+LayerTestResult<TOutput, NumDims> ElementwiseTestHelper(
+        armnn::IWorkloadFactory& workloadFactory,                              // creates the workload
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, // forwarded to ExecuteWorkload
+        armnn::BinaryOperation op,                                             // stored in the descriptor's m_Operation
+        const unsigned int shape0[NumDims],                                    // dimensions of the first input
+        std::vector<TInput> values0,                                           // data copied into the first input
+        const unsigned int shape1[NumDims],                                    // dimensions of the second input
+        std::vector<TInput> values1,                                           // data copied into the second input
+        const unsigned int outShape[NumDims],                                  // dimensions of the output
+        std::vector<TOutput> outValues,                                        // expected output values
+        const armnn::ITensorHandleFactory& tensorHandleFactory) {              // creates the three tensor handles
+
+    armnn::TensorInfo inputTensorInfo0{NumDims, shape0, ArmnnTypeInput};   // NOTE(review): no quantization scale/offset
+    armnn::TensorInfo inputTensorInfo1{NumDims, shape1, ArmnnTypeInput};   // is passed here, so whatever TensorInfo
+    armnn::TensorInfo outputTensorInfo{NumDims, outShape, ArmnnTypeOutput}; // defaults to applies - confirm for Q types.
+
+    std::vector<TOutput> actualOutput(outputTensorInfo.GetNumElements()); // one slot per output element
+
+    bool isBoolean = false; // handed to LayerTestResult; set only when the output data type is Boolean
+    if (ArmnnTypeOutput == armnn::DataType::Boolean)
+    {
+        isBoolean = true;
+    }
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = tensorHandleFactory.CreateTensorHandle(inputTensorInfo0);
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ElementwiseBinaryQueueDescriptor data;
+    data.m_Parameters.m_Operation = op; // selects the binary operation requested by the caller
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get()); // input 0 registered first,
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get()); // then input 1
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    auto workload = workloadFactory.CreateWorkload(armnn::LayerType::ElementwiseBinary, data, info);
+
+    inputHandle0->Allocate(); // handles are allocated after the workload has been created
+    inputHandle1->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle0.get(), values0.data()); // upload both operands
+    CopyDataToITensorHandle(inputHandle1.get(), values1.data());
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get()); // read the computed result back
+
+    return LayerTestResult<TOutput, NumDims>(actualOutput,                // what the workload produced
+                                             outValues,                   // what the caller expects
+                                             outputHandle->GetShape(),    // shape reported by the handle
+                                             outputTensorInfo.GetShape(), // shape requested via outShape
+                                             isBoolean);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp
new file mode 100644
index 0000000..dd6d569
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp
@@ -0,0 +1,539 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PowerTestImpl.hpp"
+
+#include "ElementwiseTestImpl.hpp"
+
+LayerTestResult<float, 4> PowerTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float32 Power test: both inputs and the output use the same 2x2x2x2 shape.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<float> input0 = // bases
+            {
+                    7.f, 3.f, 4.f, 2.f, 6.f, 4.f, 2.f, 1.f,
+                    1.f, 1.f, 0.f, 2.f, 9.f, 3.f, 5.f, 3.f
+            };
+
+    std::vector<float> input1 = // exponents
+            {
+                    2.f, 3.f, 2.f, 1.f, 2.f, 3.f, 4.f, 3.f,
+                    4.f, 5.f, 3.f, 5.f, 2.f, 3.f, 2.f, 0.f
+            };
+
+    std::vector<float> output // expected: input0[i] raised to the power input1[i]
+            {
+                    49.f, 27.f, 16.f, 2.f, 36.f, 64.f, 16.f, 1.f,
+                    1.f, 1.f, 0.f, 32.f, 81.f, 27.f, 25.f, 1.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,  // input0 shape
+            input0,
+            shape,  // input1 shape
+            input1,
+            shape,  // output shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> PowerBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float32 Power test where the exponent tensor holds a single element.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    unsigned int shape0[] = { 1, 2, 2, 2 };
+    unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<float> input0 = // bases
+            {
+                    1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 2.f, 1.f
+            };
+
+    std::vector<float> input1 = { 2.f }; // single exponent
+
+    std::vector<float> output = // expected: every element of input0 squared
+            {
+                    1.f, 4.f, 9.f, 16.f, 25.f, 0.f, 4.f, 1.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> PowerBroadcastTest(
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float32 Power test: a {1,1,1,3} exponent tensor against a {1,2,2,3} base tensor.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<float> input0 = // bases
+            {
+                    1.f, 2.f, 3.f, 3.f, 4.f, 4.f,
+                    4.f, 0.f, 2.f, 3.f, 4.f, 4.f
+            };
+
+    std::vector<float> input1 = { 1.f, 3.f, 1.f }; // exponents, one per position of the last dimension
+
+    std::vector<float> output = // expected: each triple of input0 raised to the exponents {1, 3, 1}
+            {
+                    1.f, 8.f, 3.f, 3.f, 64.f, 4.f,
+                    4.f, 0.f, 2.f, 3.f, 64.f, 4.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float16 Power test: both inputs and the output use the same 2x2x2x2 shape.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    using namespace half_float::literal; // brings the _h literal suffix used below into scope
+
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<armnn::Half> input0 = // bases
+            {
+                    1._h, 5._h, 1._h, 4._h, 6._h, 1._h, 3._h, 5._h,
+                    3._h, 7._h, 6._h, 3._h, 8._h, 4._h, 4._h, 2._h
+            };
+
+    std::vector<armnn::Half> input1 = // exponents
+            {
+                    2._h, 2._h, 2._h, 2._h, 2._h, 3._h, 3._h, 2._h,
+                    1._h, 2._h, 2._h, 4._h, 2._h, 1._h, 3._h, 5._h
+            };
+
+    std::vector<armnn::Half> output // expected: input0[i] raised to the power input1[i]
+            {
+                    1._h, 25._h, 1._h, 16._h, 36._h, 1._h, 27._h, 25._h,
+                    3._h, 49._h, 36._h, 81._h, 64._h, 4._h, 64._h, 32._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,  // input0 shape
+            input0,
+            shape,  // input1 shape
+            input1,
+            shape,  // output shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float16 Power test where the exponent tensor holds a single element.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    using namespace half_float::literal; // brings the _h literal suffix used below into scope
+
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<armnn::Half> input0 = // bases
+            {
+                    1._h, 2._h, 3._h, 4._h, 5._h, 4._h,
+                    1._h, 5._h, 4._h, 2._h, 0._h, 1._h
+            };
+
+    std::vector<armnn::Half> input1 = { 2._h }; // single exponent
+
+    std::vector<armnn::Half> output = // expected: every element of input0 squared
+            {
+                    1._h, 4._h, 9._h, 16._h, 25._h, 16._h,
+                    1._h, 25._h, 16._h, 4._h, 0._h, 1._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Float16 Power test: a {1,1,1,3} exponent tensor against a {1,2,2,3} base tensor.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    using namespace half_float::literal; // brings the _h literal suffix used below into scope
+
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<armnn::Half> input0 = // bases
+            {
+                    4._h, 2._h, 3._h, 4._h, 1._h,  0._h,
+                    8._h, 1._h, 1._h, 1._h, 2._h, 4._h
+            };
+
+    std::vector<armnn::Half> input1 = { 1._h, 5._h, 3._h }; // exponents, one per position of the last dimension
+
+    std::vector<armnn::Half> output = // expected: each triple of input0 raised to the exponents {1, 5, 3}
+            {
+                    4._h, 32._h, 27._h, 4._h, 1._h, 0._h,
+                    8._h, 1._h, 1._h, 1._h, 32._h, 64._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QAsymmU8 Power test: both inputs and the output use the same 1x1x2x2 shape.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape[] = { 1, 1, 2, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 2, 4, 3 }; // bases
+
+    std::vector<uint8_t> input1 = { 1, 2, 2, 2 }; // exponents
+
+    std::vector<uint8_t> output = { 4, 4, 16, 9 }; // expected: input0[i] raised to the power input1[i]
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,  // input0 shape
+            input0,
+            shape,  // input1 shape
+            input1,
+            shape,  // output shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QAsymmU8 Power test where the exponent tensor holds a single element.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> input0 = { 4, 5, 1, 0 }; // bases
+
+    std::vector<uint8_t> input1 = { 2 }; // single exponent
+
+    std::vector<uint8_t> output = { 16, 25, 1, 0 }; // expected: every element of input0 squared
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QAsymmU8 Power test: a {1,1,1,2} exponent tensor against a {1,1,2,2} base tensor.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 1, 6, 2 }; // bases
+
+    std::vector<uint8_t> input1 = { 2, 6 }; // exponents, one per position of the last dimension
+
+    std::vector<uint8_t> output = { 16, 1, 36, 64 }; // expected: each pair of input0 raised to the exponents {2, 6}
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QSymmS16 Power test: both inputs and the output use the same 2x2x2x2 shape.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int16_t> input0 = // bases
+            {
+                    1, 5, 1, 4, 4, 9, 3, 7,
+                    3, 2, 9, 6, 1, 2, 1, 4
+            };
+
+    std::vector<int16_t> input1 = // exponents (the third one is 0)
+            {
+                    2, 2, 0, 3, 2, 1, 3, 2,
+                    4, 4, 2, 1, 7, 5, 4, 2
+            };
+
+    std::vector<int16_t> output // expected: input0[i] raised to the power input1[i]
+            {
+                    1, 25, 1, 64, 16, 9, 27, 49, // third value is 1^0, which is 1 (not 0)
+                    81, 16, 81, 6, 1, 32, 1, 16
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,  // input0 shape
+            input0,
+            shape,  // input1 shape
+            input1,
+            shape,  // output shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QSymmS16 Power test where the exponent tensor holds a single element.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int16_t> input0 = // bases
+            {
+                    1, 2, 3, 4, 5, 0,
+                    5, 4, 1, 4, 5, 2
+            };
+
+    std::vector<int16_t> input1 = { 2 }; // single exponent
+
+    std::vector<int16_t> output = // expected: every element of input0 squared
+            {
+                    1, 4, 9, 16, 25, 0,
+                    25, 16, 1, 16, 25, 4
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // QSymmS16 Power test: a {1,1,1,3} exponent tensor against a {1,2,2,3} base tensor.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int16_t> input0 = // bases
+            {
+                    4, 2, 1, 4, 5, 3,
+                    7, 3, 4, 8, 1, 2
+            };
+
+    std::vector<int16_t> input1 = { 1, 2, 3 }; // exponents, one per position of the last dimension
+
+    std::vector<int16_t> output = // expected: each triple of input0 raised to the exponents {1, 2, 3}
+            {
+                    4, 4, 1, 4, 25, 27,
+                    7, 9, 64, 8, 1, 8
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Signed32 Power test: both inputs and the output use the same 2x2x2x2 shape.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int32_t> input0 = // bases
+            {
+                    1, 3, 4, 3, 1, 4, 2, 1,
+                    2, 1, 2, 1, 4, 3, 4, 3
+            };
+
+    std::vector<int32_t> input1 = // exponents
+            {
+                    2, 2, 2, 2, 3, 3, 4, 3,
+                    4, 4, 4, 4, 1, 3, 1, 3
+            };
+
+    std::vector<int32_t> output // expected: input0[i] raised to the power input1[i]
+            {
+                    1, 9, 16, 9, 1, 64, 16, 1,
+                    16, 1, 16, 1, 4, 27, 4, 27
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,  // input0 shape
+            input0,
+            shape,  // input1 shape
+            input1,
+            shape,  // output shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Signed32 Power test: a {1,1,1,3} exponent tensor against a {1,2,2,3} base tensor.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int32_t> input0 = // bases
+            {
+                    4, 4, 3, 4, 5, 0,
+                    5, 8, 1, 3, 9, 2
+            };
+
+    std::vector<int32_t> input1 = { 2, 1, 3 }; // exponents, one per position of the last dimension
+
+    std::vector<int32_t> output = // expected: each triple of input0 raised to the exponents {2, 1, 3}
+            {
+                    16, 4, 27, 16, 5, 0,
+                    25, 8, 1, 9, 9, 8
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{   // Signed32 Power test where the exponent tensor holds a single element.
+    IgnoreUnused(memoryManager); // NOTE(review): memoryManager is passed on below, so this looks redundant.
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int32_t> input0 = // bases
+            {
+                    1, 2, 3, 4, 5, 3,
+                    3, 1, 0, 2, 1, 5
+            };
+
+    std::vector<int32_t> input1 = { 2 }; // single exponent
+
+    std::vector<int32_t> output = // expected: every element of input0 squared
+            {
+                    1, 4, 9, 16, 25, 9,
+                    9, 1, 0, 4, 1, 25
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0, // the output takes input0's shape
+            output,
+            tensorHandleFactory);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp
new file mode 100644
index 0000000..3707208
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp
@@ -0,0 +1,88 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <Half.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> PowerTest( // Float32, identically shaped inputs
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> PowerBroadcast1ElementTest( // Float32, single-element exponent tensor
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> PowerBroadcastTest( // Float32, exponent tensor spans only the last dimension
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerFloat16Test( // Float16, identically shaped inputs
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerBroadcast1ElementFloat16Test( // Float16, single-element exponent tensor
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerBroadcastFloat16Test( // Float16, exponent tensor spans only the last dimension
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerBroadcast1ElementUint8Test( // QAsymmU8, single-element exponent tensor
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerUint8Test( // QAsymmU8, identically shaped inputs
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerBroadcastUint8Test( // QAsymmU8, exponent tensor spans only the last dimension
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t , 4> PowerInt16Test( // QSymmS16, identically shaped inputs
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> PowerBroadcast1ElementInt16Test( // QSymmS16, single-element exponent tensor
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> PowerBroadcastInt16Test( // QSymmS16, exponent tensor spans only the last dimension
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerInt32Test( // Signed32, identically shaped inputs
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerBroadcastInt32Test( // Signed32, exponent tensor spans only the last dimension
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerBroadcast1ElementInt32Test( // Signed32, single-element exponent tensor
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp
new file mode 100644
index 0000000..8bb31ed
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp
@@ -0,0 +1,539 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SquaredDifferenceTestImpl.hpp"
+
+#include "ElementwiseTestImpl.hpp"
+
+// Float32 SqDiff test in which both operands share the shape { 2, 2, 2, 2 }.
+// Every reference value is (lhs - rhs)^2 for the matching pair of elements.
+LayerTestResult<float, 4> SquaredDifferenceTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int dims[] = { 2, 2, 2, 2 };
+
+    std::vector<float> lhs =
+    {
+        7.f, 3.f, 4.f, 2.f, 6.f, 4.f, 2.f, 1.f,
+        3.f, 1.f, 0.f, 1.f, 4.f, 3.f, 4.f, 3.f
+    };
+
+    std::vector<float> rhs =
+    {
+        5.f, 3.f, 2.f, 5.f, 3.f, 3.f, 4.f, 3.f,
+        4.f, 4.f, 3.f, 2.f, 5.f, 5.f, 5.f, 5.f
+    };
+
+    // Reference result, e.g. (7 - 5)^2 = 4 for the first element.
+    std::vector<float> expected =
+    {
+        4.f, 0.f, 4.f, 9.f, 9.f, 1.f, 4.f, 4.f,
+        1.f, 9.f, 9.f, 1.f, 1.f, 4.f, 1.f, 4.f
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            dims, lhs,
+            dims, rhs,
+            dims, expected,
+            tensorHandleFactory);
+}
+
+// Float32 SqDiff test pairing a { 1, 2, 2, 2 } operand with a single-element { 1, 1, 1, 1 } operand.
+// The reference values are (x - 2)^2 for every element x of the larger operand.
+LayerTestResult<float, 4> SquaredDiffBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[]   = { 1, 2, 2, 2 };
+    const unsigned int scalarDims[] = { 1, 1, 1, 1 };
+
+    std::vector<float> lhs =
+    {
+        1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 2.f, 1.f
+    };
+
+    std::vector<float> rhs = { 2.f };
+
+    // The output is described with the shape of the larger operand.
+    std::vector<float> expected =
+    {
+        1.f, 0.f, 1.f, 4.f, 9.f, 4.f, 0.f, 1.f
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            scalarDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// Float32 SqDiff test pairing a { 1, 2, 2, 3 } operand with a { 1, 1, 1, 3 } operand.
+// The three rhs values are reused for each group of three consecutive lhs elements.
+LayerTestResult<float, 4> SquaredDiffBroadcastTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[] = { 1, 2, 2, 3 };
+    const unsigned int rowDims[]  = { 1, 1, 1, 3 };
+
+    std::vector<float> lhs =
+    {
+        1.f, 2.f, 3.f, 3.f, 6.f, 4.f,
+        4.f, 0.f, 2.f, 3.f, 4.f, 4.f
+    };
+
+    std::vector<float> rhs = { 1.f, 3.f, 1.f };
+
+    // Reference result, e.g. (1 - 1)^2, (2 - 3)^2, (3 - 1)^2 for the first three elements.
+    std::vector<float> expected =
+    {
+        0.f, 1.f, 4.f, 4.f, 9.f, 9.f,
+        9.f, 9.f, 1.f, 4.f, 1.f, 9.f
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            rowDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// Float16 SqDiff test in which both operands share the shape { 2, 2, 2, 2 }.
+// Values are written with the half_float `_h` literal; each reference value is (lhs - rhs)^2.
+LayerTestResult<armnn::Half, 4> SquaredDifferenceFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    const unsigned int dims[] = { 2, 2, 2, 2 };
+
+    std::vector<armnn::Half> lhs =
+    {
+        1._h, 5._h, 1._h, 4._h, 6._h, 1._h, 3._h, 5._h,
+        3._h, 7._h, 6._h, 3._h, 8._h, 4._h, 4._h, 2._h
+    };
+
+    std::vector<armnn::Half> rhs =
+    {
+        2._h, 2._h, 2._h, 2._h, 3._h, 3._h, 3._h, 3._h,
+        4._h, 4._h, 4._h, 4._h, 5._h, 6._h, 5._h, 5._h
+    };
+
+    // Reference result, e.g. (1 - 2)^2 = 1 and (5 - 2)^2 = 9 for the first two elements.
+    std::vector<armnn::Half> expected =
+    {
+        1._h, 9._h, 1._h, 4._h, 9._h, 4._h, 0._h, 4._h,
+        1._h, 9._h, 4._h, 1._h, 9._h, 4._h, 1._h, 9._h
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            dims, lhs,
+            dims, rhs,
+            dims, expected,
+            tensorHandleFactory);
+}
+
+// Float16 SqDiff test pairing a { 1, 2, 2, 3 } operand with a single-element { 1, 1, 1, 1 } operand.
+// The reference values are (x - 2)^2 for every element x of the larger operand.
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    const unsigned int fullDims[]   = { 1, 2, 2, 3 };
+    const unsigned int scalarDims[] = { 1, 1, 1, 1 };
+
+    std::vector<armnn::Half> lhs =
+    {
+        1._h, 2._h, 3._h, 4._h, 5._h, 4._h,
+        1._h, 5._h, 4._h, 2._h, 0._h, 1._h
+    };
+
+    std::vector<armnn::Half> rhs = { 2._h };
+
+    // The output is described with the shape of the larger operand.
+    std::vector<armnn::Half> expected =
+    {
+        1._h, 0._h, 1._h, 4._h, 9._h, 4._h,
+        1._h, 9._h, 4._h, 0._h, 4._h, 1._h
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            scalarDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// Float16 SqDiff test pairing a { 1, 2, 2, 3 } operand with a { 1, 1, 1, 3 } operand.
+// The three rhs values are reused for each group of three consecutive lhs elements.
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+    const unsigned int fullDims[] = { 1, 2, 2, 3 };
+    const unsigned int rowDims[]  = { 1, 1, 1, 3 };
+
+    std::vector<armnn::Half> lhs =
+    {
+        4._h, 2._h, 3._h, 4._h, 5._h, 5._h,
+        2._h, 8._h, 1._h, 1._h, 2._h, 4._h
+    };
+
+    std::vector<armnn::Half> rhs = { 1._h, 5._h, 3._h };
+
+    // Reference result, e.g. (4 - 1)^2, (2 - 5)^2, (3 - 3)^2 for the first three elements.
+    std::vector<armnn::Half> expected =
+    {
+        9._h, 9._h, 0._h, 9._h, 0._h, 4._h,
+        1._h, 9._h, 4._h, 0._h, 9._h, 1._h
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            rowDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// QAsymmU8 SqDiff test in which both operands have the shape { 1, 1, 2, 2 }.
+// The stored uint8_t reference values equal (lhs - rhs)^2 of the stored input values.
+LayerTestResult<uint8_t, 4> SquaredDifferenceUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int lhsDims[] = { 1, 1, 2, 2 };
+    const unsigned int rhsDims[] = { 1, 1, 2, 2 };
+
+    std::vector<uint8_t> lhs = { 4, 2, 4, 3 };
+
+    std::vector<uint8_t> rhs = { 1, 2, 2, 2 };
+
+    // Reference result, e.g. (4 - 1)^2 = 9 for the first element.
+    std::vector<uint8_t> expected = { 9, 0, 4, 1 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            lhsDims, lhs,
+            rhsDims, rhs,
+            lhsDims, expected,
+            tensorHandleFactory);
+}
+
+// QAsymmU8 SqDiff test pairing a { 1, 1, 2, 2 } operand with a single-element { 1, 1, 1, 1 } operand.
+// The stored reference values are (x - 2)^2 for every stored element x of the larger operand.
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[]   = { 1, 1, 2, 2 };
+    const unsigned int scalarDims[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> lhs = { 4, 5, 1, 0 };
+
+    std::vector<uint8_t> rhs = { 2 };
+
+    // The output is described with the shape of the larger operand.
+    std::vector<uint8_t> expected = { 4, 9, 1, 4 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            scalarDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// QAsymmU8 SqDiff test pairing a { 1, 1, 2, 2 } operand with a { 1, 1, 1, 2 } operand.
+// The two rhs values are reused for each pair of consecutive lhs elements.
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[] = { 1, 1, 2, 2 };
+    const unsigned int rowDims[]  = { 1, 1, 1, 2 };
+
+    std::vector<uint8_t> lhs = { 4, 12, 3, 6 };
+
+    std::vector<uint8_t> rhs = { 2, 9 };
+
+    // Reference result: (4 - 2)^2, (12 - 9)^2, (3 - 2)^2, (6 - 9)^2.
+    std::vector<uint8_t> expected = { 4, 9, 1, 9 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            rowDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// QSymmS16 SqDiff test in which both operands share the shape { 2, 2, 2, 2 }.
+// The stored int16_t reference values equal (lhs - rhs)^2 of the stored input values.
+LayerTestResult<int16_t, 4> SquaredDifferenceInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int dims[] = { 2, 2, 2, 2 };
+
+    std::vector<int16_t> lhs =
+    {
+        1, 5, 1, 4, 6, 9, 6, 5,
+        3, 2, 3, 6, 4, 4, 1, 4
+    };
+
+    std::vector<int16_t> rhs =
+    {
+        2, 2, 0, 4, 3, 7, 3, 3,
+        4, 4, 4, 9, 7, 5, 4, 5
+    };
+
+    // Reference result, e.g. (1 - 2)^2 = 1 and (5 - 2)^2 = 9 for the first two elements.
+    std::vector<int16_t> expected =
+    {
+        1, 9, 1, 0, 9, 4, 9, 4,
+        1, 4, 1, 9, 9, 1, 9, 1
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            dims, lhs,
+            dims, rhs,
+            dims, expected,
+            tensorHandleFactory);
+}
+
+// QSymmS16 SqDiff test pairing a { 1, 2, 2, 3 } operand with a single-element { 1, 1, 1, 1 } operand.
+// The stored reference values are (x - 2)^2 for every stored element x of the larger operand.
+LayerTestResult<int16_t, 4> SquaredDiffBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[]   = { 1, 2, 2, 3 };
+    const unsigned int scalarDims[] = { 1, 1, 1, 1 };
+
+    std::vector<int16_t> lhs =
+    {
+        1, 2, 3, 4, 5, 0,
+        5, 4, 1, 4, 5, 2
+    };
+
+    std::vector<int16_t> rhs = { 2 };
+
+    // The output is described with the shape of the larger operand.
+    std::vector<int16_t> expected =
+    {
+        1, 0, 1, 4, 9, 4,
+        9, 4, 1, 4, 9, 0
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            scalarDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// QSymmS16 SqDiff test pairing a { 1, 2, 2, 3 } operand with a { 1, 1, 1, 3 } operand.
+// The three rhs values are reused for each group of three consecutive lhs elements.
+LayerTestResult<int16_t, 4> SquaredDiffBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[] = { 1, 2, 2, 3 };
+    const unsigned int rowDims[]  = { 1, 1, 1, 3 };
+
+    std::vector<int16_t> lhs =
+    {
+        4, 2, 1, 4, 5, 6,
+        7, 3, 5, 8, 1, 5
+    };
+
+    std::vector<int16_t> rhs = { 7, 2, 3 };
+
+    // Reference result, e.g. (4 - 7)^2, (2 - 2)^2, (1 - 3)^2 for the first three elements.
+    std::vector<int16_t> expected =
+    {
+        9, 0, 4, 9, 9, 9,
+        0, 1, 4, 1, 1, 4
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            rowDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// Signed32 SqDiff test in which both operands share the shape { 2, 2, 2, 2 }.
+// Every reference value is (lhs - rhs)^2 for the matching pair of elements.
+LayerTestResult<int32_t, 4> SquaredDifferenceInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int dims[] = { 2, 2, 2, 2 };
+
+    std::vector<int32_t> lhs =
+    {
+        1, 3, 4, 3, 6, 4, 2, 6,
+        3, 1, 3, 1, 4, 3, 4, 3
+    };
+
+    std::vector<int32_t> rhs =
+    {
+        2, 2, 2, 2, 3, 3, 4, 3,
+        4, 4, 4, 4, 5, 5, 5, 5
+    };
+
+    // Reference result, e.g. (1 - 2)^2 = 1 and (4 - 2)^2 = 4 for the first and third elements.
+    std::vector<int32_t> expected =
+    {
+        1, 1, 4, 1, 9, 1, 4, 9,
+        1, 9, 1, 9, 1, 4, 1, 4
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            dims, lhs,
+            dims, rhs,
+            dims, expected,
+            tensorHandleFactory);
+}
+
+// Signed32 SqDiff test pairing a { 1, 2, 2, 3 } operand with a { 1, 1, 1, 3 } operand.
+// The three rhs values are reused for each group of three consecutive lhs elements.
+LayerTestResult<int32_t, 4> SquaredDiffBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[] = { 1, 2, 2, 3 };
+    const unsigned int rowDims[]  = { 1, 1, 1, 3 };
+
+    std::vector<int32_t> lhs =
+    {
+        4, 4, 3, 4, 5, 6,
+        5, 8, 6, 3, 9, 5
+    };
+
+    std::vector<int32_t> rhs = { 2, 7, 3 };
+
+    // Reference result, e.g. (4 - 2)^2, (4 - 7)^2, (3 - 3)^2 for the first three elements.
+    std::vector<int32_t> expected =
+    {
+        4, 9, 0, 4, 4, 9,
+        9, 1, 9, 1, 4, 4
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            rowDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
+
+// Signed32 SqDiff test pairing a { 1, 2, 2, 3 } operand with a single-element { 1, 1, 1, 1 } operand.
+// The reference values are (x - 2)^2 for every element x of the larger operand.
+LayerTestResult<int32_t, 4> SquaredDiffBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int fullDims[]   = { 1, 2, 2, 3 };
+    const unsigned int scalarDims[] = { 1, 1, 1, 1 };
+
+    std::vector<int32_t> lhs =
+    {
+        1, 2, 3, 4, 5, 3,
+        3, 1, 0, 2, 1, 5
+    };
+
+    std::vector<int32_t> rhs = { 2 };
+
+    // The output is described with the shape of the larger operand.
+    std::vector<int32_t> expected =
+    {
+        1, 0, 1, 4, 9, 1,
+        1, 1, 4, 0, 1, 9
+    };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            fullDims, lhs,
+            scalarDims, rhs,
+            fullDims, expected,
+            tensorHandleFactory);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp
new file mode 100644
index 0000000..1d87700
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp
@@ -0,0 +1,88 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+// Float32 SqDiff: both operands have shape { 2, 2, 2, 2 }.
+LayerTestResult<float, 4> SquaredDifferenceTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Float32 SqDiff: { 1, 2, 2, 2 } operand against a single-element { 1, 1, 1, 1 } operand.
+LayerTestResult<float, 4> SquaredDiffBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Float32 SqDiff: { 1, 2, 2, 3 } operand against a { 1, 1, 1, 3 } operand.
+LayerTestResult<float, 4> SquaredDiffBroadcastTest(
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Float16 SqDiff: both operands have shape { 2, 2, 2, 2 }.
+LayerTestResult<armnn::Half, 4> SquaredDifferenceFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Float16 SqDiff: { 1, 2, 2, 3 } operand against a single-element { 1, 1, 1, 1 } operand.
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Float16 SqDiff: { 1, 2, 2, 3 } operand against a { 1, 1, 1, 3 } operand.
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QAsymmU8 SqDiff: { 1, 1, 2, 2 } operand against a single-element { 1, 1, 1, 1 } operand.
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QAsymmU8 SqDiff: both operands have shape { 1, 1, 2, 2 }.
+LayerTestResult<uint8_t, 4> SquaredDifferenceUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QAsymmU8 SqDiff: { 1, 1, 2, 2 } operand against a { 1, 1, 1, 2 } operand.
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QSymmS16 SqDiff: both operands have shape { 2, 2, 2, 2 }.
+LayerTestResult<int16_t , 4> SquaredDifferenceInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QSymmS16 SqDiff: { 1, 2, 2, 3 } operand against a single-element { 1, 1, 1, 1 } operand.
+LayerTestResult<int16_t, 4> SquaredDiffBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// QSymmS16 SqDiff: { 1, 2, 2, 3 } operand against a { 1, 1, 1, 3 } operand.
+LayerTestResult<int16_t, 4> SquaredDiffBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Signed32 SqDiff: both operands have shape { 2, 2, 2, 2 }.
+LayerTestResult<int32_t, 4> SquaredDifferenceInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Signed32 SqDiff: { 1, 2, 2, 3 } operand against a { 1, 1, 1, 3 } operand.
+LayerTestResult<int32_t, 4> SquaredDiffBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+// Signed32 SqDiff: { 1, 2, 2, 3 } operand against a single-element { 1, 1, 1, 1 } operand.
+LayerTestResult<int32_t, 4> SquaredDiffBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);