IVGCVSW-7400 POW IVGCVSW-7278 SQUARED_DIFFERENCE.

* Added 2 new operators as ElementwiseBinary ops
* Ref End to End and unit tests
* Serialize and Deserialize tests
* Delegate and Opaque Delegate tests
* TfLite Parser tests

Signed-off-by: John Mcloughlin <john.mcloughlin@arm.com>
Change-Id: I537158127f602f0c41ca0402aa31655cd3bd4281
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 59059a9..6e53d45 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -698,6 +698,7 @@
              src/armnnTfLiteParser/test/Pack.cpp
              src/armnnTfLiteParser/test/Pad.cpp
              src/armnnTfLiteParser/test/PadV2.cpp
+             src/armnnTfLiteParser/test/Power.cpp
              src/armnnTfLiteParser/test/Prelu.cpp
              src/armnnTfLiteParser/test/Reduce.cpp
              src/armnnTfLiteParser/test/Reshape.cpp
@@ -712,6 +713,7 @@
              src/armnnTfLiteParser/test/Split.cpp
              src/armnnTfLiteParser/test/SplitV.cpp
              src/armnnTfLiteParser/test/Squeeze.cpp
+             src/armnnTfLiteParser/test/SquaredDifference.cpp
              src/armnnTfLiteParser/test/StridedSlice.cpp
              src/armnnTfLiteParser/test/Sub.cpp
              src/armnnTfLiteParser/test/Sum.cpp
@@ -792,6 +794,7 @@
             src/armnnDeserializer/test/DeserializeConvolution3d.cpp
             src/armnnDeserializer/test/DeserializeDepthToSpace.cpp
             src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
+            src/armnnDeserializer/test/DeserializeElementWiseBinary.cpp
             src/armnnDeserializer/test/DeserializeDivision.cpp
             src/armnnDeserializer/test/DeserializeFill.cpp
             src/armnnDeserializer/test/DeserializeFloor.cpp
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 055ffce..272c9a5 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -280,6 +280,8 @@
              test/Convolution3dTest.cpp
              test/ConvolutionTestHelper.hpp
              test/DepthwiseConvolution2dTest.cpp
+             test/ElementwiseBinaryTest.cpp
+             test/ElementwiseBinaryTestHelper.hpp
              test/ElementwiseUnaryTestHelper.hpp
              test/ElementwiseUnaryTest.cpp
              test/ExpandDimsTest.cpp
diff --git a/delegate/classic/src/ElementwiseBinary.hpp b/delegate/classic/src/ElementwiseBinary.hpp
index dbbf479..8055a69 100644
--- a/delegate/classic/src/ElementwiseBinary.hpp
+++ b/delegate/classic/src/ElementwiseBinary.hpp
@@ -174,6 +174,56 @@
     return isSupported ? kTfLiteOk : kTfLiteError;
 }
 
+TfLiteStatus ValidatePowerOperator(DelegateData& delegateData,
+                                   TfLiteContext* tfLiteContext,
+                                   const armnn::TensorInfo& inputInfo1,
+                                   const armnn::TensorInfo& inputInfo2,
+                                   const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC("POWER",
+                                   tfLiteContext,
+                                   IsElementwiseBinarySupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   armnn::BackendId(),
+                                   inputInfo1,
+                                   inputInfo2,
+                                   outputTensorInfo,
+                                   armnn::BinaryOperation::Power);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
+TfLiteStatus ValidateSquaredDifferenceOperator(DelegateData& delegateData,
+                                               TfLiteContext* tfLiteContext,
+                                               const armnn::TensorInfo& inputInfo1,
+                                               const armnn::TensorInfo& inputInfo2,
+                                               const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC("SQUAREDDIFFERENCE",
+                                   tfLiteContext,
+                                   IsElementwiseBinarySupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   armnn::BackendId(),
+                                   inputInfo1,
+                                   inputInfo2,
+                                   outputTensorInfo,
+                                   armnn::BinaryOperation::SqDiff);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
 TfLiteStatus ValidateSubOperator(DelegateData& delegateData,
                                  TfLiteContext* tfLiteContext,
                                  const armnn::TensorInfo& inputInfo1,
@@ -322,6 +372,18 @@
                                            inputTensorInfo0,
                                            inputTensorInfo1,
                                            outputTensorInfo);
+            case kTfLiteBuiltinPow:
+                return ValidatePowerOperator(delegateData,
+                                             tfLiteContext,
+                                             inputTensorInfo0,
+                                             inputTensorInfo1,
+                                             outputTensorInfo);
+            case kTfLiteBuiltinSquaredDifference:
+                return ValidateSquaredDifferenceOperator(delegateData,
+                                                         tfLiteContext,
+                                                         inputTensorInfo0,
+                                                         inputTensorInfo1,
+                                                         outputTensorInfo);
             case kTfLiteBuiltinSub:
                 return ValidateSubOperator(delegateData,
                                            tfLiteContext,
@@ -364,6 +426,14 @@
             elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
                     armnn::BinaryOperation::Mul);
             break;
+        case kTfLiteBuiltinPow:
+            elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
+                    armnn::BinaryOperation::Power);
+            break;
+        case kTfLiteBuiltinSquaredDifference:
+            elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
+                    armnn::BinaryOperation::SqDiff);
+            break;
         case kTfLiteBuiltinSub:
             elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
                     armnn::BinaryOperation::Sub);
diff --git a/delegate/classic/src/armnn_delegate.cpp b/delegate/classic/src/armnn_delegate.cpp
index 9b4a7d3..e597d13 100644
--- a/delegate/classic/src/armnn_delegate.cpp
+++ b/delegate/classic/src/armnn_delegate.cpp
@@ -871,6 +871,12 @@
                                     tfLiteNode,
                                     nodeIndex,
                                     kTfLiteBuiltinPadv2);
+        case kTfLiteBuiltinPow:
+            return VisitElementwiseBinaryOperator(delegateData,
+                                                  tfLiteContext,
+                                                  tfLiteNode,
+                                                  nodeIndex,
+                                                  kTfLiteBuiltinPow);
         case kTfLiteBuiltinPrelu:
             return VisitPreluOperator(delegateData,
                                       tfLiteContext,
@@ -979,6 +985,12 @@
                                                  tfLiteNode,
                                                  nodeIndex,
                                                  armnn::UnaryOperation::Sqrt);
+        case kTfLiteBuiltinSquaredDifference:
+            return VisitElementwiseBinaryOperator(delegateData,
+                                                  tfLiteContext,
+                                                  tfLiteNode,
+                                                  nodeIndex,
+                                                  kTfLiteBuiltinSquaredDifference);
         case kTfLiteBuiltinSqueeze:
             return VisitSqueezeOperator(delegateData,
                                         tfLiteContext,
diff --git a/delegate/opaque/src/ElementwiseBinary.hpp b/delegate/opaque/src/ElementwiseBinary.hpp
index d6a0947..8448609 100644
--- a/delegate/opaque/src/ElementwiseBinary.hpp
+++ b/delegate/opaque/src/ElementwiseBinary.hpp
@@ -167,6 +167,56 @@
     return isSupported ? kTfLiteOk : kTfLiteError;
 }
 
+TfLiteStatus ValidatePowerOperator(DelegateData& delegateData,
+                                   TfLiteOpaqueContext* tfLiteContext,
+                                   const armnn::TensorInfo& inputInfo1,
+                                   const armnn::TensorInfo& inputInfo2,
+                                   const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("POWER",
+                                          tfLiteContext,
+                                          IsElementwiseBinarySupported,
+                                          delegateData.m_Backends,
+                                          isSupported,
+                                          armnn::BackendId(),
+                                          inputInfo1,
+                                          inputInfo2,
+                                          outputTensorInfo,
+                                          armnn::BinaryOperation::Power);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
+TfLiteStatus ValidateSquaredDifferenceOperator(DelegateData& delegateData,
+                                               TfLiteOpaqueContext* tfLiteContext,
+                                               const armnn::TensorInfo& inputInfo1,
+                                               const armnn::TensorInfo& inputInfo2,
+                                               const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_OPAQUE_SUPPORT_FUNC("SQUAREDDIFFERENCE",
+                                          tfLiteContext,
+                                          IsElementwiseBinarySupported,
+                                          delegateData.m_Backends,
+                                          isSupported,
+                                          armnn::BackendId(),
+                                          inputInfo1,
+                                          inputInfo2,
+                                          outputTensorInfo,
+                                          armnn::BinaryOperation::SqDiff);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
 TfLiteStatus ValidateSubOperator(DelegateData& delegateData,
                                  TfLiteOpaqueContext* tfLiteContext,
                                  const armnn::TensorInfo& inputInfo1,
@@ -336,6 +386,18 @@
                                            inputTensorInfo0,
                                            inputTensorInfo1,
                                            outputTensorInfo);
+            case kTfLiteBuiltinPow:
+                return ValidatePowerOperator(delegateData,
+                                             tfLiteContext,
+                                             inputTensorInfo0,
+                                             inputTensorInfo1,
+                                             outputTensorInfo);
+            case kTfLiteBuiltinSquaredDifference:
+                return ValidateSquaredDifferenceOperator(delegateData,
+                                                         tfLiteContext,
+                                                         inputTensorInfo0,
+                                                         inputTensorInfo1,
+                                                         outputTensorInfo);
             case kTfLiteBuiltinSub:
                 return ValidateSubOperator(delegateData,
                                            tfLiteContext,
@@ -378,6 +440,14 @@
             elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
                     armnn::BinaryOperation::Mul);
             break;
+        case kTfLiteBuiltinPow:
+            elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
+                    armnn::BinaryOperation::Power);
+            break;
+        case kTfLiteBuiltinSquaredDifference:
+            elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
+                    armnn::BinaryOperation::SqDiff);
+            break;
         case kTfLiteBuiltinSub:
             elementwiseBinaryLayer = delegateData.m_Network->AddElementwiseBinaryLayer(
                     armnn::BinaryOperation::Sub);
diff --git a/delegate/opaque/src/armnn_delegate.cpp b/delegate/opaque/src/armnn_delegate.cpp
index 1c9f2d9..45aff85 100644
--- a/delegate/opaque/src/armnn_delegate.cpp
+++ b/delegate/opaque/src/armnn_delegate.cpp
@@ -966,6 +966,12 @@
                                     tfLiteNode,
                                     nodeIndex,
                                     kTfLiteBuiltinPadv2);
+        case kTfLiteBuiltinPow:
+            return VisitElementwiseBinaryOperator(delegateData,
+                                                  tfLiteContext,
+                                                  tfLiteNode,
+                                                  nodeIndex,
+                                                  kTfLiteBuiltinPow);
         case kTfLiteBuiltinPrelu:
             return VisitPreluOperator(delegateData,
                                       tfLiteContext,
@@ -1088,6 +1094,12 @@
                                        tfLiteNode,
                                        nodeIndex,
                                        kTfLiteBuiltinSplitV);
+        case kTfLiteBuiltinSquaredDifference:
+            return VisitElementwiseBinaryOperator(delegateData,
+                                                  tfLiteContext,
+                                                  tfLiteNode,
+                                                  nodeIndex,
+                                                  kTfLiteBuiltinSquaredDifference);
         case kTfLiteBuiltinSub:
             return VisitElementwiseBinaryOperator(delegateData,
                                                   tfLiteContext,
diff --git a/delegate/test/ElementwiseBinaryTest.cpp b/delegate/test/ElementwiseBinaryTest.cpp
index effed03..2f22e7d 100644
--- a/delegate/test/ElementwiseBinaryTest.cpp
+++ b/delegate/test/ElementwiseBinaryTest.cpp
@@ -699,6 +699,50 @@
                                  expectedOutputValues);
 }
 
+void PowerFP32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> input0Shape { 1, 1, 2, 2 };
+    std::vector<int32_t> input1Shape { 1, 1, 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 1, 1, 2, 2 };
+
+    std::vector<float> input0Values = { 1, 3, 3, -7 };
+    std::vector<float> input1Values = { 1, 1, 0, 2 };
+    std::vector<float> expectedOutputValues = { 1, 3, 1, 49 };
+
+    ElementwiseBinaryTest<float>(tflite::BuiltinOperator_POW,
+                                 tflite::ActivationFunctionType_NONE,
+                                 ::tflite::TensorType_FLOAT32,
+                                 backends,
+                                 input0Shape,
+                                 input1Shape,
+                                 expectedOutputShape,
+                                 input0Values,
+                                 input1Values,
+                                 expectedOutputValues);
+}
+
+void SqDiffFP32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> input0Shape { 1, 1, 2, 2 };
+    std::vector<int32_t> input1Shape { 1, 1, 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 1, 1, 2, 2 };
+
+    std::vector<float> input0Values = { 1, 3, 3, -7 };
+    std::vector<float> input1Values = { 1, -1, 0, -2 };
+    std::vector<float> expectedOutputValues = { 0, 16, 9, 25 };
+
+    ElementwiseBinaryTest<float>(tflite::BuiltinOperator_SQUARED_DIFFERENCE,
+                                 tflite::ActivationFunctionType_NONE,
+                                 ::tflite::TensorType_FLOAT32,
+                                 backends,
+                                 input0Shape,
+                                 input1Shape,
+                                 expectedOutputShape,
+                                 input0Values,
+                                 input1Values,
+                                 expectedOutputValues);
+}
+
 void SubBroadcastTest(std::vector<armnn::BackendId>& backends)
 {
     std::vector<int32_t> input0Shape { 1, 1, 2, 2 };
@@ -1131,6 +1175,18 @@
     SubUint8Test(backends);
 }
 
+TEST_CASE ("SqDiffFP32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    SqDiffFP32Test(backends);
+}
+
+TEST_CASE ("PowerFP32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    PowerFP32Test(backends);
+}
+
 } // TEST_SUITE("ElementwiseBinary_CpuRefTests")
 
 } // namespace armnnDelegate
diff --git a/delegate/test/ElementwiseBinaryTestHelper.hpp b/delegate/test/ElementwiseBinaryTestHelper.hpp
index fa9cbb8..fc75491 100644
--- a/delegate/test/ElementwiseBinaryTestHelper.hpp
+++ b/delegate/test/ElementwiseBinaryTestHelper.hpp
@@ -124,6 +124,18 @@
             operatorBuiltinOptions = CreateSubOptions(flatBufferBuilder, activationType).Union();
             break;
         }
+        case BuiltinOperator_POW:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_PowOptions;
+            operatorBuiltinOptions = CreatePowOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case BuiltinOperator_SQUARED_DIFFERENCE:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_SquaredDifferenceOptions;
+            operatorBuiltinOptions = CreateSquaredDifferenceOptions(flatBufferBuilder).Union();
+            break;
+        }
         case BuiltinOperator_FLOOR_DIV:
         {
             operatorBuiltinOptionsType = tflite::BuiltinOptions_FloorDivOptions;
diff --git a/docs/02_operator_list.dox b/docs/02_operator_list.dox
index 53e37e2..16b0f0b 100644
--- a/docs/02_operator_list.dox
+++ b/docs/02_operator_list.dox
@@ -1164,6 +1164,52 @@
     <tr><td>FLOAT32
     </table>
 <tr>
+  <td rowspan="3">ElementwiseBinaryLayer
+  <td rowspan="3" style="width:200px;"> Layer to perform Power and Squared Difference operations.
+  <td rowspan="3">
+      <ul>
+       <li>ANEURALNETWORKS_POW
+      </ul>
+   <td>CpuRef
+     <td>
+         <ul>
+          <li>All
+         </ul>
+     <td>
+      <table>
+       <tr><th>
+       <tr><td>FLOAT16
+       <tr><td>FLOAT32
+       <tr><td>QASYMMS8
+       <tr><td>QASYMMU8
+       <tr><td>QSYMMS16
+       <tr><td>SIGNED32
+      </table>
+<tr>
+  <td>CpuAcc
+  <td>
+      <ul>
+       <li>All
+      </ul>
+  <td>
+    <table>
+    <tr><th>
+    <tr><td>FLOAT16
+    <tr><td>FLOAT32
+    </table>
+<tr>
+  <td>GpuAcc
+  <td>
+      <ul>
+       <li>All
+      </ul>
+  <td>
+    <table>
+    <tr><th>
+    <tr><td>FLOAT16
+    <tr><td>FLOAT32
+    </table>
+<tr>
   <td rowspan="3">ElementwiseUnaryLayer
   <td rowspan="3" style="width:200px;"> Layer to perform Rsqrt - Exp - Neg - Log - Abs - Sin - Sqrt operations.
   <td rowspan="3">
diff --git a/docs/05_01_parsers.dox b/docs/05_01_parsers.dox
index e8d4797..08d569d 100644
--- a/docs/05_01_parsers.dox
+++ b/docs/05_01_parsers.dox
@@ -162,6 +162,7 @@
 - PACK
 - PAD
 - PADV2
+- POW
 - PRELU
 - QUANTIZE
 - RELU
@@ -183,6 +184,7 @@
 - SPLIT_V
 - SQUEEZE
 - SQRT
+- SQUARED_DIFFERENCE
 - STRIDED_SLICE
 - SUB
 - SUM
diff --git a/docs/05_03_delegate.dox b/docs/05_03_delegate.dox
index b804268..49d6af3 100644
--- a/docs/05_03_delegate.dox
+++ b/docs/05_03_delegate.dox
@@ -141,6 +141,8 @@
 
 - PADV2
 
+- POW
+
 - PRELU
 
 - QUANTIZE
@@ -183,6 +185,8 @@
 
 - SQRT
 
+- SQUARED_DIFFERENCE
+
 - SQUEEZE
 
 - SLICE
diff --git a/include/armnn/Types.hpp b/include/armnn/Types.hpp
index 513ea3f..117a679 100644
--- a/include/armnn/Types.hpp
+++ b/include/armnn/Types.hpp
@@ -141,7 +141,9 @@
     Maximum = 2,
     Minimum = 3,
     Mul     = 4,
-    Sub     = 5
+    Sub     = 5,
+    SqDiff  = 6,
+    Power   = 7
 };
 
 enum class PoolingAlgorithm
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 3bd24bd..79cc398 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -581,6 +581,10 @@
             return armnn::BinaryOperation::Mul;
         case armnnSerializer::BinaryOperation::BinaryOperation_Sub:
             return armnn::BinaryOperation::Sub;
+        case armnnSerializer::BinaryOperation::BinaryOperation_SqDiff:
+            return armnn::BinaryOperation::SqDiff;
+        case armnnSerializer::BinaryOperation::BinaryOperation_Power:
+            return armnn::BinaryOperation::Power;
         default:
             throw armnn::InvalidArgumentException("Binary operation unknown");
     }
diff --git a/src/armnnDeserializer/test/DeserializeElementWiseBinary.cpp b/src/armnnDeserializer/test/DeserializeElementWiseBinary.cpp
new file mode 100644
index 0000000..39aed05
--- /dev/null
+++ b/src/armnnDeserializer/test/DeserializeElementWiseBinary.cpp
@@ -0,0 +1,163 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersSerializeFixture.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <string>
+
+TEST_SUITE("ElementWiseBinary_Deserializer")
+{
+    struct ElementwiseBinaryFixture : public ParserFlatbuffersSerializeFixture {
+        explicit ElementwiseBinaryFixture(const std::string & inputShape1,
+                                          const std::string & inputShape2,
+                                          const std::string & outputShape,
+                                          const std::string & dataType,
+                                          const std::string &binaryOperation) {
+            m_JsonString = R"(
+            {
+                    inputIds: [0, 1],
+                    outputIds: [3],
+                    layers: [
+                    {
+                        layer_type: "InputLayer",
+                        layer: {
+                              base: {
+                                    layerBindingId: 0,
+                                    base: {
+                                        index: 0,
+                                        layerName: "InputLayer1",
+                                        layerType: "Input",
+                                        inputSlots: [{
+                                            index: 0,
+                                            connection: {sourceLayerIndex:0, outputSlotIndex:0 },
+                                        }],
+                                        outputSlots: [ {
+                                            index: 0,
+                                            tensorInfo: {
+                                                dimensions: )" + inputShape1 + R"(,
+                                                dataType: )" + dataType + R"(
+                                            },
+                                        }],
+                                     },}},
+                    },
+                    {
+                    layer_type: "InputLayer",
+                    layer: {
+                           base: {
+                                layerBindingId: 1,
+                                base: {
+                                      index:1,
+                                      layerName: "InputLayer2",
+                                      layerType: "Input",
+                                      inputSlots: [{
+                                          index: 0,
+                                          connection: {sourceLayerIndex:0, outputSlotIndex:0 },
+                                      }],
+                                      outputSlots: [ {
+                                          index: 0,
+                                          tensorInfo: {
+                                              dimensions: )" + inputShape2 + R"(,
+                                              dataType: )" + dataType + R"(
+                                          },
+                                      }],
+                                    },}},
+                    },
+                    {
+                        layer_type: "ElementwiseBinaryLayer",
+                        layer: {
+                            base: {
+                                index: 2,
+                                layerName: "ElementwiseBinaryLayer",
+                                layerType: "ElementwiseBinary",
+                                inputSlots: [                                            {
+                                             index: 0,
+                                             connection: {sourceLayerIndex:0, outputSlotIndex:0 },
+                                            },
+                                            {
+                                             index: 1,
+                                             connection: {sourceLayerIndex:1, outputSlotIndex:0 },
+                                            }],
+                                outputSlots: [{
+                                    index: 0,
+                                    tensorInfo: {
+                                        dimensions: )" + outputShape + R"(,
+                                        dataType: )" + dataType + R"(
+                                    }
+                                }]
+                            },
+                            descriptor: {
+                                operation: )" + binaryOperation + R"(
+                            },
+                        }
+                    },
+                    {
+                        layer_type: "OutputLayer",
+                        layer: {
+                            base:{
+                                layerBindingId: 0,
+                                base: {
+                                    index: 3,
+                                    layerName: "OutputLayer",
+                                    layerType: "Output",
+                                    inputSlots: [{
+                                        index: 0,
+                                        connection: {sourceLayerIndex:2, outputSlotIndex:0 },
+                                    }],
+                                    outputSlots: [{
+                                        index: 0,
+                                        tensorInfo: {
+                                            dimensions: )" + outputShape + R"(,
+                                            dataType: )" + dataType + R"(
+                                        },
+                                    }],
+                                }
+                            }
+                        },
+                    }
+                ]
+            }
+        )";
+            Setup();
+        }
+    };
+
+struct SimplePowerFixture : ElementwiseBinaryFixture
+{
+    SimplePowerFixture() : ElementwiseBinaryFixture("[ 2, 2 ]",
+                                                    "[ 2, 2 ]",
+                                                    "[ 2, 2 ]",
+                                                    "QuantisedAsymm8",
+                                                    "Power") {}
+};
+
+TEST_CASE_FIXTURE(SimplePowerFixture, "PowerQuantisedAsymm8")
+{
+    RunTest<2, armnn::DataType::QAsymmU8>(
+            0,
+            {{"InputLayer1", { 0, 1, 2, 3 }},
+             {"InputLayer2", { 4, 5, 3, 2 }}},
+            {{"OutputLayer", { 0, 1, 8, 9 }}});
+}
+
+struct SimpleSquaredDifferenceFixture : ElementwiseBinaryFixture
+{
+    SimpleSquaredDifferenceFixture() : ElementwiseBinaryFixture("[ 2, 2 ]",
+                                                                "[ 2, 2 ]",
+                                                                "[ 2, 2 ]",
+                                                                "QuantisedAsymm8",
+                                                                "SqDiff") {}
+};
+
+TEST_CASE_FIXTURE(SimpleSquaredDifferenceFixture, "SquaredDifferenceQuantisedAsymm8")
+{
+    RunTest<2, armnn::DataType::QAsymmU8>(
+            0,
+            {{"InputLayer1", { 5, 1, 7, 9 }},
+             {"InputLayer2", { 4, 5, 2, 1 }}},
+            {{"OutputLayer", { 1, 16, 25, 64 }}});
+}
+
+}
\ No newline at end of file
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index a8b5d72..84149bd 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -331,7 +331,9 @@
     Maximum = 2,
     Minimum = 3,
     Mul     = 4,
-    Sub     = 5
+    Sub     = 5,
+    SqDiff  = 6,
+    Power   = 7
 }
 
 table ElementwiseBinaryDescriptor {
diff --git a/src/armnnSerializer/SerializerUtils.cpp b/src/armnnSerializer/SerializerUtils.cpp
index 2188fdc..970c415 100644
--- a/src/armnnSerializer/SerializerUtils.cpp
+++ b/src/armnnSerializer/SerializerUtils.cpp
@@ -123,6 +123,10 @@
             return armnnSerializer::BinaryOperation::BinaryOperation_Mul;
         case armnn::BinaryOperation::Sub:
             return armnnSerializer::BinaryOperation::BinaryOperation_Sub;
+        case armnn::BinaryOperation::SqDiff:
+            return armnnSerializer::BinaryOperation::BinaryOperation_SqDiff;
+        case armnn::BinaryOperation::Power:
+            return armnnSerializer::BinaryOperation::BinaryOperation_Power;
         default:
             throw armnn::InvalidArgumentException("Elementwise Binary operation unknown");
     }
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 6b9b5df..bd8a76a 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -1012,7 +1012,8 @@
 TEST_CASE("SerializeElementwiseBinary")
 {
     using op = armnn::BinaryOperation;
-    std::initializer_list<op> allBinaryOperations = {op::Add, op::Div, op::Maximum, op::Minimum, op::Mul, op::Sub};
+    std::initializer_list<op> allBinaryOperations =
+            {op::Add, op::Div, op::Maximum, op::Minimum, op::Mul, op::Power, op::SqDiff, op::Sub};
 
     for (auto binaryOperation : allBinaryOperations)
     {
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 244f1fa..5075da4 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -797,6 +797,7 @@
     m_ParserFunctions[tflite::BuiltinOperator_PACK]                    = &TfLiteParserImpl::ParsePack;
     m_ParserFunctions[tflite::BuiltinOperator_PAD]                     = &TfLiteParserImpl::ParsePad;
     m_ParserFunctions[tflite::BuiltinOperator_PADV2]                   = &TfLiteParserImpl::ParsePad;
+    m_ParserFunctions[tflite::BuiltinOperator_POW]                     = &TfLiteParserImpl::ParsePower;
     m_ParserFunctions[tflite::BuiltinOperator_PRELU]                   = &TfLiteParserImpl::ParsePrelu;
     m_ParserFunctions[tflite::BuiltinOperator_QUANTIZE]                = &TfLiteParserImpl::ParseQuantize;
     m_ParserFunctions[tflite::BuiltinOperator_RELU]                    = &TfLiteParserImpl::ParseRelu;
@@ -818,6 +819,7 @@
     m_ParserFunctions[tflite::BuiltinOperator_SPLIT]                   = &TfLiteParserImpl::ParseSplit;
     m_ParserFunctions[tflite::BuiltinOperator_SPLIT_V]                 = &TfLiteParserImpl::ParseSplitV;
     m_ParserFunctions[tflite::BuiltinOperator_SQUEEZE]                 = &TfLiteParserImpl::ParseSqueeze;
+    m_ParserFunctions[tflite::BuiltinOperator_SQUARED_DIFFERENCE]      = &TfLiteParserImpl::ParseSquaredDifference;
     m_ParserFunctions[tflite::BuiltinOperator_STRIDED_SLICE]           = &TfLiteParserImpl::ParseStridedSlice;
     m_ParserFunctions[tflite::BuiltinOperator_SUB]                     = &TfLiteParserImpl::ParseSub;
     m_ParserFunctions[tflite::BuiltinOperator_SUM]                     = &TfLiteParserImpl::ParseSum;
@@ -4584,6 +4586,36 @@
     ParseElementwiseUnary(subgraphIndex, operatorIndex, armnn::UnaryOperation::Neg);
 }
 
+void TfLiteParserImpl::ParsePower(size_t subgraphIndex, size_t operatorIndex)
+{
+    CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
+
+    auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(inputs.size(), 2);
+
+    auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    auto layerName = fmt::format("Power:{}:{}", subgraphIndex, operatorIndex);
+
+    TensorInfo inputTensorInfo  = InputTensorInfo(subgraphIndex, operatorIndex, 0);
+    TensorInfo input1TensorInfo = InputTensorInfo(subgraphIndex, operatorIndex, 1);
+    CheckMatchingQuantization(inputTensorInfo, input1TensorInfo, layerName, "Input 0", "Input 1");
+
+    IConnectableLayer* layer = m_Network->AddElementwiseBinaryLayer(BinaryOperation::Power, layerName.c_str());
+    ARMNN_ASSERT(layer != nullptr);
+
+    TensorInfo outputTensorInfo = OutputTensorInfoFromInputs(subgraphIndex, operatorIndex, layer, 0, {0, 1});
+    CheckMatchingQuantization(inputTensorInfo, outputTensorInfo, layerName, "Input 0", "Output 0");
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0], inputTensorIndexes[1]});
+
+    auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
+}
+
 void TfLiteParserImpl::ParseRsqrt(size_t subgraphIndex, size_t operatorIndex)
 {
     ParseElementwiseUnary(subgraphIndex, operatorIndex, armnn::UnaryOperation::Rsqrt);
@@ -4599,6 +4631,36 @@
     ParseElementwiseUnary(subgraphIndex, operatorIndex, armnn::UnaryOperation::Sqrt);
 }
 
+void TfLiteParserImpl::ParseSquaredDifference(size_t subgraphIndex, size_t operatorIndex)
+{
+    CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
+
+    auto inputs = GetInputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(inputs.size(), 2);
+
+    auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
+    CHECK_VALID_SIZE(outputs.size(), 1);
+
+    auto layerName = fmt::format("SquaredDifference:{}:{}", subgraphIndex, operatorIndex);
+
+    TensorInfo inputTensorInfo  = InputTensorInfo(subgraphIndex, operatorIndex, 0);
+    TensorInfo input1TensorInfo = InputTensorInfo(subgraphIndex, operatorIndex, 1);
+    CheckMatchingQuantization(inputTensorInfo, input1TensorInfo, layerName, "Input 0", "Input 1");
+
+    IConnectableLayer* layer = m_Network->AddElementwiseBinaryLayer(BinaryOperation::SqDiff, layerName.c_str());
+    ARMNN_ASSERT(layer != nullptr);
+
+    TensorInfo outputTensorInfo = OutputTensorInfoFromInputs(subgraphIndex, operatorIndex, layer, 0, {0, 1});
+    CheckMatchingQuantization(inputTensorInfo, outputTensorInfo, layerName, "Input 0", "Output 0");
+    layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterInputSlots(subgraphIndex, operatorIndex, layer, {inputTensorIndexes[0], inputTensorIndexes[1]});
+
+    auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
+    RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
+}
+
 void TfLiteParserImpl::ParseElementwiseUnary(size_t subgraphIndex, size_t operatorIndex, UnaryOperation unaryOperation)
 {
     CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index 91fad43..774d054 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -163,6 +163,7 @@
     void ParsePack(size_t subgraphIndex, size_t operatorIndex);
     void ParsePad(size_t subgraphIndex, size_t operatorIndex);
     void ParsePool(size_t subgraphIndex, size_t operatorIndex, armnn::PoolingAlgorithm algorithm);
+    void ParsePower(size_t subgraphIndex, size_t operatorIndex);
     void ParsePrelu(size_t subgraphIndex, size_t operatorIndex);
     void ParseQuantize(size_t subgraphIndex, size_t operatorIndex);
     void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation);
@@ -186,6 +187,7 @@
     void ParseSplit(size_t subgraphIndex, size_t operatorIndex);
     void ParseSplitV(size_t subgraphIndex, size_t operatorIndex);
     void ParseSqueeze(size_t subgraphIndex, size_t operatorIndex);
+    void ParseSquaredDifference(size_t subgraphIndex, size_t operatorIndex);
     void ParseStridedSlice(size_t subgraphIndex, size_t operatorIndex);
     void ParseSub(size_t subgraphIndex, size_t operatorIndex);
     void ParseSum(size_t subgraphIndex, size_t operatorIndex);
diff --git a/src/armnnTfLiteParser/test/Power.cpp b/src/armnnTfLiteParser/test/Power.cpp
new file mode 100644
index 0000000..f8b354a
--- /dev/null
+++ b/src/armnnTfLiteParser/test/Power.cpp
@@ -0,0 +1,101 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersFixture.hpp"
+
+#include <doctest/doctest.h>
+
+
+TEST_SUITE("TensorflowLiteParser_Power")
+{
+    struct PowerFixture : public ParserFlatbuffersFixture
+    {
+        explicit PowerFixture(const std::string & inputShape1,
+                              const std::string & inputShape2,
+                              const std::string & outputShape,
+                              const std::string & activation="NONE")
+        {
+            m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": "POW" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": )" + inputShape1 + R"(,
+                            "type": "UINT8",
+                            "buffer": 0,
+                            "name": "inputTensor1",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + inputShape2 + R"(,
+                            "type": "UINT8",
+                            "buffer": 1,
+                            "name": "inputTensor2",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + outputShape + R"( ,
+                            "type": "UINT8",
+                            "buffer": 2,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0, 1 ],
+                    "outputs": [ 2 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0, 1 ],
+                            "outputs": [ 2 ],
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { }
+                ]
+            }
+        )";
+            Setup();
+        }
+    };
+
+
+    struct SimplePowerFixture : PowerFixture
+    {
+        SimplePowerFixture() : PowerFixture("[ 2, 2 ]",
+                                            "[ 2, 2 ]",
+                                            "[ 2, 2 ]") {}
+    };
+
+    TEST_CASE_FIXTURE(SimplePowerFixture, "SimplePower")
+    {
+        RunTest<2, armnn::DataType::QAsymmU8>(
+                0,
+                {{"inputTensor1", { 0, 1, 2, 3 }},
+                 {"inputTensor2", { 4, 5, 6, 3 }}},
+                {{"outputTensor", { 0, 1, 64, 27 }}});
+    }
+
+}
diff --git a/src/armnnTfLiteParser/test/SquaredDifference.cpp b/src/armnnTfLiteParser/test/SquaredDifference.cpp
new file mode 100644
index 0000000..97b4d3f
--- /dev/null
+++ b/src/armnnTfLiteParser/test/SquaredDifference.cpp
@@ -0,0 +1,101 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersFixture.hpp"
+
+#include <doctest/doctest.h>
+
+
+TEST_SUITE("TensorflowLiteParser_SquaredDifference")
+{
+    struct SquaredDifferenceFixture : public ParserFlatbuffersFixture
+    {
+        explicit SquaredDifferenceFixture(const std::string & inputShape1,
+                                          const std::string & inputShape2,
+                                          const std::string & outputShape,
+                                          const std::string & activation="NONE")
+        {
+            m_JsonString = R"(
+                {
+                    "version": 3,
+                    "operator_codes": [ { "builtin_code": "SQUARED_DIFFERENCE" } ],
+                    "subgraphs": [ {
+                        "tensors": [
+                            {
+                                "shape": )" + inputShape1 + R"(,
+                                "type": "UINT8",
+                                "buffer": 0,
+                                "name": "inputTensor1",
+                                "quantization": {
+                                    "min": [ 0.0 ],
+                                    "max": [ 255.0 ],
+                                    "scale": [ 1.0 ],
+                                    "zero_point": [ 0 ],
+                                }
+                            },
+                            {
+                                "shape": )" + inputShape2 + R"(,
+                                "type": "UINT8",
+                                "buffer": 1,
+                                "name": "inputTensor2",
+                                "quantization": {
+                                    "min": [ 0.0 ],
+                                    "max": [ 255.0 ],
+                                    "scale": [ 1.0 ],
+                                    "zero_point": [ 0 ],
+                                }
+                            },
+                            {
+                                "shape": )" + outputShape + R"( ,
+                                "type": "UINT8",
+                                "buffer": 2,
+                                "name": "outputTensor",
+                                "quantization": {
+                                    "min": [ 0.0 ],
+                                    "max": [ 255.0 ],
+                                    "scale": [ 1.0 ],
+                                    "zero_point": [ 0 ],
+                                }
+                            }
+                        ],
+                        "inputs": [ 0, 1 ],
+                        "outputs": [ 2 ],
+                        "operators": [
+                            {
+                                "opcode_index": 0,
+                                "inputs": [ 0, 1 ],
+                                "outputs": [ 2 ],
+                                "custom_options_format": "FLEXBUFFERS"
+                            }
+                        ],
+                    } ],
+                    "buffers" : [
+                        { },
+                        { }
+                    ]
+                }
+            )";
+            Setup();
+        }
+    };
+
+
+    struct SimpleSquaredDifferenceFixture : SquaredDifferenceFixture
+    {
+        SimpleSquaredDifferenceFixture() : SquaredDifferenceFixture("[ 2, 2 ]",
+                                                                    "[ 2, 2 ]",
+                                                                    "[ 2, 2 ]") {}
+    };
+
+    TEST_CASE_FIXTURE(SimpleSquaredDifferenceFixture, "SimpleSquaredDifference")
+    {
+        RunTest<2, armnn::DataType::QAsymmU8>(
+            0,
+            {{"inputTensor1", { 4, 1, 8, 9 }},
+            {"inputTensor2", { 0, 5, 6, 3 }}},
+            {{"outputTensor", { 16, 16, 4, 36 }}});
+    }
+
+}
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 986d253..c868cbe 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -84,6 +84,7 @@
     test/layerTests/PadTestImpl.cpp \
     test/layerTests/Pooling2dTestImpl.cpp \
     test/layerTests/Pooling3dTestImpl.cpp \
+    test/layerTests/PowerTestImpl.cpp \
     test/layerTests/RankTestImpl.cpp \
     test/layerTests/ReductionTestImpl.cpp \
     test/layerTests/ReduceProdTestImpl.cpp \
@@ -92,6 +93,7 @@
     test/layerTests/ResizeTestImpl.cpp \
     test/layerTests/RsqrtTestImpl.cpp \
     test/layerTests/SliceTestImpl.cpp \
+    test/layerTests/SquaredDifferenceTestImpl.cpp \
     test/layerTests/QuantizeTestImpl.cpp \
     test/layerTests/SinTestImpl.cpp \
     test/layerTests/ShapeTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 95065df..aba9c72 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -153,6 +153,8 @@
     layerTests/Pooling2dTestImpl.hpp
     layerTests/Pooling3dTestImpl.cpp
     layerTests/Pooling3dTestImpl.hpp
+    layerTests/PowerTestImpl.cpp
+    layerTests/PowerTestImpl.hpp
     layerTests/PreluTestImpl.hpp
     layerTests/QuantizeTestImpl.cpp
     layerTests/QuantizeTestImpl.hpp
@@ -186,6 +188,8 @@
     layerTests/SplitterTestImpl.hpp
     layerTests/SqrtTestImpl.cpp
     layerTests/SqrtTestImpl.hpp
+    layerTests/SquaredDifferenceTestImpl.cpp
+    layerTests/SquaredDifferenceTestImpl.hpp
     layerTests/StackTestImpl.cpp
     layerTests/StackTestImpl.hpp
     layerTests/StridedSliceTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
index 6546a6a..0d47fd6 100644
--- a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
@@ -89,6 +89,12 @@
         case armnn::BinaryOperation::Sub:
             expectedOutput = { -1, -3, -1, -1,  3, -7, 3, 3,  -5, 1, 1, 1,  2, 2, -6, 2 };
             break;
+        case armnn::BinaryOperation::SqDiff:
+            expectedOutput = { 1, 9, 1, 1,  9, 49, 9, 9, 25, 1, 1, 1,  4, 4, 36, 4  };
+            break;
+        case armnn::BinaryOperation::Power:
+            expectedOutput = { 1, 1, 1, 1, 25, 25, 25, 25,  9, 9, 9, 9,  16, 16, 16, 16 };
+            break;
         default:
             throw("Invalid Elementwise Binary operation");
     }
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 00bfea5..b6ddb31 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -51,6 +51,7 @@
 #include <backendsCommon/test/layerTests/PermuteTestImpl.hpp>
 #include <backendsCommon/test/layerTests/Pooling2dTestImpl.hpp>
 #include <backendsCommon/test/layerTests/Pooling3dTestImpl.hpp>
+#include <backendsCommon/test/layerTests/PowerTestImpl.hpp>
 #include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
 #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/RankTestImpl.hpp>
@@ -68,6 +69,7 @@
 #include <backendsCommon/test/layerTests/SpaceToDepthTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SplitterTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SqrtTestImpl.hpp>
+#include <backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/StackTestImpl.hpp>
 #include <backendsCommon/test/layerTests/StridedSliceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SubtractionTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
index da6e11f..ded53cb 100644
--- a/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
+++ b/src/backends/backendsCommon/test/layerTests/ElementwiseTestImpl.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -206,4 +206,67 @@
             tensorHandleFactory,
             quantScale,
             quantOffset);
-}
\ No newline at end of file
+}
+
+// Elementwise Binary Operations
+template<std::size_t NumDims,
+        armnn::DataType ArmnnTypeInput,
+        armnn::DataType ArmnnTypeOutput,
+        typename TInput  = armnn::ResolveType<ArmnnTypeInput>,
+        typename TOutput = armnn::ResolveType<ArmnnTypeOutput>>
+LayerTestResult<TOutput, NumDims> ElementwiseTestHelper(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        armnn::BinaryOperation op,
+        const unsigned int shape0[NumDims],
+        std::vector<TInput> values0,
+        const unsigned int shape1[NumDims],
+        std::vector<TInput> values1,
+        const unsigned int outShape[NumDims],
+        std::vector<TOutput> outValues,
+        const armnn::ITensorHandleFactory& tensorHandleFactory) {
+
+    armnn::TensorInfo inputTensorInfo0{NumDims, shape0, ArmnnTypeInput};
+    armnn::TensorInfo inputTensorInfo1{NumDims, shape1, ArmnnTypeInput};
+    armnn::TensorInfo outputTensorInfo{NumDims, outShape, ArmnnTypeOutput};
+
+    std::vector<TOutput> actualOutput(outputTensorInfo.GetNumElements());
+
+    bool isBoolean = false;
+    if (ArmnnTypeOutput == armnn::DataType::Boolean)
+    {
+        isBoolean = true;
+    }
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle0 = tensorHandleFactory.CreateTensorHandle(inputTensorInfo0);
+    std::unique_ptr<armnn::ITensorHandle> inputHandle1 = tensorHandleFactory.CreateTensorHandle(inputTensorInfo1);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::ElementwiseBinaryQueueDescriptor data;
+    data.m_Parameters.m_Operation = op;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(data, info, inputTensorInfo0, inputHandle0.get());
+    AddInputToWorkload(data, info, inputTensorInfo1, inputHandle1.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    auto workload = workloadFactory.CreateWorkload(armnn::LayerType::ElementwiseBinary, data, info);
+
+    inputHandle0->Allocate();
+    inputHandle1->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle0.get(), values0.data());
+    CopyDataToITensorHandle(inputHandle1.get(), values1.data());
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(actualOutput.data(), outputHandle.get());
+
+    return LayerTestResult<TOutput, NumDims>(actualOutput,
+                                             outValues,
+                                             outputHandle->GetShape(),
+                                             outputTensorInfo.GetShape(),
+                                             isBoolean);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp
new file mode 100644
index 0000000..dd6d569
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.cpp
@@ -0,0 +1,539 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PowerTestImpl.hpp"
+
+#include "ElementwiseTestImpl.hpp"
+
+LayerTestResult<float, 4> PowerTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<float> input0 =
+            {
+                    7.f, 3.f, 4.f, 2.f, 6.f, 4.f, 2.f, 1.f,
+                    1.f, 1.f, 0.f, 2.f, 9.f, 3.f, 5.f, 3.f
+            };
+
+    std::vector<float> input1 =
+            {
+                    2.f, 3.f, 2.f, 1.f, 2.f, 3.f, 4.f, 3.f,
+                    4.f, 5.f, 3.f, 5.f, 2.f, 3.f, 2.f, 0.f
+            };
+
+    std::vector<float> output
+            {
+                    49.f, 27.f, 16.f, 2.f, 36.f, 64.f, 16.f, 1.f,
+                    1.f, 1.f, 0.f, 32.f, 81.f, 27.f, 25.f, 1.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> PowerBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape0[] = { 1, 2, 2, 2 };
+    unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<float> input0 =
+            {
+                    1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 2.f, 1.f
+            };
+
+    std::vector<float> input1 = { 2.f };
+
+    std::vector<float> output =
+            {
+                    1.f, 4.f, 9.f, 16.f, 25.f, 0.f, 4.f, 1.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> PowerBroadcastTest(
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<float> input0 =
+            {
+                    1.f, 2.f, 3.f, 3.f, 4.f, 4.f,
+                    4.f, 0.f, 2.f, 3.f, 4.f, 4.f
+            };
+
+    std::vector<float> input1 = { 1.f, 3.f, 1.f };
+
+    std::vector<float> output =
+            {
+                    1.f, 8.f, 3.f, 3.f, 64.f, 4.f,
+                    4.f, 0.f, 2.f, 3.f, 64.f, 4.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                    1._h, 5._h, 1._h, 4._h, 6._h, 1._h, 3._h, 5._h,
+                    3._h, 7._h, 6._h, 3._h, 8._h, 4._h, 4._h, 2._h
+            };
+
+    std::vector<armnn::Half> input1 =
+            {
+                    2._h, 2._h, 2._h, 2._h, 2._h, 3._h, 3._h, 2._h,
+                    1._h, 2._h, 2._h, 4._h, 2._h, 1._h, 3._h, 5._h
+            };
+
+    std::vector<armnn::Half> output
+            {
+                    1._h, 25._h, 1._h, 16._h, 36._h, 1._h, 27._h, 25._h,
+                    3._h, 49._h, 36._h, 81._h, 64._h, 4._h, 64._h, 32._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                    1._h, 2._h, 3._h, 4._h, 5._h, 4._h,
+                    1._h, 5._h, 4._h, 2._h, 0._h, 1._h
+            };
+
+    std::vector<armnn::Half> input1 = { 2._h };
+
+    std::vector<armnn::Half> output =
+            {
+                    1._h, 4._h, 9._h, 16._h, 25._h, 16._h,
+                    1._h, 25._h, 16._h, 4._h, 0._h, 1._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> PowerBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                    4._h, 2._h, 3._h, 4._h, 1._h,  0._h,
+                    8._h, 1._h, 1._h, 1._h, 2._h, 4._h
+            };
+
+    std::vector<armnn::Half> input1 = { 1._h, 5._h, 3._h };
+
+    std::vector<armnn::Half> output =
+            {
+                    4._h, 32._h, 27._h, 4._h, 1._h, 0._h,
+                    8._h, 1._h, 1._h, 1._h, 32._h, 64._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape[] = { 1, 1, 2, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 2, 4, 3 };
+
+    std::vector<uint8_t> input1 = { 1, 2, 2, 2 };
+
+    std::vector<uint8_t> output = { 4, 4, 16, 9 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> input0 = { 4, 5, 1, 0 };
+
+    std::vector<uint8_t> input1 = { 2 };
+
+    std::vector<uint8_t> output = { 16, 25, 1, 0 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> PowerBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 1, 6, 2 };
+
+    std::vector<uint8_t> input1 = { 2, 6 };
+
+    std::vector<uint8_t> output = { 16, 1, 36, 64 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int16_t> input0 =
+            {
+                    1, 5, 1, 4, 4, 9, 3, 7,
+                    3, 2, 9, 6, 1, 2, 1, 4
+            };
+
+    std::vector<int16_t> input1 =
+            {
+                    2, 2, 0, 3, 2, 1, 3, 2,
+                    4, 4, 2, 1, 7, 5, 4, 2
+            };
+
+    std::vector<int16_t> output
+            {
+                    1, 25, 1, 64, 16, 9, 27, 49,
+                    81, 16, 81, 6, 1, 32, 1, 16
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int16_t> input0 =
+            {
+                    1, 2, 3, 4, 5, 0,
+                    5, 4, 1, 4, 5, 2
+            };
+
+    std::vector<int16_t> input1 = { 2 };
+
+    std::vector<int16_t> output =
+            {
+                    1, 4, 9, 16, 25, 0,
+                    25, 16, 1, 16, 25, 4
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> PowerBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int16_t> input0 =
+            {
+                    4, 2, 1, 4, 5, 3,
+                    7, 3, 4, 8, 1, 2
+            };
+
+    std::vector<int16_t> input1 = { 1, 2, 3 };
+
+    std::vector<int16_t> output =
+            {
+                    4, 4, 1, 4, 25, 27,
+                    7, 9, 64, 8, 1, 8
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int32_t> input0 =
+            {
+                    1, 3, 4, 3, 1, 4, 2, 1,
+                    2, 1, 2, 1, 4, 3, 4, 3
+            };
+
+    std::vector<int32_t> input1 =
+            {
+                    2, 2, 2, 2, 3, 3, 4, 3,
+                    4, 4, 4, 4, 1, 3, 1, 3
+            };
+
+    std::vector<int32_t> output
+            {
+                    1, 9, 16, 9, 1, 64, 16, 1,
+                    16, 1, 16, 1, 4, 27, 4, 27
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int32_t> input0 =
+            {
+                    4, 4, 3, 4, 5, 0,
+                    5, 8, 1, 3, 9, 2
+            };
+
+    std::vector<int32_t> input1 = { 2, 1, 3 };
+
+    std::vector<int32_t> output =
+            {
+                    16, 4, 27, 16, 5, 0,
+                    25, 8, 1, 9, 9, 8
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> PowerBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int32_t> input0 =
+            {
+                    1, 2, 3, 4, 5, 3,
+                    3, 1, 0, 2, 1, 5
+            };
+
+    std::vector<int32_t> input1 = { 2 };
+
+    std::vector<int32_t> output =
+            {
+                    1, 4, 9, 16, 25, 9,
+                    9, 1, 0, 4, 1, 25
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::Power,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp
new file mode 100644
index 0000000..3707208
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/PowerTestImpl.hpp
@@ -0,0 +1,88 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <Half.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> PowerTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> PowerBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> PowerBroadcastTest(
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> PowerBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> PowerBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t , 4> PowerInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> PowerBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> PowerBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> PowerBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp
new file mode 100644
index 0000000..8bb31ed
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.cpp
@@ -0,0 +1,539 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SquaredDifferenceTestImpl.hpp"
+
+#include "ElementwiseTestImpl.hpp"
+
+LayerTestResult<float, 4> SquaredDifferenceTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<float> input0 =
+            {
+                 7.f, 3.f, 4.f, 2.f, 6.f, 4.f, 2.f, 1.f,
+                 3.f, 1.f, 0.f, 1.f, 4.f, 3.f, 4.f, 3.f
+            };
+
+    std::vector<float> input1 =
+            {
+                 5.f, 3.f, 2.f, 5.f, 3.f, 3.f, 4.f, 3.f,
+                 4.f, 4.f, 3.f, 2.f, 5.f, 5.f, 5.f, 5.f
+            };
+
+    std::vector<float> output
+            {
+                 4.f, 0.f, 4.f, 9.f, 9.f, 1.f, 4.f, 4.f,
+                 1.f, 9.f, 9.f, 1.f, 1.f, 4.f, 1.f, 4.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> SquaredDiffBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape0[] = { 1, 2, 2, 2 };
+    unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<float> input0 =
+            {
+                   1.f, 2.f, 3.f, 4.f, 5.f, 0.f, 2.f, 1.f
+            };
+
+    std::vector<float> input1 = { 2.f };
+
+    std::vector<float> output =
+            {
+                1.f, 0.f, 1.f, 4.f, 9.f, 4.f, 0.f, 1.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<float, 4> SquaredDiffBroadcastTest(
+       armnn::IWorkloadFactory & workloadFactory,
+       const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+       const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<float> input0 =
+            {
+                1.f, 2.f, 3.f, 3.f, 6.f, 4.f,
+                4.f, 0.f, 2.f, 3.f, 4.f, 4.f
+            };
+
+    std::vector<float> input1 = { 1.f, 3.f, 1.f };
+
+    std::vector<float> output =
+            {
+                0.f, 1.f, 4.f, 4.f, 9.f, 9.f,
+                9.f, 9.f, 1.f, 4.f, 1.f, 9.f
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float32, armnn::DataType::Float32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> SquaredDifferenceFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                1._h, 5._h, 1._h, 4._h, 6._h, 1._h, 3._h, 5._h,
+                3._h, 7._h, 6._h, 3._h, 8._h, 4._h, 4._h, 2._h
+            };
+
+    std::vector<armnn::Half> input1 =
+            {
+                2._h, 2._h, 2._h, 2._h, 3._h, 3._h, 3._h, 3._h,
+                4._h, 4._h, 4._h, 4._h, 5._h, 6._h, 5._h, 5._h
+            };
+
+    std::vector<armnn::Half> output
+            {
+                1._h, 9._h, 1._h, 4._h, 9._h, 4._h, 0._h, 4._h,
+                1._h, 9._h, 4._h, 1._h, 9._h, 4._h, 1._h, 9._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                1._h, 2._h, 3._h, 4._h, 5._h, 4._h,
+                1._h, 5._h, 4._h, 2._h, 0._h, 1._h
+            };
+
+    std::vector<armnn::Half> input1 = { 2._h };
+
+    std::vector<armnn::Half> output =
+            {
+                1._h, 0._h, 1._h, 4._h, 9._h, 4._h,
+                1._h, 9._h, 4._h, 0._h, 4._h, 1._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    using namespace half_float::literal;
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<armnn::Half> input0 =
+            {
+                4._h, 2._h, 3._h, 4._h, 5._h,  5._h,
+                2._h, 8._h, 1._h, 1._h, 2._h, 4._h
+            };
+
+    std::vector<armnn::Half> input1 = { 1._h, 5._h, 3._h };
+
+    std::vector<armnn::Half> output =
+            {
+                9._h, 9._h, 0._h, 9._h, 0._h, 4._h,
+                1._h, 9._h, 4._h, 0._h, 9._h, 1._h
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Float16, armnn::DataType::Float16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> SquaredDifferenceUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 2, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 2, 4, 3 };
+
+    std::vector<uint8_t> input1 = { 1, 2, 2, 2 };
+
+    std::vector<uint8_t> output = { 9, 0, 4, 1 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<uint8_t> input0 = { 4, 5, 1, 0 };
+
+    std::vector<uint8_t> input1 = { 2 };
+
+    std::vector<uint8_t> output = { 4, 9, 1, 4 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 1, 2, 2 };
+    const unsigned int shape1[] = { 1, 1, 1, 2 };
+
+    std::vector<uint8_t> input0 = { 4, 12, 3, 6 };
+
+    std::vector<uint8_t> input1 = { 2, 9 };
+
+    std::vector<uint8_t> output = { 4, 9, 1, 9 };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QAsymmU8, armnn::DataType::QAsymmU8>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> SquaredDifferenceInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int16_t> input0 =
+            {
+                1, 5, 1, 4, 6, 9, 6, 5,
+                3, 2, 3, 6, 4, 4, 1, 4
+            };
+
+    std::vector<int16_t> input1 =
+            {
+                2, 2, 0, 4, 3, 7, 3, 3,
+                4, 4, 4, 9, 7, 5, 4, 5
+            };
+
+    std::vector<int16_t> output
+            {
+                1, 9, 1, 0, 9, 4, 9, 4,
+                1, 4, 1, 9, 9, 1, 9, 1
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> SquaredDiffBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int16_t> input0 =
+            {
+                1, 2, 3, 4, 5, 0,
+                5, 4, 1, 4, 5, 2
+            };
+
+    std::vector<int16_t> input1 = { 2 };
+
+    std::vector<int16_t> output =
+            {
+                1, 0, 1, 4, 9, 4,
+                9, 4, 1, 4, 9, 0
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int16_t, 4> SquaredDiffBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int16_t> input0 =
+            {
+                4, 2, 1, 4, 5, 6,
+                7, 3, 5, 8, 1, 5
+            };
+
+    std::vector<int16_t> input1 = { 7, 2, 3 };
+
+    std::vector<int16_t> output =
+            {
+                9, 0, 4, 9, 9, 9,
+                0, 1, 4, 1, 1, 4
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::QSymmS16, armnn::DataType::QSymmS16>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> SquaredDifferenceInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    unsigned int shape[] = { 2, 2, 2, 2 };
+
+    std::vector<int32_t> input0 =
+            {
+                1, 3, 4, 3, 6, 4, 2, 6,
+                3, 1, 3, 1, 4, 3, 4, 3
+            };
+
+    std::vector<int32_t> input1 =
+            {
+                2, 2, 2, 2, 3, 3, 4, 3,
+                4, 4, 4, 4, 5, 5, 5, 5
+            };
+
+    std::vector<int32_t> output
+            {
+                1, 1, 4, 1, 9, 1, 4, 9,
+                1, 9, 1, 9, 1, 4, 1, 4
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape,
+            input0,
+            shape,
+            input1,
+            shape,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> SquaredDiffBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 3 };
+
+    std::vector<int32_t> input0 =
+            {
+                4, 4, 3, 4, 5, 6,
+                5, 8, 6, 3, 9, 5
+            };
+
+    std::vector<int32_t> input1 = { 2, 7, 3 };
+
+    std::vector<int32_t> output =
+            {
+                4, 9, 0, 4, 4, 9,
+                9, 1, 9, 1, 4, 4
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
+
+LayerTestResult<int32_t, 4> SquaredDiffBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    IgnoreUnused(memoryManager);
+    const unsigned int shape0[] = { 1, 2, 2, 3 };
+    const unsigned int shape1[] = { 1, 1, 1, 1 };
+
+    std::vector<int32_t> input0 =
+            {
+                1, 2, 3, 4, 5, 3,
+                3, 1, 0, 2, 1, 5
+            };
+
+    std::vector<int32_t> input1 = { 2 };
+
+    std::vector<int32_t> output =
+            {
+                1, 0, 1, 4, 9, 1,
+                1, 1, 4, 0, 1, 9
+            };
+
+    return ElementwiseTestHelper<4, armnn::DataType::Signed32, armnn::DataType::Signed32>(
+            workloadFactory,
+            memoryManager,
+            armnn::BinaryOperation::SqDiff,
+            shape0,
+            input0,
+            shape1,
+            input1,
+            shape0,
+            output,
+            tensorHandleFactory);
+}
diff --git a/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp
new file mode 100644
index 0000000..1d87700
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/SquaredDifferenceTestImpl.hpp
@@ -0,0 +1,88 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnnTestUtils/LayerTestResult.hpp>
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <armnn/backends/WorkloadFactory.hpp>
+
+LayerTestResult<float, 4> SquaredDifferenceTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> SquaredDiffBroadcast1ElementTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<float, 4> SquaredDiffBroadcastTest(
+        armnn::IWorkloadFactory & workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> SquaredDifferenceFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcast1ElementFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<armnn::Half, 4> SquaredDiffBroadcastFloat16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcast1ElementUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> SquaredDifferenceUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<uint8_t, 4> SquaredDiffBroadcastUint8Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t , 4> SquaredDifferenceInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> SquaredDiffBroadcast1ElementInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int16_t, 4> SquaredDiffBroadcastInt16Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> SquaredDifferenceInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> SquaredDiffBroadcastInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+LayerTestResult<int32_t, 4> SquaredDiffBroadcast1ElementInt32Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 95004c4..4bb3f29 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -1602,6 +1602,24 @@
 {
     ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Minimum);
 }
+// ElementWiseBinary Power: reference-backend end-to-end coverage.
+TEST_CASE("RefPowerEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Power);
+}
+TEST_CASE("RefPowerEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Power);
+}
+// ElementWiseBinary SquaredDifference: reference-backend end-to-end coverage.
+TEST_CASE("RefSquaredDifferenceEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::SqDiff);
+}
+TEST_CASE("RefSquaredDifferenceEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::SqDiff);
+}
 #endif
 
 }
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 0e228db..8b89743 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022,2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -1003,6 +1003,48 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(MultiplicationBroadcast1DVectorInt32, MultiplicationBroadcast1DVectorInt32Test)
 ARMNN_AUTO_TEST_CASE_WITH_THF(Multiplication5d, Multiplication5dTest)
 
+// SquaredDifference
+ARMNN_AUTO_TEST_CASE_WITH_THF(SimpleSquaredDifference, SquaredDifferenceTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast1Element, SquaredDiffBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast, SquaredDiffBroadcastTest)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDifferenceFloat16, SquaredDifferenceFloat16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast1ElementFloat16, SquaredDiffBroadcast1ElementFloat16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcastFloat16, SquaredDiffBroadcastFloat16Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDifferenceUint8, SquaredDifferenceUint8Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcastUint8, SquaredDiffBroadcastUint8Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast1ElementUint8, SquaredDiffBroadcast1ElementUint8Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDifferenceInt16, SquaredDifferenceInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcastInt16, SquaredDiffBroadcastInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast1ElementInt16, SquaredDiffBroadcast1ElementInt16Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDifferenceInt32, SquaredDifferenceInt32Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcastInt32, SquaredDiffBroadcastInt32Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(SquaredDiffBroadcast1ElementInt32, SquaredDiffBroadcast1ElementInt32Test)
+
+// Power
+ARMNN_AUTO_TEST_CASE_WITH_THF(SimplePower, PowerTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast1Element, PowerBroadcast1ElementTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast, PowerBroadcastTest)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerFloat16, PowerFloat16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast1ElementFloat16, PowerBroadcast1ElementFloat16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcastFloat16, PowerBroadcastFloat16Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerUint8, PowerUint8Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcastUint8, PowerBroadcastUint8Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast1ElementUint8, PowerBroadcast1ElementUint8Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerInt16, PowerInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcastInt16, PowerBroadcastInt16Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast1ElementInt16, PowerBroadcast1ElementInt16Test)
+
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerInt32, PowerInt32Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcastInt32, PowerBroadcastInt32Test)
+ARMNN_AUTO_TEST_CASE_WITH_THF(PowerBroadcast1ElementInt32, PowerBroadcast1ElementInt32Test)
+
 // Batch Mat Mul
 ARMNN_AUTO_TEST_CASE_WITH_THF(BatchMatMul2DSimpleFloat32, BatchMatMul2DSimpleTest<DataType::Float32>);
 ARMNN_AUTO_TEST_CASE_WITH_THF(BatchMatMul2DSimpleFloat16, BatchMatMul2DSimpleTest<DataType::Float16>);
diff --git a/src/backends/reference/workloads/ElementwiseFunction.cpp b/src/backends/reference/workloads/ElementwiseFunction.cpp
index c5b0ad1..4044f06 100644
--- a/src/backends/reference/workloads/ElementwiseFunction.cpp
+++ b/src/backends/reference/workloads/ElementwiseFunction.cpp
@@ -14,6 +14,8 @@
 #include "Rsqrt.hpp"
 #include "Sin.hpp"
 #include "Sqrt.hpp"
+#include "Power.hpp"
+#include "SquaredDifference.hpp"
 
 
 namespace armnn
@@ -67,6 +69,8 @@
 template struct armnn::ElementwiseBinaryFunction<std::divides<float>>;
 template struct armnn::ElementwiseBinaryFunction<armnn::maximum<float>>;
 template struct armnn::ElementwiseBinaryFunction<armnn::minimum<float>>;
+template struct armnn::ElementwiseBinaryFunction<armnn::power<float>>;
+template struct armnn::ElementwiseBinaryFunction<armnn::squaredDifference<float>>;
 
 template struct armnn::ElementwiseBinaryFunction<std::plus<int32_t>>;
 template struct armnn::ElementwiseBinaryFunction<std::minus<int32_t>>;
@@ -74,6 +78,8 @@
 template struct armnn::ElementwiseBinaryFunction<std::divides<int32_t>>;
 template struct armnn::ElementwiseBinaryFunction<armnn::maximum<int32_t>>;
 template struct armnn::ElementwiseBinaryFunction<armnn::minimum<int32_t>>;
+template struct armnn::ElementwiseBinaryFunction<armnn::power<int32_t>>;
+template struct armnn::ElementwiseBinaryFunction<armnn::squaredDifference<int32_t>>;
 
 // Comparison
 template struct armnn::ElementwiseBinaryFunction<std::equal_to<float>>;
diff --git a/src/backends/reference/workloads/Power.hpp b/src/backends/reference/workloads/Power.hpp
new file mode 100644
index 0000000..744328e
--- /dev/null
+++ b/src/backends/reference/workloads/Power.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/utility/NumericCast.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+// Element-wise binary functor: raises input1 to the power of input2.
+// The computation is performed in float and cast back to T via numeric_cast.
+template<typename T>
+struct power
+{
+    typedef T result_type;
+    typedef T first_argument_type;
+
+    T
+    operator()(const T& input1, const T& input2) const
+    {
+        T result = armnn::numeric_cast<T>(std::pow(static_cast<float>(input1), static_cast<float>(input2)));
+        return result;
+    }
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
index 5dc77f8..e71cdd4 100644
--- a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
@@ -11,6 +11,8 @@
 #include "RefWorkloadUtils.hpp"
 #include "Maximum.hpp"
 #include "Minimum.hpp"
+#include "SquaredDifference.hpp"
+#include "Power.hpp"
 
 #include <Profiling.hpp>
 
@@ -44,6 +46,8 @@
     using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
     using MulFunction     = ElementwiseBinaryFunction<std::multiplies<DataType>>;
     using SubFunction     = ElementwiseBinaryFunction<std::minus<DataType>>;
+    using SqDiffFunction  = ElementwiseBinaryFunction<armnn::squaredDifference<DataType>>;
+    using PowerFunction   = ElementwiseBinaryFunction<armnn::power<DataType>>;
 
     switch (operation)
     {
@@ -77,6 +81,16 @@
             SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
             break;
         }
+        case BinaryOperation::SqDiff:
+        {
+            SqDiffFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Power:
+        {
+            PowerFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
         default:
         {
             throw InvalidArgumentException(std::string("Unsupported binary operation ") +
diff --git a/src/backends/reference/workloads/SquaredDifference.hpp b/src/backends/reference/workloads/SquaredDifference.hpp
new file mode 100644
index 0000000..c15b379
--- /dev/null
+++ b/src/backends/reference/workloads/SquaredDifference.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/utility/NumericCast.hpp>
+
+namespace armnn
+{
+
+// Element-wise binary functor: returns (input1 - input2)^2.
+// The difference is computed in float and cast back to T via numeric_cast.
+template<typename T>
+struct squaredDifference
+{
+    typedef T result_type;
+    typedef T first_argument_type;
+
+    T
+    operator()(const T& input1, const T& input2) const
+    {
+        // Square by multiplication rather than std::pow(diff, 2):
+        // cheaper than a general transcendental call and exact.
+        float diff = static_cast<float>(input1) - static_cast<float>(input2);
+        T squaredDiff = armnn::numeric_cast<T>(diff * diff);
+        return squaredDiff;
+    }
+};
+
+} //namespace armnn