Add in-place calculation support for CL elementwise arithmetic kernels

- Add in-place calculation support in ClArithmeticKernel,  ClSaturatedArithmeticKernel and ClMulKernel
- Add in-place test cases

Resolves: COMPMID-4431

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: Id484bdb76b74478a33fedb471ae0c7f799c599f6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5885
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h
index 7aa716d..0f7e44e 100644
--- a/tests/validation/fixtures/ArithmeticOperationsFixture.h
+++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h
@@ -47,13 +47,13 @@
 public:
     template <typename...>
     void setup(reference::ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace)
     {
-        _op        = op;
-        _act_info  = act_info;
-        _in_place  = in_place;
-        _target    = compute_target(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
-        _reference = compute_reference(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
+        _op         = op;
+        _act_info   = act_info;
+        _is_inplace = is_inplace;
+        _target     = compute_target(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
+        _reference  = compute_reference(shape0, shape1, data_type, convert_policy, qinfo0, qinfo1, qinfo_out);
     }
 
 protected:
@@ -67,27 +67,51 @@
                               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
     {
         // Create tensors
-        TensorType  ref_src1   = create_tensor<TensorType>(shape0, data_type, 1, qinfo0);
-        TensorType  ref_src2   = create_tensor<TensorType>(shape1, data_type, 1, qinfo1);
-        TensorType  dst        = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), data_type, 1, qinfo_out);
-        TensorType *dst_to_use = _in_place ? &ref_src1 : &dst;
+        const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        TensorType        ref_src1  = create_tensor<TensorType>(shape0, data_type, 1, qinfo0);
+        TensorType        ref_src2  = create_tensor<TensorType>(shape1, data_type, 1, qinfo1);
+        TensorType        dst       = create_tensor<TensorType>(out_shape, data_type, 1, qinfo_out);
+
+        // Check whether do in-place computation and whether inputs are broadcast compatible
+        TensorType *actual_dst = &dst;
+        if(_is_inplace)
+        {
+            bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out);
+            bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out);
+            bool do_in_place     = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+            ARM_COMPUTE_ASSERT(do_in_place);
+
+            if(src1_is_inplace)
+            {
+                actual_dst = &ref_src1;
+            }
+            else
+            {
+                actual_dst = &ref_src2;
+            }
+        }
 
         // Create and configure function
         FunctionType arith_op;
-        arith_op.configure(&ref_src1, &ref_src2, dst_to_use, convert_policy, _act_info);
+        arith_op.configure(&ref_src1, &ref_src2, actual_dst, convert_policy, _act_info);
 
         ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
         ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(dst_to_use->info()->is_resizable());
 
         // Allocate tensors
         ref_src1.allocator()->allocate();
         ref_src2.allocator()->allocate();
-        dst_to_use->allocator()->allocate();
 
         ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!dst_to_use->info()->is_resizable());
+
+        // If don't do in-place computation, still need to allocate original dst
+        if(!_is_inplace)
+        {
+            ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+            dst.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+        }
 
         // Fill tensors
         fill(AccessorType(ref_src1), 0);
@@ -96,38 +120,30 @@
         // Compute function
         arith_op.run();
 
-        if(_in_place)
-        {
-            return ref_src1;
-        }
-        return dst;
+        return std::move(*actual_dst);
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy,
                                       QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
     {
-        // current in-place implementation only supports same metadata of input and output tensors.
-        // By ignoring output quantization information here, we can make test cases implementation much simpler.
-        QuantizationInfo output_qinfo = _in_place ? qinfo0 : qinfo_out;
-
         // Create reference
         SimpleTensor<T> ref_src1{ shape0, data_type, 1, qinfo0 };
         SimpleTensor<T> ref_src2{ shape1, data_type, 1, qinfo1 };
-        SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), data_type, 1, output_qinfo };
+        SimpleTensor<T> ref_dst{ TensorShape::broadcast_shape(shape0, shape1), data_type, 1, qinfo_out };
 
         // Fill reference
         fill(ref_src1, 0);
         fill(ref_src2, 1);
 
         auto result = reference::arithmetic_operation<T>(_op, ref_src1, ref_src2, ref_dst, convert_policy);
-        return _act_info.enabled() ? reference::activation_layer(result, _act_info, output_qinfo) : result;
+        return _act_info.enabled() ? reference::activation_layer(result, _act_info, qinfo_out) : result;
     }
 
     TensorType                     _target{};
     SimpleTensor<T>                _reference{};
     reference::ArithmeticOperation _op{ reference::ArithmeticOperation::ADD };
     ActivationLayerInfo            _act_info{};
-    bool                           _in_place{};
+    bool                           _is_inplace{};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -135,10 +151,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -147,10 +163,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), false);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -159,10 +175,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -171,10 +187,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, false);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -183,11 +199,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape, shape, data_type, convert_policy,
-                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
+                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -196,10 +212,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out,
+               bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::ADD, shape0, shape1, data_type, convert_policy,
-                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), false);
+                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -208,10 +225,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool in_place)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -221,10 +238,10 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info,
-               bool in_place)
+               bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -233,10 +250,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool in_place)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), in_place);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -245,10 +262,10 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool in_place)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
-                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, in_place);
+                                                                                            QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -257,11 +274,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place)
+    void setup(const TensorShape &shape, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape, shape, data_type, convert_policy,
-                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place);
+                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
     }
 };
 
@@ -271,10 +288,10 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, ConvertPolicy convert_policy, QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out,
-               bool in_place)
+               bool is_inplace)
     {
         ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(reference::ArithmeticOperation::SUB, shape0, shape1, data_type, convert_policy,
-                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), in_place);
+                                                                                            qinfo0, qinfo1, qinfo_out, ActivationLayerInfo(), is_inplace);
     }
 };
 } // namespace validation
diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h
index 6661862..6f31a73 100644
--- a/tests/validation/fixtures/ElementwiseOperationsFixture.h
+++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h
@@ -49,12 +49,13 @@
     template <typename...>
     void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
                DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false, bool use_dynamic_shape = false)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace = false, bool use_dynamic_shape = false)
     {
         _op                = op;
         _use_dynamic_shape = use_dynamic_shape;
+        _is_inplace        = is_inplace;
 
-        _target    = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out, in_place);
+        _target    = compute_target(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
         _reference = compute_reference(shape0, shape1, data_type0, data_type1, output_data_type, qinfo0, qinfo1, qinfo_out);
     }
 
@@ -83,7 +84,7 @@
     }
 
     TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
-                              QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool in_place = false)
+                              QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
     {
         // Create tensors
         const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
@@ -92,17 +93,19 @@
         TensorType        dst       = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out);
 
         // Check whether do in-place computation and whether inputs are broadcast compatible
-        TensorType *actual_dst        = &dst;
-        bool        src1_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out);
-        bool        src2_can_in_place = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out);
-        bool        do_in_place       = in_place && out_shape.total_size() != 0 && (src1_can_in_place || src2_can_in_place);
-        if(do_in_place)
+        TensorType *actual_dst = &dst;
+        if(_is_inplace)
         {
-            if(src1_can_in_place)
+            bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type);
+            bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type);
+            bool do_in_place     = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+            ARM_COMPUTE_ASSERT(do_in_place);
+
+            if(src1_is_inplace)
             {
                 actual_dst = &ref_src1;
             }
-            else if(src2_can_in_place)
+            else
             {
                 actual_dst = &ref_src2;
             }
@@ -135,8 +138,8 @@
         ref_src1.allocator()->allocate();
         ref_src2.allocator()->allocate();
 
-        // If in-place computation is not supported, still need to allocate original dst
-        if(!do_in_place)
+        // If don't do in-place computation, still need to allocate original dst
+        if(!_is_inplace)
         {
             ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
             dst.allocator()->allocate();
@@ -176,6 +179,7 @@
     SimpleTensor<T>     _reference{};
     ArithmeticOperation _op{ ArithmeticOperation::ADD };
     bool                _use_dynamic_shape{ false };
+    bool                _is_inplace{ false };
 };
 
 // Arithmetic operation fused with activation function
@@ -186,12 +190,13 @@
     template <typename...>
     void setup(ArithmeticOperation op, const TensorShape &shape0, const TensorShape &shape1,
                DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool in_place = false)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info, bool is_inplace = true)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out, in_place);
-        _act_info = act_info;
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
+        _act_info   = act_info;
+        _is_inplace = is_inplace;
     }
 
 protected:
@@ -199,26 +204,51 @@
                               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
     {
         // Create tensors
-        TensorType ref_src1 = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
-        TensorType ref_src2 = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
-        TensorType dst      = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), output_data_type, 1, qinfo_out);
+        const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        TensorType        ref_src1  = create_tensor<TensorType>(shape0, data_type0, 1, qinfo0);
+        TensorType        ref_src2  = create_tensor<TensorType>(shape1, data_type1, 1, qinfo1);
+        TensorType        dst       = create_tensor<TensorType>(out_shape, output_data_type, 1, qinfo_out);
+
+        // Check whether do in-place computation and whether inputs are broadcast compatible
+        TensorType *actual_dst = &dst;
+        if(_is_inplace)
+        {
+            bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (data_type0 == output_data_type);
+            bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (data_type1 == output_data_type);
+            bool do_in_place     = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+            ARM_COMPUTE_ASSERT(do_in_place);
+
+            if(src1_is_inplace)
+            {
+                actual_dst = &ref_src1;
+            }
+            else
+            {
+                actual_dst = &ref_src2;
+            }
+        }
 
         // Create and configure function
         FunctionType elem_op;
-        elem_op.configure(&ref_src1, &ref_src2, &dst, _act_info);
+        elem_op.configure(&ref_src1, &ref_src2, actual_dst, _act_info);
 
         ARM_COMPUTE_ASSERT(ref_src1.info()->is_resizable());
         ARM_COMPUTE_ASSERT(ref_src2.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
 
         // Allocate tensors
         ref_src1.allocator()->allocate();
         ref_src2.allocator()->allocate();
-        dst.allocator()->allocate();
+
+        // If don't do in-place computation, still need to allocate original dst
+        if(!_is_inplace)
+        {
+            ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
+            dst.allocator()->allocate();
+            ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
+        }
 
         ARM_COMPUTE_ASSERT(!ref_src1.info()->is_resizable());
         ARM_COMPUTE_ASSERT(!ref_src2.info()->is_resizable());
-        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
         fill(AccessorType(ref_src1), 0);
@@ -227,7 +257,7 @@
         // Compute function
         elem_op.run();
 
-        return dst;
+        return std::move(*actual_dst);
     }
 
     SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1,
@@ -240,6 +270,7 @@
     }
 
     ActivationLayerInfo _act_info{};
+    bool                _is_inplace{ false };
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -247,11 +278,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -260,11 +291,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -273,11 +304,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true, true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace, true);
     }
 };
 
@@ -286,11 +317,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -299,11 +330,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape0, shape1,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -312,11 +343,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -325,11 +356,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -339,12 +370,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::DIV, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -353,11 +384,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -366,11 +397,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -379,11 +410,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -392,11 +423,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -406,12 +437,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -421,12 +452,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MAX, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out, true);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -435,11 +466,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -448,11 +479,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -461,11 +492,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -474,11 +505,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -488,12 +519,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -503,12 +534,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::MIN, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out, true);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -517,11 +548,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -530,11 +561,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -543,11 +574,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -556,11 +587,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -570,12 +601,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -585,12 +616,12 @@
 public:
     template <typename...>
     void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type,
-               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out)
+               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, bool is_inplace)
 
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::SQUARED_DIFF, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out, true);
+                                                                                             qinfo0, qinfo1, qinfo_out, is_inplace);
     }
 };
 
@@ -603,7 +634,7 @@
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
     }
 };
 
@@ -646,7 +677,7 @@
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::PRELU, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             qinfo0, qinfo1, qinfo_out, true);
+                                                                                             qinfo0, qinfo1, qinfo_out);
     }
 };
 
@@ -655,11 +686,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), true);
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -668,11 +699,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, bool is_inplace)
     {
         ArithmeticOperationsGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape,
                                                                                              data_type0, data_type1, output_data_type,
-                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo());
+                                                                                             QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), is_inplace);
     }
 };
 
@@ -681,11 +712,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape0, shape1,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, true);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
@@ -694,11 +725,11 @@
 {
 public:
     template <typename...>
-    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info)
+    void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ActivationLayerInfo act_info, bool is_inplace)
     {
         ArithmeticOperationsFuseActivationFixture<TensorType, AccessorType, FunctionType, T>::setup(ArithmeticOperation::POWER, shape, shape,
                                                                                                     data_type0, data_type1, output_data_type,
-                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info);
+                                                                                                    QuantizationInfo(), QuantizationInfo(), QuantizationInfo(), act_info, is_inplace);
     }
 };
 
diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
index 7c643bd..8dc5179 100644
--- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
+++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h
@@ -76,9 +76,29 @@
                               QuantizationInfo qinfo0, QuantizationInfo qinfo1, QuantizationInfo qinfo_out, ActivationLayerInfo act_info)
     {
         // Create tensors
-        TensorType src1 = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0);
-        TensorType src2 = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1);
-        TensorType dst  = create_tensor<TensorType>(TensorShape::broadcast_shape(shape0, shape1), dt_out, 1, qinfo_out);
+        const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        TensorType        src1      = create_tensor<TensorType>(shape0, dt_in1, 1, qinfo0);
+        TensorType        src2      = create_tensor<TensorType>(shape1, dt_in2, 1, qinfo1);
+        TensorType        dst       = create_tensor<TensorType>(out_shape, dt_out, 1, qinfo_out);
+
+        // Check whether do in-place computation and whether inputs are broadcast compatible
+        TensorType *actual_dst = &dst;
+        if(_is_inplace)
+        {
+            bool src1_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape0, 0) && (qinfo0 == qinfo_out) && (dt_in1 == dt_out);
+            bool src2_is_inplace = !arm_compute::detail::have_different_dimensions(out_shape, shape1, 0) && (qinfo1 == qinfo_out) && (dt_in2 == dt_out);
+            bool do_in_place     = out_shape.total_size() != 0 && (src1_is_inplace || src2_is_inplace);
+            ARM_COMPUTE_ASSERT(do_in_place);
+
+            if(src1_is_inplace)
+            {
+                actual_dst = &src1;
+            }
+            else
+            {
+                actual_dst = &src2;
+            }
+        }
 
         auto allocate_tensor = [](TensorType & t)
         {
@@ -89,11 +109,12 @@
 
         // Create and configure function
         FunctionType multiply;
-        multiply.configure(&src1, &src2, (_is_inplace ? &src1 : &dst), scale, convert_policy, rounding_policy, act_info);
+        multiply.configure(&src1, &src2, actual_dst, scale, convert_policy, rounding_policy, act_info);
 
         allocate_tensor(src1);
         allocate_tensor(src2);
 
+        // If don't do in-place computation, still need to allocate original dst
         if(!_is_inplace)
         {
             allocate_tensor(dst);
@@ -106,12 +127,7 @@
         // Compute function
         multiply.run();
 
-        if(_is_inplace)
-        {
-            return src1;
-        }
-
-        return dst;
+        return std::move(*actual_dst);
     }
 
     SimpleTensor<T3> compute_reference(const TensorShape &shape0, const TensorShape &shape1, DataType dt_in1, DataType dt_in2, DataType dt_out,
@@ -122,16 +138,12 @@
         SimpleTensor<T1> src1{ shape0, dt_in1, 1, qinfo0 };
         SimpleTensor<T2> src2{ shape1, dt_in2, 1, qinfo1 };
 
-        // current in-place implementation only supports same metadata of input and output tensors.
-        // By ignoring output quantization information here, we can make test cases implementation much simpler.
-        QuantizationInfo output_qinfo = _is_inplace ? qinfo0 : qinfo_out;
-
         // Fill reference
         fill(src1, 0);
         fill(src2, 1);
 
-        auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, output_qinfo);
-        return act_info.enabled() ? reference::activation_layer(result, act_info, output_qinfo) : result;
+        auto result = reference::pixel_wise_multiplication<T1, T2, T3>(src1, src2, scale, convert_policy, rounding_policy, dt_out, qinfo_out);
+        return act_info.enabled() ? reference::activation_layer(result, act_info, qinfo_out) : result;
     }
 
     TensorType       _target{};