IVGCVSW-3808 Add ElementwiseBinaryLayer

!android-nn-driver:9329

 * Added ElementwiseBinaryLayer that can represent all ElementwiseBinary
   operations including Add, Div, Sub, Maximum, Mul and Minimum.
 * Updated Delegate to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated Deserializer to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated OnnxParser to use ElementwiseBinaryLayer instead of the Add
   layer.
 * Updated TfLiteParser to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated CL and Neon tests to use ElementwiseBinaryLayer.
 * Updated CL and Neon Backend Specific Optimizations to accept
   ElementBinaryLayers as well as Add, Div, Mul, Sub, Maximum and Minimum
   layers.

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I7cbb96b60eb01f0e2b57b0541016d48a08b86c75
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 6bf510a..0228677 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include "ClWorkloadFactory.hpp"
@@ -405,6 +405,75 @@
             auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
             return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
         }
+        case LayerType::ElementwiseBinary :
+        {
+            auto elementwiseBinaryQueueDescriptor
+                    = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+
+            switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
+            {
+                case BinaryOperation::Add:
+                {
+                    AdditionQueueDescriptor additionQueueDescriptor;
+                    additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    additionQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
+                }
+                case BinaryOperation::Div:
+                {
+                    DivisionQueueDescriptor divisionQueueDescriptor;
+                    divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    divisionQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
+                }
+                case BinaryOperation::Maximum:
+                {
+                    MaximumQueueDescriptor maximumQueueDescriptor;
+                    maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    maximumQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
+                }
+                case BinaryOperation::Minimum:
+                {
+                    MinimumQueueDescriptor minimumQueueDescriptor;
+                    minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    minimumQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
+                }
+                case BinaryOperation::Mul:
+                {
+                    MultiplicationQueueDescriptor multiplicationQueueDescriptor;
+                    multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    multiplicationQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
+                                                                      info,
+                                                                      m_CLCompileContext);
+                }
+                case BinaryOperation::Sub:
+                {
+                    SubtractionQueueDescriptor subtractionQueueDescriptor;
+                    subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+                    subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+                    subtractionQueueDescriptor.m_AdditionalInfoObject =
+                            elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+                    return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
+                                                                   info,
+                                                                   m_CLCompileContext);
+                }
+                default:
+                    return nullptr;
+            }
+        }
         case LayerType::ElementwiseUnary :
         {
             auto elementwiseUnaryQueueDescriptor