COMPMID-1519: Add support for 3D input/output in CLGEMMLowpOutputStage

Change-Id: I637add70310d2da4d82b236a6352af9d33be17a1
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/149706
Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index b4ab10c..804ff3c 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -162,7 +162,7 @@
 {
     TensorShape shape_vector_sum_row{ a.tensor_shape() };
     shape_vector_sum_row.set(Window::DimX, a.dimension(1));
-    if(a.num_dimensions() > 1)
+    if(shape_vector_sum_row.num_dimensions() > 1)
     {
         shape_vector_sum_row.remove_dimension(1);
     }
@@ -513,13 +513,17 @@
     return output_shape;
 }
 
-inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1)
+inline TensorShape compute_output_stage_shape(const ITensorInfo &input, unsigned int gemm_3d_depth = 1, bool batch_size_on_z = false)
 {
     ARM_COMPUTE_ERROR_ON(input.data_layout() != DataLayout::NHWC && gemm_3d_depth > 1);
 
     TensorShape output_shape = input.tensor_shape();
     if(gemm_3d_depth > 1)
     {
+        if(batch_size_on_z)
+        {
+            output_shape.shift_right(1);
+        }
         output_shape.set(0, input.tensor_shape().x());
         output_shape.set(1, input.tensor_shape().y() / gemm_3d_depth);
         output_shape.set(2, gemm_3d_depth);