Add section of shape operators

Rework of the shape operations. Shape operations are now done in shape specific
operators rather than being based on type.

shape_t is reworked to a list of size_t values.

Signed-off-by: Eric Kunze <eric.kunze@arm.com>
Change-Id: I2fca0728f9caa6a6fc34a8ce9e389bb581eea959
diff --git a/chapters/data_layout.adoc b/chapters/data_layout.adoc
index 1ce92be..c6afc4f 100644
--- a/chapters/data_layout.adoc
+++ b/chapters/data_layout.adoc
@@ -35,7 +35,7 @@
 
 ==== DIM
 
-Returns a rank 0 tensor of the size of the input tensor for the given axis.
+Returns a length 1 shape_t of the size of the input tensor for the given axis.
 
 include::{generated}/operators/DIM.adoc[]
 
diff --git a/chapters/introduction.adoc b/chapters/introduction.adoc
index ae5c7b1..3922d13 100644
--- a/chapters/introduction.adoc
+++ b/chapters/introduction.adoc
@@ -415,27 +415,23 @@
 The tensor shape in each dimension must be greater than or equal to 1.
 For tensor access information, see <<Tensor Access Helpers>>.
 
-The shape of a tensor of non-zero rank is itself a tensor of rank 1 with elements of type shape_t.
-The single dimension has size which is the rank of the original tensor.
-In this specification a shape-tensor means a rank 1 tensor with elements of type shape_t.
-The components of a shape tensor are rank 0 tensors of type shape_t.
+The shape of a tensor of non-zero rank is a special type shape_t.
+shape_t is a one-dimensional list whose length is equal to the rank of the original tensor.
+The components of a shape_t are of type size_t.
 
-Some operations can process rank 0 or rank 1 tensors of type shape_t.
-For these operations, shape_t is permitted as an input or output tensor data type.
 In this version of the specification, shape_t values must be resolvable to constants at backend compile time.
 
 ==== Tensor size limit
 
 The tensor overall size is limited by the data type size_t.
-This type must be able to hold integers in the range 0 to (1++<<++(MAX_LOG2_SIZE+1)) - 1 where MAX_LOG2_SIZE is defined in <<Levels>>.
-For each tensor, the number of tensor elements multiplied by the element size in bytes (which is taken to be 1 for elements smaller than a 8-bit) must be less than or equal to (1<<(MAX_LOG2_SIZE+1)) - 1.
+This type must be able to hold integers in the range 0 to (1 << (MAX_LOG2_SIZE + 1)) - 1 where MAX_LOG2_SIZE is defined in <<Levels>>.
+For each tensor, the number of tensor elements multiplied by the element size in bytes (which is taken to be 1 for elements smaller than 8 bits) must be less than or equal to (1 << (MAX_LOG2_SIZE + 1)) - 1.
 
-The size of tensors along each of their dimensions is limited by the data type index_t.
-This type must be able to hold integers in the range 0 to (1++<<++MAX_LOG2_SIZE) - 1 where MAX_LOG2_SIZE is defined in <<Levels>>.
-This means that the maximum size of a tensor along each dimension is (1<<MAX_LOG2_SIZE) - 1 and therefore the maximum coordinate value is (1<<MAX_LOG2_SIZE) - 2.
+The size of tensors along each of their dimensions is limited by the data type size_t.
+
+This means that the maximum size of a tensor along each dimension is (1 << MAX_LOG2_SIZE) - 1 and therefore the maximum coordinate value is (1 << MAX_LOG2_SIZE) - 2.
 Indices used to access tensors must be non-negative.
 
-The type shape_t, used in shape tensors, must be able to hold integers in the range -(1++<<++MAX_LOG2_SIZE) to (1++<<++MAX_LOG2_SIZE) - 1.
 
 ==== Data Layouts
 
diff --git a/chapters/operators.adoc b/chapters/operators.adoc
index 698738f..d05ab08 100644
--- a/chapters/operators.adoc
+++ b/chapters/operators.adoc
@@ -1,7 +1,7 @@
 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
-// (C) COPYRIGHT 2020-2021 ARM Limited
+// (C) COPYRIGHT 2020-2021,2024 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
@@ -111,4 +111,6 @@
 
 include::control_flow.adoc[]
 
-include::variable.adoc[]
\ No newline at end of file
+include::variable.adoc[]
+
+include::shape.adoc[]
\ No newline at end of file
diff --git a/chapters/pseudocode.adoc b/chapters/pseudocode.adoc
index 9e3b7bd..8954503 100644
--- a/chapters/pseudocode.adoc
+++ b/chapters/pseudocode.adoc
@@ -63,7 +63,7 @@
 [source,c++]
 ----
 // Convert tensor index coordinates to an element offset
-size_t tensor_index_to_offset(dim_t shape, dim_t index) {
+size_t tensor_index_to_offset(shape_t shape, shape_t index) {
     size_t size = tensor_size(shape);  // check tensor shape is valid
     size_t offset = 0;
     for (int32_t i = 0; i < rank(shape); i++) {
@@ -74,10 +74,10 @@
 }
 
 // Convert an element offset to tensor index coordinates
-dim_t tensor_offset_to_index(dim_t shape, size_t offset) {
+shape_t tensor_offset_to_index(shape_t shape, size_t offset) {
     size_t size = tensor_size(shape);  // check tensor shape is valid
     REQUIRE(offset < size);
-    dim_t index(rank(shape));    // index has rank(shape) indicies
+    shape_t index(rank(shape));    // index has rank(shape) indices
     for(int32_t i = rank(shape) - 1; i >= 0; i--) {
         index[i] = offset % shape[i];
         offset /= shape[i];
@@ -86,7 +86,7 @@
 }
 
 // Check the tensor shape is valid and return the tensor size in elements
-size_t tensor_size(dim_t shape) {
+size_t tensor_size(shape_t shape) {
     size_t size = 1;
     for (int32_t i = 0; i < rank(shape); i++) {
         REQUIRE(1 <= shape[i] && shape[i] <= maximum<size_t> / size);
@@ -97,7 +97,7 @@
 
 // Return the size of the tensor in the given axis
 // For a rank=0 tensor, returns 1 for all axes
-size_t shape_dim(dim_t shape, int axis) {
+size_t shape_dim(shape_t shape, int axis) {
     return (axis >= rank(shape)) ? 1 : shape[axis];
 }
 ----
@@ -110,7 +110,7 @@
 
 [source,c++]
 ----
-in_t tensor_read<in_t>(in_t *address, dim_t shape, dim_t index) {
+in_t tensor_read<in_t>(in_t *address, shape_t shape, shape_t index) {
     size_t offset = tensor_index_to_offset(shape, index);
     return address[offset];
 }
@@ -125,7 +125,7 @@
 
 [source,c++]
 ----
-void tensor_write<type>(<type> *address, dim_t shape, dim_t index, <type> value) {
+void tensor_write<type>(<type> *address, shape_t shape, shape_t index, <type> value) {
     size_t offset = tensor_index_to_offset(shape, index);
     address[offset] = value;
 }
@@ -139,7 +139,7 @@
 
 [source,c++]
 ----
-tensor_t* variable_tensor_allocate<in_t>(dim_t shape, int32_t uid) {
+tensor_t* variable_tensor_allocate<in_t>(shape_t shape, int32_t uid) {
     size_t size = tensor_size(shape);
     tensor_t *allocated_tensor = new tensor_t;
     allocated_tensor->data = new in_t[size];
@@ -176,9 +176,9 @@
 
 [source,c++]
 ----
-dim_t broadcast_shape(dim_t shape1, dim_t shape2) {
+shape_t broadcast_shape(shape_t shape1, shape_t shape2) {
     ERROR_IF(rank(shape1) != rank(shape2));
-    dim_t shape = shape1;
+    shape_t shape = shape1;
     for (int32_t i = 0; i < rank(shape); i++) {
         if (shape[i] == 1) {
             shape[i] = shape2[i];
@@ -198,7 +198,7 @@
 // The function returns the location within in_shape that contributes
 // to the output based on broadcasting rules.
 
-dim_t apply_broadcast(dim_t out_shape, dim_t in_shape, dim_t index) {
+shape_t apply_broadcast(shape_t out_shape, shape_t in_shape, shape_t index) {
     ERROR_IF(rank(out_shape) != rank(in_shape));
     ERROR_IF(rank(out_shape) != rank(index));
     for (int32_t i = 0; i < rank(out_shape); i++) {
diff --git a/chapters/shape.adoc b/chapters/shape.adoc
new file mode 100644
index 0000000..1b58465
--- /dev/null
+++ b/chapters/shape.adoc
@@ -0,0 +1,83 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+=== Shape Operators
+
+The shape operators are operators which describe the shapes of parameters and the corresponding transformations.
+When tensor sizes are unknown, the relationship between sizes can be stored as a sequence of TOSA shape operations.
+At a later point when the shapes are provided, the shape operators are used to propagate shapes appropriately.
+After shape inference, the shape operators may be removed from the TOSA graph.
+
+Having separate shape operations allows easier tracking of shape propagation than would be possible by using the existing TOSA operators.
+
+==== ADD_SHAPE
+
+Elementwise addition of input1 and input2. The lengths of input1 and input2 must match.
+
+include::{generated}/operators/ADD_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/ADD_SHAPE.tosac[lines=10..-1]
+----
+
+==== CONCAT_SHAPE
+
+Concatenates a list of shape_t values to create a new shape_t whose length is the sum of the lengths of the shape_t values in input1.
+
+include::{generated}/operators/CONCAT_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/CONCAT_SHAPE.tosac[lines=10..-1]
+----
+
+==== CONST_SHAPE
+
+A node containing a constant shape.
+
+include::{generated}/operators/CONST_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/CONST_SHAPE.tosac[lines=10..-1]
+----
+
+==== DIV_SHAPE
+
+Elementwise integer divide of input1 by input2. The result of the divide is truncated towards zero.
+
+include::{generated}/operators/DIV_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/DIV_SHAPE.tosac[lines=10..-1]
+----
+
+==== MUL_SHAPE
+
+Elementwise multiplication of input1 and input2.
+
+include::{generated}/operators/MUL_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/MUL_SHAPE.tosac[lines=10..-1]
+----
+
+==== SUB_SHAPE
+
+Elementwise subtraction of input1 and input2.
+
+include::{generated}/operators/SUB_SHAPE.adoc[]
+
+[source,c++]
+----
+include::{pseudocode}/operators/SUB_SHAPE.tosac[lines=10..-1]
+----
diff --git a/pseudocode/operators/ADD.tosac b/pseudocode/operators/ADD.tosac
index 61db3f6..82379e9 100644
--- a/pseudocode/operators/ADD.tosac
+++ b/pseudocode/operators/ADD.tosac
@@ -7,20 +7,12 @@
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.
 
-if (in_out_t == shape_t) {
-    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
-    shape_t value1 = tensor_read<shape_t>(input1, [], []);
-    shape_t value2 = tensor_read<shape_t>(input2, [], []);
-    shape_t result = apply_add_s<shape_t>(value1, value2);
-    tensor_write<shape_t>(output, [], [], result);
-} else {
-    ERROR_IF(shape != broadcast_shape(shape1, shape2));
-    for_each(index in shape) {
-        dim_t index1 = apply_broadcast(shape, shape1, index);
-        dim_t index2 = apply_broadcast(shape, shape2, index);
-        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-        in_out_t result = apply_add_s<in_out_t>(value1, value2);
-        tensor_write<in_out_t>(output, shape, index, result);
-    }
+ERROR_IF(shape != broadcast_shape(shape1, shape2));
+for_each(index in shape) {
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
+    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+    in_out_t result = apply_add_s<in_out_t>(value1, value2);
+    tensor_write<in_out_t>(output, shape, index, result);
 }
diff --git a/pseudocode/operators/ADD_SHAPE.tosac b/pseudocode/operators/ADD_SHAPE.tosac
new file mode 100644
index 0000000..edb7c20
--- /dev/null
+++ b/pseudocode/operators/ADD_SHAPE.tosac
@@ -0,0 +1,14 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+ERROR_IF(length(input1) != length(input2));
+
+for(int32_t index=0; index < length(input1); index++) {
+    output[index] = apply_add_s<size_t>(input1[index], input2[index]);
+}
diff --git a/pseudocode/operators/ARGMAX.tosac b/pseudocode/operators/ARGMAX.tosac
index 982957a..73b74a0 100644
--- a/pseudocode/operators/ARGMAX.tosac
+++ b/pseudocode/operators/ARGMAX.tosac
@@ -24,14 +24,14 @@
         in_t max_value = minimum_s<in_t>;
         out_t max_index = 0;
         for (i = 0; i < shape1[axis]; i++) {
-            dim_t index = flatten(left_index, [i], right_index);
+            shape_t index = flatten(left_index, [i], right_index);
             in_t value = tensor_read<in_t>(input, shape1, index);
             if (apply_max_s<in_t>(value, max_value) != max_value) {
                 max_value = value;
                 max_index = i;
             }
         }
-        dim_t index = flatten(left_index, right_index);
+        shape_t index = flatten(left_index, right_index);
         tensor_write<out_t>(output, shape, index, max_index);
     }
 }
diff --git a/pseudocode/operators/ARITHMETIC_RIGHT_SHIFT.tosac b/pseudocode/operators/ARITHMETIC_RIGHT_SHIFT.tosac
index 7ed4886..dd07fc5 100644
--- a/pseudocode/operators/ARITHMETIC_RIGHT_SHIFT.tosac
+++ b/pseudocode/operators/ARITHMETIC_RIGHT_SHIFT.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
 
diff --git a/pseudocode/operators/BITWISE_AND.tosac b/pseudocode/operators/BITWISE_AND.tosac
index 5efe3ee..f3b647b 100644
--- a/pseudocode/operators/BITWISE_AND.tosac
+++ b/pseudocode/operators/BITWISE_AND.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 & value2;
diff --git a/pseudocode/operators/BITWISE_OR.tosac b/pseudocode/operators/BITWISE_OR.tosac
index b2b7fc6..10afb19 100644
--- a/pseudocode/operators/BITWISE_OR.tosac
+++ b/pseudocode/operators/BITWISE_OR.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 | value2;
diff --git a/pseudocode/operators/BITWISE_XOR.tosac b/pseudocode/operators/BITWISE_XOR.tosac
index 98f0d2c..45e8a0a 100644
--- a/pseudocode/operators/BITWISE_XOR.tosac
+++ b/pseudocode/operators/BITWISE_XOR.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 ^ value2;
diff --git a/pseudocode/operators/CONCAT.tosac b/pseudocode/operators/CONCAT.tosac
index f9329af..ca26f26 100644
--- a/pseudocode/operators/CONCAT.tosac
+++ b/pseudocode/operators/CONCAT.tosac
@@ -9,7 +9,6 @@
 
 ERROR_IF(axis < 0 || axis >= max(1,rank(shapes1[0])));
 ERROR_IF(shape[axis] != sum(shape_dim(shapes1[k], axis) for all k))
-ERROR_IF(in_out_t == shape_t && rank(shape) > 1);
 // The following checks ensure all inputs are compatible for concatenation
 for_each(input_shape in shapes1) {
     ERROR_IF(rank(input_shape) != rank(shapes1[0]));
@@ -18,7 +17,7 @@
     }
 }
 for_each(index1 in shape) {
-    dim_t index2 = index1;
+    shape_t index2 = index1;
     for (tensor t = 0; t < length(input1); t++) {
         // Continue to concatenate along axis from each tensor
         // For each output location, we are looking for the
@@ -30,3 +29,4 @@
         index2[axis] = index2[axis] - shape_dim(shapes1[t], axis);
     }
 }
+
diff --git a/pseudocode/operators/CONCAT_SHAPE.tosac b/pseudocode/operators/CONCAT_SHAPE.tosac
new file mode 100644
index 0000000..4b6def6
--- /dev/null
+++ b/pseudocode/operators/CONCAT_SHAPE.tosac
@@ -0,0 +1,18 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+ERROR_IF(length(output) != sum(length(input1[k]) for all k));
+
+size_t index = 0;
+for (int32_t i=0; i < length(input1); i++) {
+    for (int32_t j=0; j < length(input1[i]); j++) {
+        output[index] = input1[i][j];
+        index++;
+    }
+}
diff --git a/pseudocode/operators/CONST_SHAPE.tosac b/pseudocode/operators/CONST_SHAPE.tosac
new file mode 100644
index 0000000..d14ead2
--- /dev/null
+++ b/pseudocode/operators/CONST_SHAPE.tosac
@@ -0,0 +1,10 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+output = values;
diff --git a/pseudocode/operators/DIM.tosac b/pseudocode/operators/DIM.tosac
index 7131173..2105c62 100644
--- a/pseudocode/operators/DIM.tosac
+++ b/pseudocode/operators/DIM.tosac
@@ -8,4 +8,4 @@
 // by a licensing agreement from ARM Limited.
 
 ERROR_IF(axis >= rank(shape));
-tensor_write<shape_t>(output, [], [], shape_dim(shape, axis));
+output[0] = shape_dim(shape, axis);
diff --git a/pseudocode/operators/DIV_SHAPE.tosac b/pseudocode/operators/DIV_SHAPE.tosac
new file mode 100644
index 0000000..3f73c24
--- /dev/null
+++ b/pseudocode/operators/DIV_SHAPE.tosac
@@ -0,0 +1,15 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+ERROR_IF(length(input1) != length(input2));
+
+for(int32_t index=0; index < length(input1); index++) {
+    REQUIRE(input2[index] != 0);
+    output[index] = apply_intdiv_s<size_t>(input1[index], input2[index]);
+}
diff --git a/pseudocode/operators/EQUAL.tosac b/pseudocode/operators/EQUAL.tosac
index 3445e27..bcbce33 100644
--- a/pseudocode/operators/EQUAL.tosac
+++ b/pseudocode/operators/EQUAL.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
     out_t result;
diff --git a/pseudocode/operators/GREATER.tosac b/pseudocode/operators/GREATER.tosac
index 3155f23..6874bd0 100644
--- a/pseudocode/operators/GREATER.tosac
+++ b/pseudocode/operators/GREATER.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
     out_t result;
diff --git a/pseudocode/operators/GREATER_EQUAL.tosac b/pseudocode/operators/GREATER_EQUAL.tosac
index 2f43d40..2a63077 100644
--- a/pseudocode/operators/GREATER_EQUAL.tosac
+++ b/pseudocode/operators/GREATER_EQUAL.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_t value1 = tensor_read<in_t>(input1, shape1, index1);
     in_t value2 = tensor_read<in_t>(input2, shape2, index2);
     out_t result;
diff --git a/pseudocode/operators/INTDIV.tosac b/pseudocode/operators/INTDIV.tosac
index b6d46cc..bf3126b 100644
--- a/pseudocode/operators/INTDIV.tosac
+++ b/pseudocode/operators/INTDIV.tosac
@@ -7,25 +7,16 @@
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.
 
-if (in_out_t == shape_t) {
-    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
-    shape_t value1 = tensor_read<shape_t>(input1, [], []);
-    shape_t value2 = tensor_read<shape_t>(input2, [], []);
+ERROR_IF(shape != broadcast_shape(shape1, shape2));
+for_each(index in shape) {
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
+    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     REQUIRE(value2 != 0);
-    shape_t result = value1 / value2;
-    tensor_write<shape_t>(output, [], [], result);
-} else {
-    ERROR_IF(shape != broadcast_shape(shape1, shape2));
-    for_each(index in shape) {
-        dim_t index1 = apply_broadcast(shape, shape1, index);
-        dim_t index2 = apply_broadcast(shape, shape2, index);
-        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-        REQUIRE(value2 != 0);
-        // This catches the case where we divide minimum<in_out_t> by -1
-        // which is not representable in two's complement
-        REQUIRE(static_cast<int64_t>(value1) / static_cast<int64_t>(value2) <= maximum_s<in_out_t>);
-        in_out_t result = apply_intdiv_s<in_out_t>(value1, value2);
-        tensor_write<in_out_t>(output, shape, index, result);
-    }
+    // This catches the case where we divide minimum<in_out_t> by -1
+    // which is not representable in two's complement
+    REQUIRE(static_cast<int64_t>(value1) / static_cast<int64_t>(value2) <= maximum_s<in_out_t>);
+    in_out_t result = apply_intdiv_s<in_out_t>(value1, value2);
+    tensor_write<in_out_t>(output, shape, index, result);
 }
diff --git a/pseudocode/operators/LOGICAL_AND.tosac b/pseudocode/operators/LOGICAL_AND.tosac
index ff03804..23bda8d 100644
--- a/pseudocode/operators/LOGICAL_AND.tosac
+++ b/pseudocode/operators/LOGICAL_AND.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 && value2;
diff --git a/pseudocode/operators/LOGICAL_LEFT_SHIFT.tosac b/pseudocode/operators/LOGICAL_LEFT_SHIFT.tosac
index 4057163..c8b2862 100644
--- a/pseudocode/operators/LOGICAL_LEFT_SHIFT.tosac
+++ b/pseudocode/operators/LOGICAL_LEFT_SHIFT.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     REQUIRE(0 <= value2 && value2 <= 31);
diff --git a/pseudocode/operators/LOGICAL_OR.tosac b/pseudocode/operators/LOGICAL_OR.tosac
index 4e3aa9e..de09fed 100644
--- a/pseudocode/operators/LOGICAL_OR.tosac
+++ b/pseudocode/operators/LOGICAL_OR.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 || value2;
diff --git a/pseudocode/operators/LOGICAL_RIGHT_SHIFT.tosac b/pseudocode/operators/LOGICAL_RIGHT_SHIFT.tosac
index 5e80211..2896b0f 100644
--- a/pseudocode/operators/LOGICAL_RIGHT_SHIFT.tosac
+++ b/pseudocode/operators/LOGICAL_RIGHT_SHIFT.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     REQUIRE(0 <= static_cast<int32_t>(value2) && static_cast<int32_t>(value2) <= 31);
diff --git a/pseudocode/operators/LOGICAL_XOR.tosac b/pseudocode/operators/LOGICAL_XOR.tosac
index fbb485b..08fa6bd 100644
--- a/pseudocode/operators/LOGICAL_XOR.tosac
+++ b/pseudocode/operators/LOGICAL_XOR.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = value1 != value2;
diff --git a/pseudocode/operators/MAXIMUM.tosac b/pseudocode/operators/MAXIMUM.tosac
index 0f4f3b7..c34e2ba 100644
--- a/pseudocode/operators/MAXIMUM.tosac
+++ b/pseudocode/operators/MAXIMUM.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = apply_max_s<in_out_t>(value1, value2);
diff --git a/pseudocode/operators/MINIMUM.tosac b/pseudocode/operators/MINIMUM.tosac
index fa47b03..659cba6 100644
--- a/pseudocode/operators/MINIMUM.tosac
+++ b/pseudocode/operators/MINIMUM.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = apply_min_s(value1, value2);
diff --git a/pseudocode/operators/MUL.tosac b/pseudocode/operators/MUL.tosac
index 078525e..5cc9f80 100644
--- a/pseudocode/operators/MUL.tosac
+++ b/pseudocode/operators/MUL.tosac
@@ -7,31 +7,23 @@
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.
 
-if (in_out_t == shape_t) {
-    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
-    shape_t value1 = tensor_read<shape_t>(input1, [], []);
-    shape_t value2 = tensor_read<shape_t>(input2, [], []);
-    shape_t result = value1 * value2;
-    tensor_write<shape_t>(output, [], [], result);
-} else {
-    REQUIRE(0 <= shift && shift <= 63);
-    REQUIRE(in_t == int32_t || shift == 0);
-    ERROR_IF(shape != broadcast_shape(shape1, shape2));
-    for_each(index in shape) {
-        dim_t index1 = apply_broadcast(shape, shape1, index);
-        dim_t index2 = apply_broadcast(shape, shape2, index);
-        in_t value1 = tensor_read<in_t>(input1, shape1, index1);
-        in_t value2 = tensor_read<in_t>(input2, shape2, index2);
-        out_t result;
-        if (in_t == i32_t && shift > 0) {
-            int64_t product = sign_extend<int64_t>(value1) * sign_extend<int64_t>(value2);
-            int64_t round   = static_cast<int64_t>(1) << (shift - 1);
-            product = (product + round) >> shift;
-            REQUIRE(product >= minimum_s<i32_t> && product <= maximum_s<i32_t>)
-            result = product;
-        } else {
-            result = apply_mul_s(value1, value2);  // low 32-bits of result for i32_t
-        }
-        tensor_write<out_t>(output, shape, index, result);
+REQUIRE(0 <= shift && shift <= 63);
+REQUIRE(in_t == int32_t || shift == 0);
+ERROR_IF(shape != broadcast_shape(shape1, shape2));
+for_each(index in shape) {
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
+    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
+    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
+    out_t result;
+    if (in_t == i32_t && shift > 0) {
+        int64_t product = sign_extend<int64_t>(value1) * sign_extend<int64_t>(value2);
+        int64_t round   = static_cast<int64_t>(1) << (shift - 1);
+        product = (product + round) >> shift;
+        REQUIRE(product >= minimum_s<i32_t> && product <= maximum_s<i32_t>)
+        result = product;
+    } else {
+        result = apply_mul_s(value1, value2);  // low 32-bits of result for i32_t
     }
+    tensor_write<out_t>(output, shape, index, result);
 }
diff --git a/pseudocode/operators/MUL_SHAPE.tosac b/pseudocode/operators/MUL_SHAPE.tosac
new file mode 100644
index 0000000..f52f416
--- /dev/null
+++ b/pseudocode/operators/MUL_SHAPE.tosac
@@ -0,0 +1,14 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+ERROR_IF(length(input1) != length(input2));
+
+for(int32_t index=0; index < length(input1); index++) {
+    output[index] = apply_mul_s<size_t>(input1[index], input2[index]);
+}
diff --git a/pseudocode/operators/PAD.tosac b/pseudocode/operators/PAD.tosac
index 4adf114..45ef674 100644
--- a/pseudocode/operators/PAD.tosac
+++ b/pseudocode/operators/PAD.tosac
@@ -10,14 +10,14 @@
 // Check output shape matches the padded input shape
 ERROR_IF(rank(shape) != rank(shape1));
 for (i = 0; i < rank(shape); i++) {
-    ERROR_IF(padding[i,0] < 0 || padding[i,1] < 0);
-    ERROR_IF(shape[i] != padding[i, 0] + shape1[i] + padding[i, 1]);
+    ERROR_IF(padding[i * 2] < 0 || padding[(i * 2) + 1] < 0);
+    ERROR_IF(shape[i] != padding[i * 2] + shape1[i] + padding[(i * 2) + 1]);
 }
 for_each(index in shape) {
-    dim_t index1 = index;
+    shape_t index1 = index;
     bool_t is_pad = false;
     for(i = 0; i < rank(shape); i++) {
-        index1[i] = index1[i] - padding[i,0];
+        index1[i] = index1[i] - padding[i * 2];
-        if (index1[i] < 0 || index[i] >= length(shape[i])) {
+        if (index1[i] < 0 || index1[i] >= shape1[i]) {
             is_pad = true;
         }
diff --git a/pseudocode/operators/POW.tosac b/pseudocode/operators/POW.tosac
index 6ecc8e6..482ea18 100644
--- a/pseudocode/operators/POW.tosac
+++ b/pseudocode/operators/POW.tosac
@@ -9,8 +9,8 @@
 
 ERROR_IF(shape != broadcast_shape(shape1, shape2));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
     in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t result = apply_pow<in_out_t>(value1, value2);
diff --git a/pseudocode/operators/RESHAPE.tosac b/pseudocode/operators/RESHAPE.tosac
index 41439c3..af1be87 100644
--- a/pseudocode/operators/RESHAPE.tosac
+++ b/pseudocode/operators/RESHAPE.tosac
@@ -13,7 +13,7 @@
     // Calculate flattened index for the output location (index)
     size_t offset = tensor_index_to_offset(shape, index);
     // Now convert to the location in the input
-    dim_t tmp_index = tensor_offset_to_index(shape1, offset);
+    shape_t tmp_index = tensor_offset_to_index(shape1, offset);
 
     // Now read/write the value
     in_out_t val = tensor_read<in_out_t>(input1, shape1, tmp_index);
diff --git a/pseudocode/operators/REVERSE.tosac b/pseudocode/operators/REVERSE.tosac
index 63830d2..4a39cf2 100644
--- a/pseudocode/operators/REVERSE.tosac
+++ b/pseudocode/operators/REVERSE.tosac
@@ -9,7 +9,7 @@
 
 ERROR_IF(axis < 0 || axis >= rank(shape));
 for_each(index in shape) {
-    dim_t tmp_index = index;
+    shape_t tmp_index = index;
     tmp_index[axis] = shape[axis] - 1 - index[axis];
     in_out_t value = tensor_read<in_out_t>(input, shape, tmp_index);
     tensor_write<in_out_t>(output, shape, index, value);
diff --git a/pseudocode/operators/SELECT.tosac b/pseudocode/operators/SELECT.tosac
index fe8d760..c55ad8e 100644
--- a/pseudocode/operators/SELECT.tosac
+++ b/pseudocode/operators/SELECT.tosac
@@ -9,9 +9,9 @@
 
 ERROR_IF(shape != broadcast_shape(broadcast_shape(shape1, shape2), shape3));
 for_each(index in shape) {
-    dim_t index1 = apply_broadcast(shape, shape1, index);
-    dim_t index2 = apply_broadcast(shape, shape2, index);
-    dim_t index3 = apply_broadcast(shape, shape3, index);
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
+    shape_t index3 = apply_broadcast(shape, shape3, index);
     bool_t value1 = tensor_read<bool_t>(input1, shape1, index1);
     in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
     in_out_t value3 = tensor_read<in_out_t>(input3, shape3, index3);
diff --git a/pseudocode/operators/SLICE.tosac b/pseudocode/operators/SLICE.tosac
index 0ae0214..b6f70c5 100644
--- a/pseudocode/operators/SLICE.tosac
+++ b/pseudocode/operators/SLICE.tosac
@@ -19,7 +19,7 @@
 }
 
 for_each(index in shape) {
-    dim_t tmp_index = index;
+    shape_t tmp_index = index;
     for(i = 0; i < rank(shape); i++) {
        tmp_index[i] = index[i] + start[i];
     }
diff --git a/pseudocode/operators/SUB.tosac b/pseudocode/operators/SUB.tosac
index ac88b76..1ecc6e0 100644
--- a/pseudocode/operators/SUB.tosac
+++ b/pseudocode/operators/SUB.tosac
@@ -7,20 +7,12 @@
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.
 
-if (in_out_t == shape_t) {
-    ERROR_IF(rank(shape) != 0 || rank(shape1) != 0 || rank(shape2) != 0);
-    shape_t value1 = tensor_read<shape_t>(input1, [], []);
-    shape_t value2 = tensor_read<shape_t>(input2, [], []);
-    shape_t result = apply_sub<shape_t>(value1, value2);
-    tensor_write<shape_t>(output, [], [], result);
-} else {
-    ERROR_IF(shape != broadcast_shape(shape1, shape2));
-    for_each(index in shape) {
-        dim_t index1 = apply_broadcast(shape, shape1, index);
-        dim_t index2 = apply_broadcast(shape, shape2, index);
-        in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
-        in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
-        in_out_t result = apply_sub_s<in_out_t>(value1, value2);
-        tensor_write<in_out_t>(output, shape, index, result);
-    }
+ERROR_IF(shape != broadcast_shape(shape1, shape2));
+for_each(index in shape) {
+    shape_t index1 = apply_broadcast(shape, shape1, index);
+    shape_t index2 = apply_broadcast(shape, shape2, index);
+    in_out_t value1 = tensor_read<in_out_t>(input1, shape1, index1);
+    in_out_t value2 = tensor_read<in_out_t>(input2, shape2, index2);
+    in_out_t result = apply_sub_s<in_out_t>(value1, value2);
+    tensor_write<in_out_t>(output, shape, index, result);
 }
diff --git a/pseudocode/operators/SUB_SHAPE.tosac b/pseudocode/operators/SUB_SHAPE.tosac
new file mode 100644
index 0000000..eff2687
--- /dev/null
+++ b/pseudocode/operators/SUB_SHAPE.tosac
@@ -0,0 +1,14 @@
+//
+// This confidential and proprietary software may be used only as
+// authorised by a licensing agreement from ARM Limited
+// (C) COPYRIGHT 2024 ARM Limited
+// ALL RIGHTS RESERVED
+// The entire notice above must be reproduced on all authorised
+// copies and copies may only be made to the extent permitted
+// by a licensing agreement from ARM Limited.
+
+ERROR_IF(length(input1) != length(input2));
+
+for(int32_t index=0; index < length(input1); index++) {
+    output[index] = apply_sub_s<size_t>(input1[index], input2[index]);
+}
diff --git a/pseudocode/operators/TILE.tosac b/pseudocode/operators/TILE.tosac
index d053b8f..be1cfee 100644
--- a/pseudocode/operators/TILE.tosac
+++ b/pseudocode/operators/TILE.tosac
@@ -10,7 +10,7 @@
 ERROR_IF(rank(shape1) != rank(shape));
 
 for_each(index in shape) {
-    dim_t tmp_index = index;
+    shape_t tmp_index = index;
     for(i = 0; i < rank(shape); i++) {
         ERROR_IF(shape1[i] * multiples[i] != shape[i]);
         tmp_index[i] = index[i] % shape1[i];
diff --git a/pseudocode/operators/TRANSPOSE.tosac b/pseudocode/operators/TRANSPOSE.tosac
index e75db79..3981f54 100644
--- a/pseudocode/operators/TRANSPOSE.tosac
+++ b/pseudocode/operators/TRANSPOSE.tosac
@@ -26,7 +26,7 @@
 }
 
 for_each(index in shape) {
-    dim_t tmp_index = index;
+    shape_t tmp_index = index;
     for(i = 0; i < rank(shape); i++) {
         tmp_index[perms[i]] = index[i];
     }
diff --git a/tools/genspec.py b/tools/genspec.py
index b8e961b..bdca32c 100755
--- a/tools/genspec.py
+++ b/tools/genspec.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright (c) 2023, ARM Limited.
+# Copyright (c) 2023-2024, ARM Limited.
 # SPDX-License-Identifier: Apache-2.0
 import os
 
@@ -40,12 +40,17 @@
 
             # Type
             if arg.type == "tensor_t":
-                argtype = "T<{}>".format(arg.tensor_element_type)
+                argtype = f"T<{arg.tensor_element_type}>"
             elif arg.type == "tensor_list_t":
                 if arg.tensor_element_type == "-":
                     argtype = "tensor_list_t"
                 else:
-                    argtype = "tensor_list_t<T<{}>>".format(arg.tensor_element_type)
+                    argtype = f"tensor_list_t<T<{arg.tensor_element_type}>>"
+            elif arg.type == "shape_t":
+                if arg.shape != "-":
+                    argtype = f"shape_t<{arg.shape}>"
+                else:
+                    argtype = "shape_t<>"
             else:
                 argtype = arg.type
 
diff --git a/tosa.xml b/tosa.xml
index 613fdf9..ed5a25b 100644
--- a/tosa.xml
+++ b/tosa.xml
@@ -822,7 +822,6 @@
           <type name='in_out_t'/>
         </types>
         <typesupport mode="signed 32" in_out_t="i32_t"/>
-        <typesupport mode="shape" in_out_t="shape_t"/>
         <typesupport mode="fp16" in_out_t="fp16_t" >
           <profile name="MI"/>
           <profile name="MT"/>
@@ -957,7 +956,6 @@
           <type name='in_out_t'/>
         </types>
         <typesupport mode="signed 32" in_out_t="i32_t"/>
-        <typesupport mode="shape" in_out_t="shape_t"/>
       </operator>
       <operator>
         <name>LOGICAL_AND</name>
@@ -1169,7 +1167,6 @@
         <typesupport mode="signed 8" in_t="i8_t" out_t="i32_t"/>
         <typesupport mode="signed 16" in_t="i16_t" out_t="i32_t"/>
         <typesupport mode="signed 32" in_t="i32_t" out_t="i32_t"/>
-        <typesupport mode="shape" in_t="shape_t" out_t="shape_t"/>
         <typesupport mode="fp16" in_t="fp16_t" out_t="fp16_t">
           <profile name="MI"/>
           <profile name="MT"/>
@@ -1237,7 +1234,6 @@
           <type name='in_out_t'/>
         </types>
         <typesupport mode="signed 32" in_out_t="i32_t"/>
-        <typesupport mode="shape" in_out_t="shape_t"/>
         <typesupport mode="fp16" in_out_t="fp16_t" >
           <profile name="MI"/>
           <profile name="MT"/>
@@ -1936,7 +1932,6 @@
         <typesupport mode="signed 8" in_out_t="i8_t"/>
         <typesupport mode="signed 16" in_out_t="i16_t"/>
         <typesupport mode="signed 32" in_out_t="i32_t"/>
-        <typesupport mode="shape" in_out_t="shape_t"/>
         <typesupport mode="fp16" in_out_t="fp16_t">
           <profile name="MI"/>
           <profile name="MT"/>
@@ -1957,9 +1952,10 @@
             <description>Input tensor</description>
             <rank min="1" max="MAX_RANK"/>
           </argument>
-          <argument category="input" name="padding" type="tensor_t" shape="[rank(shape1),2]" tensor-element-type="shape_t">
-            <description>Number of pad elements at the start and end of each dimension</description>
-            <rank min="2" max="2"/>
+          <argument category="input" name="padding" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Number of pad elements at the start and end of each dimension.
+            The values in padding are interpreted as (start, end) pairs, one pair per dimension.
+            As an example for rank 2, the values would be interpreted as [start_dim0, end_dim0, start_dim1, end_dim1].</description>
           </argument>
           <argument category="attribute" name="pad_const" type="tensor_t" shape="-" tensor-element-type="in_out_t">
             <description>Constant value to be used as padding</description>
@@ -2003,9 +1999,8 @@
             <description>Axis in range from 0 to rank(shape) - 1</description>
             <rank min="0" max="0"/>
           </argument>
-          <argument category="output" name="output" type="tensor_t" shape="-" tensor-element-type="shape_t" >
-            <description>Output rank 0 tensor giving the size of the shape for the given axis</description>
-            <rank min="0" max="0"/>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-" >
+            <description>Output shape_t of length 1 giving the size of the input shape for the given axis</description>
           </argument>
         </arguments>
         <types>
@@ -2036,9 +2031,8 @@
             <levellimit value="rank(shape1)" limit="MAX_RANK"/>
             <rank min="1" max="MAX_RANK"/>
           </argument>
-          <argument category="input" name="shape" type="tensor_t" shape="[rank(shape)]" tensor-element-type="shape_t">
-            <description>1D shape tensor giving the new shape.</description>
-            <rank min="1" max="1"/>
+          <argument category="input" name="shape" type="shape_t" shape="-" tensor-element-type="-">
+            <description>shape_t giving the new shape.</description>
           </argument>
           <argument category="output" name="output" type="tensor_t" shape="shape" tensor-element-type="in_out_t">
             <description>Output tensor of same type, size as the input tensor</description>
@@ -2090,7 +2084,6 @@
         <typesupport mode="signed 8" in_out_t="i8_t"/>
         <typesupport mode="signed 16" in_out_t="i16_t"/>
         <typesupport mode="signed 32" in_out_t="i32_t"/>
-        <typesupport mode="shape" in_out_t="shape_t"/>
         <typesupport mode="fp16" in_out_t="fp16_t">
           <profile name="MI"/>
           <profile name="MT"/>
@@ -2111,11 +2104,11 @@
             <description>Input tensor</description>
             <rank min="1" max="MAX_RANK"/>
           </argument>
-          <argument category="attribute" name="start" type="tensor_t" shape="[rank(shape1)]" tensor-element-type="index_t">
+          <argument category="input" name="start" type="shape_t" shape="[rank(shape1)]" tensor-element-type="-">
             <description>List of integer coordinates, of length equal to the rank of input1. Start coordinate for slicing.</description>
             <rank min="1" max="1"/>
           </argument>
-          <argument category="attribute" name="size" type="tensor_t" shape="[rank(shape1)]" tensor-element-type="index_t">
+          <argument category="input" name="size" type="shape_t" shape="[rank(shape1)]" tensor-element-type="-">
             <description>List of integer size values, of length equal to the rank of input1. Size of the input to be
 used.</description>
             <levellimit value="rank(shape)" limit="MAX_RANK"/>
@@ -2153,7 +2146,7 @@
             <description>Input tensor</description>
             <rank min="1" max="MAX_RANK"/>
           </argument>
-          <argument category="input" name="multiples" type="tensor_t" shape="[rank(shape1)]" tensor-element-type="shape_t">
+          <argument category="input" name="multiples" type="shape_t" shape="[rank(shape1)]" tensor-element-type="-">
             <description>Number of times to replicate input1 in each dimension</description>
             <rank min="1" max="1"/>
           </argument>
@@ -2229,7 +2222,7 @@
             <description>3D value tensor</description>
             <rank min="3" max="3"/>
           </argument>
-          <argument category="input" name="indices" type="tensor_t" shape="[N,W]" tensor-element-type="index_t">
+          <argument category="input" name="indices" type="tensor_t" shape="[N,W]" tensor-element-type="size_t">
             <description>2D index tensor</description>
             <rank min="2" max="2"/>
           </argument>
@@ -2264,7 +2257,7 @@
             <description>3D values in tensor</description>
             <rank min="3" max="3"/>
           </argument>
-          <argument category="input" name="indices" type="tensor_t" shape="[N,W]" tensor-element-type="index_t">
+          <argument category="input" name="indices" type="tensor_t" shape="[N,W]" tensor-element-type="size_t">
             <description>2D index tensor</description>
             <rank min="2" max="2"/>
           </argument>
@@ -2305,17 +2298,17 @@
             <description>Input tensor</description>
             <rank min="4" max="4"/>
           </argument>
-          <argument category="input" name="scale" type="tensor_t" shape="[4]" tensor-element-type="shape_t">
+          <argument category="input" name="scale" type="shape_t" shape="[4]" tensor-element-type="-">
             <description>[scale_y_n, scale_y_d, scale_x_n, scale_x_d]</description>
             <levellimit value="scale_y_n/scale_y_d" limit="MAX_SCALE"/>
             <levellimit value="scale_x_n/scale_x_d" limit="MAX_SCALE"/>
             <rank min="1" max="1"/>
           </argument>
-          <argument category="input" name="offset" type="tensor_t" shape="[2]" tensor-element-type="shape_t">
+          <argument category="input" name="offset" type="shape_t" shape="[2]" tensor-element-type="-">
             <description>[offset_y, offset_x]</description>
             <rank min="1" max="1"/>
           </argument>
-          <argument category="input" name="border" type="tensor_t" shape="[2]" tensor-element-type="shape_t">
+          <argument category="input" name="border" type="shape_t" shape="[2]" tensor-element-type="-">
             <description>[border_y, border_x]</description>
             <rank min="1" max="1"/>
           </argument>
@@ -2559,7 +2552,6 @@
         <typesupport mode="16-bit" out_t="i16_t" />
         <typesupport mode="32-bit" out_t="i32_t" />
         <typesupport mode="48-bit" out_t="i48_t" />
-        <typesupport mode="shape" out_t="shape_t" />
         <typesupport mode="fp16" out_t="fp16_t" >
           <profile name="MI"/>
           <profile name="MT"/>
@@ -2680,7 +2672,7 @@
             <description>Globally unique identifier for the declared variable tensor.</description>
             <rank min="0" max="0"/>
           </argument>
-          <argument category="attribute" name="var_shape" type="tensor_t" shape="var_shape" tensor-element-type="index_t">
+          <argument category="attribute" name="var_shape" type="tensor_t" shape="var_shape" tensor-element-type="size_t">
             <description>The variable tensor shape</description>
             <rank min="1" max="1"/>
           </argument>
@@ -2724,6 +2716,86 @@
         </arguments>
       </operator>
     </operatorgroup>
+    <operatorgroup name="shape">
+      <operator>
+        <name>ADD_SHAPE</name>
+        <arguments>
+          <argument category="input" name="input1" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 1</description>
+          </argument>
+          <argument category="input" name="input2" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 2</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+      <operator>
+        <name>CONCAT_SHAPE</name>
+        <arguments>
+          <argument category="input" name="input1" type="shape_list_t" shape="-" tensor-element-type="-">
+            <description>List of input shape values</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+      <operator>
+        <name>CONST_SHAPE</name>
+        <arguments>
+          <argument category="attribute" name="values" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Constant shape</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+      <operator>
+        <name>DIV_SHAPE</name>
+        <arguments>
+          <argument category="input" name="input1" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 1</description>
+          </argument>
+          <argument category="input" name="input2" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 2</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+      <operator>
+        <name>MUL_SHAPE</name>
+        <arguments>
+          <argument category="input" name="input1" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 1</description>
+          </argument>
+          <argument category="input" name="input2" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 2</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+      <operator>
+        <name>SUB_SHAPE</name>
+        <arguments>
+          <argument category="input" name="input1" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 1</description>
+          </argument>
+          <argument category="input" name="input2" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Input 2</description>
+          </argument>
+          <argument category="output" name="output" type="shape_t" shape="-" tensor-element-type="-">
+            <description>Output shape</description>
+          </argument>
+        </arguments>
+      </operator>
+    </operatorgroup>
   </operators>
 
   <enum name="resize_mode_t" description="Valid resize types">
diff --git a/tosa.xsd b/tosa.xsd
index c4af523..a52c1a7 100644
--- a/tosa.xsd
+++ b/tosa.xsd
@@ -41,7 +41,7 @@
     <xs:enumeration value="fp16_t"/>
     <xs:enumeration value="bf16_t"/>
     <xs:enumeration value="fp32_t"/>
-    <xs:enumeration value="shape_t"/>
+    <xs:enumeration value="size_t"/>
   </xs:restriction>
 </xs:simpleType>
 
@@ -61,7 +61,6 @@
     <xs:enumeration value="weight_t"/>
     <xs:enumeration value="resize_t"/>
     <xs:enumeration value="table_t"/>
-    <xs:enumeration value="index_t"/>
     <xs:enumeration value="mul_t"/>
     <xs:enumeration value="TABLE_SIZE"/>
     <xs:enumeration value="var_t"/>
@@ -83,6 +82,8 @@
     <xs:enumeration value="tensor_list_t"/>
     <xs:enumeration value="tosa_graph_t"/>
     <xs:enumeration value="String"/>
+    <xs:enumeration value="shape_t"/>
+    <xs:enumeration value="shape_list_t"/>
   </xs:restriction>
 </xs:simpleType>