//
// This confidential and proprietary software may be used only as
// authorised by a licensing agreement from ARM Limited
// (C) COPYRIGHT 2020-2021 ARM Limited
// ALL RIGHTS RESERVED
// The entire notice above must be reproduced on all authorised
// copies and copies may only be made to the extent permitted
// by a licensing agreement from ARM Limited.
=== Elementwise Binary Operators
==== ADD
Elementwise addition of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = apply_add<in_t>(value1, value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
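The apply_broadcast helper used above is one of the specification's pseudocode helpers. As a non-normative sketch of what the broadcasting rule implies (an input axis of size 1 always reads element 0; the signature and array representation here are illustrative assumptions):
[source,c++]
----
// Non-normative sketch: map an output index to an input index,
// collapsing any input axis of size 1 onto position 0.
// Assumes shapes and indices are integer arrays of length rank.
void apply_broadcast_sketch(int rank, const int* in_shape,
                            const int* out_index, int* in_index) {
    for (int axis = 0; axis < rank; axis++) {
        // A size-1 input axis is broadcast: always read element 0
        in_index[axis] = (in_shape[axis] == 1) ? 0 : out_index[axis];
    }
}
----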
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
|===
==== ARITHMETIC_RIGHT_SHIFT
Elementwise arithmetic right shift of input1 by the amount specified in input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Input|bool_t|round|-|If true then the shift is rounded
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    // Ensure that the shift amount is appropriate for the data type
    REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) ||
            (in_t == int16_t && 0 <= value2 && value2 <= 15) ||
            (in_t == int8_t  && 0 <= value2 && value2 <=  7));
    in_t result = value1 >> value2;
    // With rounding enabled, add one if the most significant shifted-out bit was set
    if (round == true && value2 > 0 && ((value1 >> (value2 - 1)) & 1) != 0) {
        result = result + 1;
    }
    result = apply_clip<in_t>(result, minimum<in_t>, maximum<in_t>);
    tensor_write<in_t>(output, shape, index, result);
}
----
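As a worked illustration of the rounding rule (not part of the specification), consider value1 = -7, value2 = 1, round = true:
[source,c++]
----
#include <cassert>
#include <cstdint>

int main() {
    int32_t value1 = -7, value2 = 1;
    // Arithmetic shift of a negative value sign-fills: -7 >> 1 == -4
    // (two's-complement behaviour, guaranteed by C++ since C++20)
    int32_t result = value1 >> value2;
    if (value2 > 0 && ((value1 >> (value2 - 1)) & 1) != 0) {
        result = result + 1;  // the shifted-out bit was 1
    }
    assert(result == -3);  // -7/2 = -3.5, rounded up to -3
    return 0;
}
----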
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== BITWISE_AND
Elementwise bitwise AND of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 & value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== BITWISE_OR
Elementwise bitwise OR of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 | value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== BITWISE_XOR
Elementwise bitwise XOR of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 ^ value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== INTDIV
Elementwise integer divide of input1 by input2.
The result of the divide is truncated towards zero.
Expected use is for operations on non-scaled integers.
Floating-point divide should use RECIPROCAL and MUL.
Quantized integer divide should use TABLE (for 1/x) and MUL.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    REQUIRE(value2 != 0);
    // This catches the case where we divide minimum<in_t> by -1,
    // which is not representable in two's complement
    REQUIRE((int64_t)value1 / value2 <= maximum<in_t>);
    in_t result = value1 / value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
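The second REQUIRE exists because the single overflowing case, minimum<int32_t> divided by -1, yields +2^31, which int32_t cannot represent. A small standalone check (not part of the specification):
[source,c++]
----
#include <cstdint>
#include <iostream>

int main() {
    int32_t value1 = INT32_MIN;  // -2147483648
    int32_t value2 = -1;
    // Widen before dividing, exactly as the REQUIRE above does
    int64_t quotient = (int64_t)value1 / value2;  // +2147483648
    // The check fails, so a conforming implementation rejects this input
    std::cout << (quotient <= INT32_MAX ? "allowed" : "rejected") << "\n";
    return 0;
}
----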
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 32|int32_t
|===
==== LOGICAL_AND
Elementwise logical AND of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 && value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|Bool|bool_t
|===
==== LOGICAL_LEFT_SHIFT
Elementwise logical left shift of input1 by the amount specified in input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    REQUIRE(0 <= value2 && value2 <= 31);
    in_t result = value1 << value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== LOGICAL_RIGHT_SHIFT
Elementwise logical right shift of input1 by the amount specified in input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    REQUIRE(0 <= value2 && value2 <= 31);
    // Cast to the unsigned type of the same width so the shift zero-fills
    in_t result = (in_t)((unsigned in_t)value1 >> value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
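The cast through the unsigned type of the same width is what makes the shift zero-fill rather than sign-fill. A small int8_t illustration (not part of the specification):
[source,c++]
----
#include <cassert>
#include <cstdint>

int main() {
    int8_t value1 = -128;  // bit pattern 0x80
    int8_t value2 = 7;
    // Logical shift: zero-fill from the left via the unsigned type
    int8_t logical = (int8_t)((uint8_t)value1 >> value2);  // 0x01 == 1
    // Arithmetic shift for contrast: sign-fill from the left
    // (two's-complement behaviour, guaranteed by C++ since C++20)
    int8_t arithmetic = (int8_t)(value1 >> value2);        // -1
    assert(logical == 1 && arithmetic == -1);
    return 0;
}
----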
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 8|int8_t
|Any|signed 16|int16_t
|Any|signed 32|int32_t
|===
==== LOGICAL_OR
Elementwise logical OR of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 || value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|Bool|bool_t
|===
==== LOGICAL_XOR
Elementwise logical XOR of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = value1 != value2;
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|Bool|bool_t
|===
==== MAXIMUM
Elementwise max of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = apply_max(value1, value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
|===
==== MINIMUM
Elementwise minimum of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = apply_min(value1, value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
|===
==== MUL
Elementwise multiplication (Hadamard product) of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Input (MT profile) Attribute (BI/MI profiles)|uint6_t|shift|-|Result right shift (int32_t data type only)
|Output|out_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    out_t result;
    if (in_t == int32_t && shift > 0) {
        result = apply_scale_32(value1, value2, shift);
    } else {
        result = value1 * value2;  // low 32 bits of result for int32_t
    }
    tensor_write<out_t>(output, shape, index, result);
}
----
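apply_scale_32 is defined normatively elsewhere in the specification. The following non-normative sketch shows the general shape of the rounded right-shift scaling consistent with its use here; the function name is illustrative and the normative version adds further checks and an optional double round:
[source,c++]
----
#include <cstdint>

// Non-normative sketch only; see the specification's scaling helpers
// for the authoritative definition of apply_scale_32.
int32_t apply_scale_32_sketch(int32_t value1, int32_t value2, uint32_t shift) {
    // Keep the full 32x32 product by multiplying in 64 bits...
    int64_t product = (int64_t)value1 * (int64_t)value2;
    // ...then round to nearest on the bits shifted out (shift > 0 here)
    int64_t round = (int64_t)1 << (shift - 1);
    return (int32_t)((product + round) >> shift);
}
----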
*Supported Data Types:*
|===
|Profile|Mode|in_t|out_t
|Any|signed 8|int8_t|int32_t
|Any|signed 16|int16_t|int32_t
|Any|signed 32|int32_t|int32_t
|MI, MT|floating-point|float_t|float_t
|===
==== POW
Elementwise input1 value raised to the power of input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor of rank 1 to 4
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = apply_pow<in_t>(value1, value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|MI, MT|floating-point|float_t
|===
==== SUB
Elementwise subtraction of input1 and input2.
Axes of size 1 will be broadcast as necessary. Rank of input tensors must match.
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input1|shape1|Input tensor
|Input|in_t*|input2|shape2|Input tensor with the same rank as input1
|Output|in_t*|output|shape|Output tensor with broadcast shape if necessary
|===
*Operation Function:*
[source,c++]
----
for_each(index in shape) {
    index1 = apply_broadcast(shape, shape1, index);
    index2 = apply_broadcast(shape, shape2, index);
    in_t value1 = tensor_read<in_t>(input1, shape1, index1);
    in_t value2 = tensor_read<in_t>(input2, shape2, index2);
    in_t result = apply_sub<in_t>(value1, value2);
    tensor_write<in_t>(output, shape, index, result);
}
----
*Supported Data Types:*
|===
|Profile|Mode|in_t
|Any|signed 32|int32_t
|MI, MT|floating-point|float_t
|===
==== TABLE
Table lookup operation.
For the int8_t TABLE operation, perform a 256-entry table lookup returning an int8_t value.
For int16_t tables, the int16_t input is treated as a fixed-point 9.7 value.
The most significant 9 bits are used to index into the table.
The fractional 7 bits are used to interpolate based on table[index] and table[index+1].
For int16_t inputs, the TABLE operator returns a 16.7 interpolated value in an int32_t.
This value can then be input to the RESCALE operator to scale it to the required output data type.
Note that the int16_t table has 513 values to handle table[index+1] when index=511.
An int16_t to int16_t table lookup can be constructed in TOSA as follows:
* Use the TABLE operator to produce a fixed-point 16.7 interpolated result
* Use RESCALE (in_t=int32_t, out_t=int16_t, scale=1<<14, shift=21) to scale the output to int16_t range (or an alternate scale as required)
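In this construction, scale=1<<14 with shift=21 multiplies by 1<<14 and then shifts right by 21, a net division by 1<<7 that removes the seven fractional bits of the 16.7 value.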
*Arguments:*
|===
|Argument|Type|Name|Shape|Description
|Input|in_t*|input|shape|Input tensor
|Input (MT profile) Attribute (BI/MI profiles)|table_t*|table|[TABLE_SIZE]|Lookup table tensor
|Output|out_t*|output|shape|Output tensor
|===
*Operation Function:*
[source,c++]
----
REQUIRE(length(table) == TABLE_SIZE);
for_each(index in shape) {
    in_t value = tensor_read<in_t>(input, shape, index);
    out_t result;
    if (in_t == int8_t) {
        // value is a signed int, convert to a zero-based index
        result = table[value + 128];
    } else {
        result = apply_lookup(table, value);
    }
    tensor_write<out_t>(output, shape, index, result);
}
----
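apply_lookup is defined normatively elsewhere in the specification. A non-normative sketch for the int16_t case, following the 9.7 fixed-point description above (the function name is illustrative):
[source,c++]
----
#include <cstdint>

// Non-normative sketch of the int16_t lookup described above.
int32_t apply_lookup_sketch(const int16_t table[513], int16_t value) {
    // Shift the signed input to an unsigned position, then split it into
    // a 9-bit table index and a 7-bit interpolation fraction
    int32_t position = (int32_t)value + 32768;  // 0 .. 65535
    int32_t index    = position >> 7;           // 0 .. 511
    int32_t fraction = position & 0x7F;         // 0 .. 127
    int32_t base = table[index];
    int32_t next = table[index + 1];            // uses the 513th entry
    // Linear interpolation producing a 16.7 fixed-point result
    return (base << 7) + (next - base) * fraction;
}
----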
*Supported Data Types:*
|===
|Profile|Mode|in_t|table_t|TABLE_SIZE|out_t
|Any|signed 8|int8_t|int8_t|256|int8_t
|Any|signed 16|int16_t|int16_t|513|int32_t
|===