| // |
| // This confidential and proprietary software may be used only as |
| // authorised by a licensing agreement from ARM Limited |
| // (C) COPYRIGHT 2020-2021 ARM Limited |
| // ALL RIGHTS RESERVED |
| // The entire notice above must be reproduced on all authorised |
| // copies and copies may only be made to the extent permitted |
| // by a licensing agreement from ARM Limited. |
| |
| === Elementwise Binary Operators |
| |
| ==== ADD |
| |
| Elementwise addition of input1 and input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = apply_add<in_t>(value1, value2); |
    tensor_write<in_t>(output, shape, index, result);
}
----
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 32|int32_t |
| |MI, MT|floating-point|float_t |
| |=== |
| |
| ==== ARITHMETIC_RIGHT_SHIFT |
| |
| Elementwise arithmetic right shift of input1 by the amount specified in input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Input|bool_t|round|-|If true then the shift is rounded |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| |
| // Ensure that shift amount is appropriate for the data type |
| REQUIRE((in_t == int32_t && 0 <= value2 && value2 <= 31) || |
| (in_t == int16_t && 0 <= value2 && value2 <= 15) || |
| (in_t == int8_t && 0 <= value2 && value2 <= 7)); |
| |
| in_t result = value1 >> value2; |
    if (round == true && value2 > 0 && ((value1 >> (value2 - 1)) & 1) != 0) {
| result = result + 1; |
| } |
| result = apply_clip<in_t>(result, minimum<in_t>, maximum<in_t>); |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== BITWISE_AND |
| |
| Elementwise bitwise AND of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 & value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== BITWISE_OR |
| |
| Elementwise bitwise OR of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 | value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== BITWISE_XOR |
| |
| Elementwise bitwise XOR of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 ^ value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== INTDIV |
| |
| Elementwise integer divide of input1 by input2. |
| The result of the divide is truncated towards zero. |
| Expected use is for operations on non-scaled integers. |
Floating-point divide should use RECIPROCAL and MUL.
| Quantized integer divide should use TABLE (for 1/x) and MUL. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| REQUIRE(value2 != 0); |
| // This catches the case where we divide minimum<in_t> by -1 |
| // which is not representable in two's complement |
| REQUIRE((int64_t)value1 / value2 <= maximum<in_t>); |
| in_t result = value1 / value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
*Supported Data Types:*

| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== LOGICAL_AND |
| |
| Elementwise logical AND of input1 and input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 && value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|Bool|bool_t |
| |=== |
| |
| ==== LOGICAL_LEFT_SHIFT |
| |
Elementwise logical left shift of input1 by the amount specified in input2.
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| REQUIRE(0 <= value2 && value2 <= 31); |
| in_t result = value1 << value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== LOGICAL_RIGHT_SHIFT |
| |
| Elementwise logical right shift of input1 by the amount specified in input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| REQUIRE(0 <= value2 && value2 <= 31); |
    // Logical shift: cast to unsigned so vacated high bits fill with zero
    in_t result = (in_t)((unsigned in_t)value1 >> value2);
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 8|int8_t |
| |Any|signed 16|int16_t |
| |Any|signed 32|int32_t |
| |=== |
| |
| ==== LOGICAL_OR |
| |
| Elementwise logical OR of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 || value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|Bool|bool_t |
| |=== |
| |
| ==== LOGICAL_XOR |
| |
| Elementwise logical XOR of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = value1 != value2; |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|Bool|bool_t |
| |=== |
| |
| ==== MAXIMUM |
| |
Elementwise maximum of input1 and input2.
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = apply_max(value1, value2); |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 32|int32_t |
| |MI, MT|floating-point|float_t |
| |=== |
| |
| ==== MINIMUM |
| |
| Elementwise minimum of input1 and input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = apply_min(value1, value2); |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 32|int32_t |
| |MI, MT|floating-point|float_t |
| |=== |
| |
| ==== MUL |
| |
| Elementwise multiplication (Hadamard product) of input1 and input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Input (MT profile) Attribute (BI/MI profiles)|uint6_t|shift|-|Result right shift (int32_t data type only) |
| |Output|out_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| out_t result; |
| if (in_t == int32_t && shift > 0) { |
| result = apply_scale_32(value1, value2, shift); |
| } else { |
| result = value1 * value2; // low 32-bits of result for int32_t |
| } |
| tensor_write<out_t>(output, shape, index, result); |
| } |
| ---- |
| |
*Supported Data Types:*

| |=== |
| |Profile|Mode|in_t|out_t |
| |
| |Any|signed 8|int8_t|int32_t |
| |Any|signed 16|int16_t|int32_t |
| |Any|signed 32|int32_t|int32_t |
| |MI, MT|floating-point|float_t|float_t |
| |=== |
| |
| ==== POW |
| |
| Elementwise input1 value raised to the power of input2. |
| Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match. |
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
|Input|in_t*|input1|shape1|Input tensor
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor of same type as the input tensors, with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = apply_pow<in_t>(value1, value2); |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |MI, MT|floating-point|float_t |
| |=== |
| |
| ==== SUB |
| |
| Elementwise subtraction of input1 and input2. |
Axis of size 1 will be broadcast, as necessary. Rank of input tensors must match.
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
| |Input|in_t*|input1|shape1|Input tensor |
| |Input|in_t*|input2|shape2|Input tensor with the same rank as input1 |
| |Output|in_t*|output|shape|Output tensor with broadcast shape if necessary |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| for_each(index in shape) { |
| index1 = apply_broadcast(shape, shape1, index); |
| index2 = apply_broadcast(shape, shape2, index); |
| in_t value1 = tensor_read<in_t>(input1, shape1, index1); |
| in_t value2 = tensor_read<in_t>(input2, shape2, index2); |
| in_t result = apply_sub<in_t>(value1, value2); |
| tensor_write<in_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t |
| |
| |Any|signed 32|int32_t |
| |MI, MT|floating-point|float_t |
| |=== |
| |
| ==== TABLE |
| |
| Table lookup operation. |
For the int8_t TABLE operation, perform a 256-entry table lookup returning an int8_t value.
For int16_t tables, the int16_t input is treated as a fixed-point 9.7 value.
The most significant 9 bits are used to index into the table.
The fractional 7 bits are used to interpolate based on table[index] and table[index+1].
For int16_t inputs, the TABLE operator returns a 16.7 interpolated value in an int32_t.
This value can then be input to the RESCALE operator to scale it to the required output data type.
Note that the int16_t table has 513 values to handle table[index+1] when index=511.
| |
| An int16_t to int16_t table lookup can be constructed in TOSA as follows: |
| |
| * Use the TABLE operator to produce a fixed point 16.7 interpolated result |
* Use RESCALE (in_t=int32_t, out_t=int16_t, scale=1<<14, shift=21) to scale the output to int16_t range (or an alternate scale as required); multiplying by 1<<14 and then shifting right by 21 is a net rounded right shift of 7, which removes the 7 fractional bits
| |
| *Arguments:* |
| |
| |=== |
| |Argument|Type|Name|Shape|Description |
| |
|Input|in_t*|input|shape|Input tensor
| |Input (MT profile) Attribute (BI/MI profiles)|table_t*|table|[TABLE_SIZE]|Lookup table tensor |
| |Output|out_t*|output|shape|Output tensor |
| |=== |
| |
| *Operation Function:* |
| |
| [source,c++] |
| ---- |
| REQUIRE(length(table) == TABLE_SIZE); |
| for_each(index in shape) { |
| in_t value = tensor_read<in_t>(input, shape, index); |
| out_t result; |
| if (in_t == int8_t) { |
| // value is a signed int, convert to a 0 based index |
| result = table[value + 128]; |
| } else { |
| result = apply_lookup(table, value); |
| } |
| tensor_write<out_t>(output, shape, index, result); |
| } |
| ---- |
| |
| *Supported Data Types:* |
| |
| |=== |
| |Profile|Mode|in_t|table_t|TABLE_SIZE|out_t |
| |
| |Any|signed 8|int8_t|int8_t|256|int8_t |
| |Any|signed 16|int16_t|int16_t|513|int32_t |
| |=== |
| |