| // |
| // This confidential and proprietary software may be used only as |
| // authorised by a licensing agreement from ARM Limited |
| // (C) COPYRIGHT 2021-2023 ARM Limited |
| // ALL RIGHTS RESERVED |
| // The entire notice above must be reproduced on all authorised |
| // copies and copies may only be made to the extent permitted |
| // by a licensing agreement from ARM Limited. |
| |
| == TOSA Pseudocode |
| |
| The TOSA pseudocode provides precise descriptions of TOSA operations. |
| Each operator contains pseudocode describing the operator's functionality. |
| This section contains pseudocode functions shared across multiple operators in the specification. |
| |
| === Operator Validation Helpers |
| |
| The following functions are used to define the valid conditions for TOSA operators. |
| |
| The REQUIRE function defines the conditions required by the TOSA operator. |
| If the conditions are not met then the result of the TOSA graph is marked as unpredictable. |
| Once the tosa_graph_result is set to tosa_unpredictable, the whole graph is considered unpredictable. |
| |
| The ERROR_IF function defines a condition that must set an error if the condition holds and the graph is not unpredictable. |
| Note that if a graph contains both unpredictable and error statements then result of tosa_execute_graph() is tosa_unpredictable. |
| This condition is captured in the ERROR_IF function. |
| |
| *Implementation Notes* |
| |
| * An implementation is not required to detect unpredictable behavior. If tosa_execute_graph() returns tosa_unpredictable then the tosa_test_compliance() function does not require any specific output from an implementation. |
| * An implementation is required to detect errors in a graph that does not have unpredictable behavior (see tosa_test_compliance). |
| * An acceptable implementation is to stop and report an error on the first ERROR_IF condition that occurs. This satifies tosa_test_compliance() even if the tosa_execute_graph() was tosa_unpredictable. |
| * If the tosa_execute_graphs() result is tosa_unpredictable or tosa_error, then there is no requirement on the implementation to execute any portion of the TOSA graph. |
| |
| [source,c++] |
| ---- |
| void REQUIRE(condition) { |
| // Unpredictable overrides any previous result |
| if (!(condition)) { |
| tosa_graph_result = tosa_unpredictable; |
| } |
| } |
| |
| void ERROR_IF(condition) { |
| // Error encodes a predictable error state and so is not registered |
| // if the graph is marked as unpredictable. |
| if (tosa_graph_result != tosa_unpredictable && condition) { |
| tosa_graph_result = tosa_error; |
| } |
| } |
| |
| void LEVEL_CHECK(condition) { |
| // If a level is specified and the level condition fails then |
| // the result is unpredictable. |
| REQUIRE(condition); |
| } |
| ---- |
| |
| === Tensor Access Helpers |
| |
| ==== Tensor Utilities |
| |
| [source,c++] |
| ---- |
| // Convert tensor index coordinates to an element offset |
| size_t tensor_index_to_offset(shape_t shape, shape_t index) { |
| size_t size = tensor_size(shape); // check tensor shape is valid |
| size_t offset = 0; |
| for (int32_t i = 0; i < rank(shape); i++) { |
| REQUIRE(index[i] >= 0 && index[i] < shape[i]); |
| offset = offset * shape[i] + index[i]; |
| } |
| return offset; |
| } |
| |
| // Convert an element offset to tensor index coordinates |
| shape_t tensor_offset_to_index(shape_t shape, size_t offset) { |
| size_t size = tensor_size(shape); // check tensor shape is valid |
| REQUIRE(offset < size); |
| shape_t index(rank(shape)); // index has rank(shape) indicies |
| for(int32_t i = rank(shape) - 1; i >= 0; i--) { |
| index[i] = offset % shape[i]; |
| offset /= shape[i]; |
| } |
| return index; |
| } |
| |
| // Check the tensor shape is valid and return the tensor size in elements |
| size_t tensor_size(shape_t shape) { |
| size_t size = 1; |
| for (int32_t i = 0; i < rank(shape); i++) { |
| REQUIRE(1 <= shape[i] && shape[i] <= maximum<size_t> / size); |
| size *= shape[i]; |
| } |
| return size; |
| } |
| |
| // Return the size of the tensor in the given axis |
| // For a rank=0 tensor, returns 1 for all axes |
| size_t shape_dim(shape_t shape, int axis) { |
| return (axis >= rank(shape)) ? 1 : shape[axis]; |
| } |
| ---- |
| |
| ==== Tensor Read |
| |
| tensor_read reads a single data value out of the given tensor. |
| The shape argument contains the shape of the tensor. |
| Index is the coordinates within the tensor of the value to be read. |
| |
| [source,c++] |
| ---- |
| in_t tensor_read<in_t>(in_t *address, shape_t shape, shape_t index) { |
| size_t offset = tensor_index_to_offset(shape, index); |
| return address[offset]; |
| } |
| ---- |
| |
| ==== Tensor Write |
| |
| tensor_write writes a single data value into the given tensor. |
| The shape argument contains the shape of the tensor. |
| Index is the coordinates within the tensor of the value to be written. |
| value is the value to be written to the given coordinate. |
| |
| [source,c++] |
| ---- |
| void tensor_write<type>(<type> *address, shape_t shape, shape_t index, <type> value) { |
| size_t offset = tensor_index_to_offset(shape, index); |
| address[offset] = value; |
| } |
| ---- |
| |
| ==== Variable Tensor Allocate |
| |
| variable_tensor_allocate allocates the mutable persistent memory block for storing variable tensors. |
| The shape argument contains the shape of the allocated memory block for the variable_tensor. |
| The uid argument is a globally unique identifier for variable tensors. |
| |
| [source,c++] |
| ---- |
| tensor_t* variable_tensor_allocate<in_t>(shape_t shape, int32_t uid) { |
| size_t size = tensor_size(shape); |
| tensor_t *allocated_tensor = new tensor_t; |
| allocated_tensor->data = new in_t[size]; |
| allocated_tensor->uid = uid; |
| allocated_tensor->is_written = false; |
| allocated_tensor->shape = shape; |
| allocated_tensor->type = in_t; |
| return allocated_tensor; |
| } |
| ---- |
| |
| ==== Variable Tensor Lookup |
| |
| variable_tensor_lookup checks whether a variable tensor has been allocated or not. |
| The uid argument is a globally unique identifier for variable tensors. |
| |
| [source,c++] |
| ---- |
| tensor_t variable_tensor_lookup(int32_t uid) { |
| // The global all_allocated_variable_tensors was instantiated at the first |
| // time of executing the tosa graph |
| for_each(tensor_t allocated_tensor in all_allocated_variable_tensors) { |
| if (allocated_tensor.uid == uid) { |
| return allocated_tensor; |
| } |
| } |
| return NULL; |
| } |
| ---- |
| |
| ==== Broadcast Helpers |
| |
| The following function derives the broadcast output shape from the input shapes. |
| |
| [source,c++] |
| ---- |
| shape_t broadcast_shape(shape_t shape1, shape_t shape2) { |
| ERROR_IF(rank(shape1) != rank(shape2)); |
| shape_t shape = shape1; |
| for (int32_t i = 0; i < rank(shape); i++) { |
| if (shape[i] == 1) { |
| shape[i] = shape2[i]; |
| } else { |
| ERROR_IF(shape2[i] != 1 && shape2[i] != shape[i]); |
| } |
| } |
| return shape; |
| } |
| ---- |
| |
| The following function maps an index in the output tensor to an index in the input tensor. |
| |
| [source,c++] |
| ---- |
| // The index argument should be a valid location within out_shape. |
| // The function returns the location within in_shape that contributes |
| // to the output based on broadcasting rules. |
| |
| shape_t apply_broadcast(shape_t out_shape, shape_t in_shape, shape_t index) { |
| ERROR_IF(rank(out_shape) != rank(in_shape)); |
| ERROR_IF(rank(out_shape) != rank(index)); |
| for (int32_t i = 0; i < rank(out_shape); i++) { |
| if (out_shape[i] != in_shape[i]) { |
| ERROR_IF(in_shape[i] != 1); |
| index[i] = 0; |
| } |
| } |
| return index; |
| } |
| ---- |
| |
| === General Pseudocode Helpers |
| |
| This section contains general pseudocode utility functions used throughout the specification. |
| |
| ==== Arithmetic Helpers |
| |
| The following functions provide arithmetic while defining requirements such that values stay in the valid range. |
| |
| [source,c++] |
| ---- |
| in_t apply_add_s<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) return a + b; |
| int64_t c = sign_extend<int64_t>(a) + sign_extend<int64_t>(b); |
| REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_add_u<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) return a + b; |
| uint64_t c = zero_extend<uint64_t>(a) + zero_extend<uint64_t>(b); |
| REQUIRE(c >= minimum_u<in_u_t> && c <= maximum_u<in_u_t>); |
| return truncate<in_t>(c); |
| } |
| |
| in_t apply_arith_rshift<in_t>(in_t a, in_t b) { |
| int32_t c = sign_extend<int32_t>(a) >> sign_extend<int32_t>(b); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_intdiv_s<in_t>(in_t a, in_t b) { |
| int64_t c = sign_extend<int64_t>(a) / sign_extend<int64_t>(b); |
| REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_ceil<in_t>(in_t input) { |
| return input value rounded up to nearest integer |
| } |
| |
| in_t apply_clip_s<in_t>(in_t value, in_t min_val, in_t max_val) { |
| if (is_floating_point(in_t>) { |
| REQUIRE(min_val <= max_val); |
| } |
| else { |
| REQUIRE(sign_extend<int64_t>(min_val) <= sign_extend<int64_t>(max_val)); |
| } |
| value = apply_max_s<in_t>(value, min_val); |
| value = apply_min_s<in_t>(value, max_val); |
| return value; |
| } |
| |
| in_t apply_clip_u<in_t>(in_t value, in_t min_val, in_t max_val) { |
| REQUIRE(zero_extend<int64_t>(min_val) <= zero_extend<int64_t>(max_val)); |
| value = apply_max_u<in_t>(value, min_val); |
| value = apply_min_u<in_t>(value, max_val); |
| return value; |
| } |
| |
| in_t apply_exp<in_t>(in_t input) { |
| return e to the power input |
| } |
| |
| in_t apply_floor<in_t>(in_t input) { |
| return input value rounded down to nearest integer |
| } |
| |
| in_t apply_log<in_t>(in_t input) { |
| if (input == 0) { |
| return -INFINITY |
| } |
| else if (input < 0) { |
| return NaN; |
| } |
| return the natural logarithm of input |
| } |
| |
| in_t apply_logical_rshift<in_t>(in_t a, in_t b) { |
| uint64_t c = zero_extend<uint32_t>(a) >> zero_extend<uint32_t>(b); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_max_s<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) { |
| if (isNaN(a) || isNaN(b)) { |
| return NaN; |
| } |
| if (a >= b) return a; else return b; |
| } |
| // Integer version |
| if (sign_extend<int64_t>(a) >= sign_extend<int64_t>(b)) return a; else return b; |
| } |
| |
| in_t apply_max_u<in_t>(in_t a, in_t b) { |
| if (zero_extend<uint64_t>(a) >= zero_extend<int64_t>(b)) return a; else return b; |
| } |
| |
| in_t apply_min_s<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) { |
| if (isNaN(a) || isNaN(b)) { |
| return NaN; |
| } |
| if (a < b) return a; else return b; |
| } |
| // Integer version |
| if (sign_extend<int64_t>(a) < sign_extend<int64_t>(b)) return a; else return b; |
| } |
| |
| in_t apply_min_u<in_t>(in_t a, in_t b) { |
| if (zero_extend<int64_t>(a) < zero_extend<int64_t>(b)) return a; else return b; |
| } |
| |
| in_t apply_mul_s<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) return a * b; |
| int64_t c = sign_extend<int64_t>(a) * sign_extend<int64_t>(b); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_pow<in_t>(in_t a, in_t b) { |
| return a ** b; // a raised to the power b |
| } |
| |
| in_t apply_sqrt<in_t>(in_t input) { |
| return the square root of input |
| } |
| |
| in_t apply_sub_s<in_t>(in_t a, in_t b) { |
| if (is_floating_point(in_t)) return a - b; |
| int64_t c = sign_extend<int64_t>(a) - sign_extend<int64_t>(b); |
| REQUIRE(c >= minimum_s<in_t> && c <= maximum_s<in_t>); |
| return static_cast<in_t>(c); |
| } |
| |
| in_t apply_sub_u<in_t>(in_t a, in_t b) { |
| uint64_t c = zero_extend<uint64_t>(a) - zero_extend<uint64_t>(b); |
| REQUIRE(c >= minimum_u<in_u_t> && c <= maximum_u<in_u_t>); |
| return truncate<in_t>(c); |
| } |
| |
| int32_t count_leading_zeros(int32_t a) { |
| int32_t acc = 32; |
| if (a != 0) { |
| uint32_t mask; |
| mask = 1 << (32 - 1); // width of int32_t - 1 |
| acc = 0; |
| while ((mask & a) == 0) { |
| mask = mask >> 1; |
| acc = acc + 1; |
| } |
| } |
| return acc; |
| } |
| ---- |
| |
| ==== Type Conversion Helpers |
| |
| The following definitions indicate the type to be used when the given parameters are provided. |
| |
| [source,c++] |
| ---- |
| |
| // Returns a signed version of the given type |
| // A no-op for floating-point types |
| Type make_signed(Type in_t) |
| { |
| switch(in_t) { |
| case bool_t: |
| return bool_t; |
| case i8_t: |
| return int8_t; |
| case i16_t: |
| return int16_t; |
| case i32_t: |
| return int32_t; |
| case i48_t: |
| return int48_t; |
| case fp16_t: |
| return fp16_t; |
| case bf16_t: |
| return bf16_t; |
| case fp32_t: |
| return fp32_t; |
| } |
| } |
| |
| // Returns the usigned type of the given type |
| // Error to call this with anything but i8_t or i16_t |
| |
| Type make_unsigned(Type in_t) |
| { |
| ERROR_IF(in_t != i8_t && in_t != i16_t); |
| switch(in_t) { |
| case i8_t: |
| return uint8_t; |
| case i16_t: |
| return uint16_t; |
| } |
| } |
| |
| out_t static_cast<out_t>(in_t value) |
| { |
| // Operates similar to the c++ standard static_cast |
| // Limited to simple numeric conversion for TOSA. |
| // Sign extends signed integer input types if needed |
| // Zero extends unsigned integer input types if needed |
| // Truncates when converting to a smaller width data type |
| // Conversion from integer to floating-point is exact if possible |
| // If converting between signless integer types, treated as signed integer |
| } |
| |
| out_t bitcast<out_t>(in_t value) |
| { |
| // Treats the bits of value as if they were of type out_t |
| // Only supported for integer types of the same bit width |
| } |
| ---- |
| |
| ==== Numeric Accuracy Helpers |
| |
| For a floating point number of type in_t a normal value is of the form (1.x * 2^e). |
| The fractional part 'x' has a number of fractional or mantissa bits depending on the type. |
| The exponent 'e' has a normal range depending on the type. |
| The functions below return the ranges according to type. |
| |
| [source,c++] |
| ---- |
| fp64_t exp2(int n) { |
| if (n < -1075) { |
| return 0.0; // smaller than smallest denormal |
| } |
| REQUIRE(n <= 1023); |
| fp64_t v = 1.0; |
| while (n > 0) { v = v*2.0; n--; } |
| while (n < 0) { v = v/2.0; n++; } |
| return v; |
| } |
| |
| int ilog2(fp64_t v) { |
| REQURE(0 < v && v < infinity); |
| int n = 0; |
| while (v >= 2.0) { v = v/2.0; n++; } |
| while (v < 1.0) { v = v*2.0; n--; } |
| return n; |
| } |
| |
| fp64_t normal_min<in_t>() { |
| switch (in_t) { |
| case fp32_t: return exp2(-126); |
| case bf16_t: return exp2(-126); |
| case fp16_t: return exp2( -14); |
| } |
| } |
| |
| fp64_t normal_max<in_t>() { |
| switch (in_t) { |
| case fp32_t: return exp2(128) - exp2(127-23); |
| case bf16_t: return exp2(128) - exp2(127- 7); |
| case fp16_t: return exp2( 16) - exp2( 15-10); |
| } |
| } |
| |
| // Number of fractional (mantissa bits) |
| int normal_frac<in_t> () { |
| switch (in_t) { |
| case fp32_t: return 23; |
| case fp16_t: return 10; |
| case bf16_t: return 7; |
| } |
| } |
| ---- |
| |
| The following functions check if a test value in floating-point format in_t is within an error range compared to a reference value. |
| The functions assume that denormal values may be flushed to zero. |
| For the first function, the permitted error range is specified as num_ulp which is converted to an error bound as specified by the code. |
| For the second function, the permitted error range is specified as an absolute error bound. |
| |
| [source,c++] |
| ---- |
| bool tosa_reference_check_fp<in_t>(in_t test_value, fp64_t ref_value, fp64_t num_ulp) { |
| fp64_t err_bnd = 0.0; |
| if (is_normal_fp64(ref_value) && abs(ref_value) != 0) { |
| int ref_exp = ilog2(abs(ref_value)); |
| fp64_t ref_pow2 = max(exp2(ref_exp), normal_min<in_t>); |
| fp64_t val_ulp = ref_pow2 * exp2(-normal_frac<in_t>); |
| err_bnd = val_ulp * num_ulp; |
| } |
| return tosa_reference_check_fp_bnd<in_t>(test_value, ref_value, err_bnd); |
| } |
| |
| bool tosa_reference_check_fp_bnd<in_t>(in_t test_value, fp64_t ref_value, fp64_t err_bnd) { |
| if (is_a_NaN(ref_value)) { |
| return is_a_NaN(test_value); |
| } |
| REQUIRE(err_bnd >= 0.0); |
| if (ref_value < 0) { |
| ref_value = -ref_value; |
| test_value = -test_value; |
| } |
| fp64_t ref_max = ref_value + err_bnd; |
| fp64_t ref_min = ref_value - err_bnd; |
| if (ref_max > normal_max<in_t>) ref_max = infinity; |
| if (ref_min > normal_max<in_t>) ref_min = infinity; |
| if (ref_max < normal_min<in_t>) ref_max = normal_min<in_t>; |
| if (ref_min < normal_min<in_t>) ref_min = 0; |
| return (static_cast<fp64_t>(test_value) >= ref_min && |
| static_cast<fp64_t>(test_value) <= ref_max); |
| } |
| |
| ---- |
| |
| ==== Numeric Conversion Helpers |
| |
| The following definitions are used in pseudocode to do numeric conversions. |
| Where the *float_t* type is used, it represents all of the floating-point data types supported by the given profile. |
| See <<Number formats>> for details on the floating-point formats. |
| |
| [source,c++] |
| ---- |
| int round_to_nearest_int(float_t f) |
| Converts the floating-point value to f, with rounding to the nearest integer value. |
| For the required precision see the section: Main inference precision requirements. |
| |
| float_t round_to_nearest_float(in_t f) |
| Converts the input value into floating-point, rounding to the nearest representable value. |
| For the required precision see the section: Main inference precision requirements. |
| |
| out_t sign_extend<out_t>(in_t input) |
| Floating point values are unchanged. |
| For two's complement integer values where out_t has more bits than in_t, replicate the top bit of input for all bits between the top bit of input and the top bit of output. |
| |
| out_t zero_extend<out_t>(in_t input) |
| Floating point values are unchanged. |
| For two's complement integer values where out_t has more bits than in_t, insert zero values for all bits between the top bit of input and the top bit of output. |
| |
| out_t truncate(in_t input) |
| output is the sizeof(out_t) least significant bits in input. |
| Nop for floating-point types |
| ---- |
| |
| The following definition is used to flatten a list of lists into a single list. |
| |
| [source,c++] |
| ---- |
| in_t* flatten(in_t lists[]) { |
| in_t output = []; |
| for_each(list in lists) { |
| for_each(element in list) { |
| output.append(element); |
| } |
| } |
| } |
| ---- |
| |
| Generic helper functions used to keep the pseudocode concise. |
| |
| [source,c++] |
| ---- |
| |
| bool_t is_floating_point(type) { |
| if (type == fp16_t || type == fp32_t || type == bf16_t) |
| return true; |
| return false; |
| } |
| |
| int32_t idiv(int32_t input1, int32_t input2) { |
| return input1 / input2; // Integer divide that truncates towards zero |
| } |
| |
| // Integer division that checks input1 is a multiple of input2 |
| |
| int32_t idiv_check(int32_t input1, int32_t input2) { |
| ERROR_IF(input1 % input2 != 0); // input1 must be a multiple of input2 |
| return input1 / input2; // exact quotient without rounding |
| } |
| |
| // perform an integer division with rounding towards minus infinity |
| |
| int32_t idiv_floor(int32_t input1, int32_t input2) { |
| int32_t rval = input1 / input2; |
| if (rval * input2 > input1) { |
| rval--; |
| } |
| return rval; |
| } |
| |
| int32_t length(in_t input) |
| return number of elements in input list |
| |
| int32_t rank(in_t input) |
| return rank of an input tensor |
| |
| int32_t sum(in_t input[]) |
| return the sum of values of an input list |
| |
| bool isNaN(float input) |
| return True if floating-point input value is NaN |
| |
| float_t pi() |
| returns value of pi |
| |
| float_t sin(angle) |
| return sine of angle given in radians |
| |
| float_t cos(angle) |
| return cosine of angle given in radians |
| |
| bool power_of_two(int32_t value) |
| return true if value is a power of two, false otherwise |
| |
| in_out_t maximum_s<Type T> |
| return the maximum value when interpreting type T as a signed value as returned by the make_signed helper. |
| |
| in_out_t minimum_s<Type T> |
| return the minimum value when interpreting type T as a signed value as returned by the make_signed helper. |
| |
| in_out_t maximum_u<Type T> |
| return the maximum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. |
| |
| in_out_t minimum_u<Type T> |
| return the minimum value when interpreting type T as an unsigned value as returned by the make_unsigned helper. |
| ---- |