chapters/type_conversion.adoc - tosa/specification - Gitiles

 //
 // This confidential and proprietary software may be used only as
 // authorised by a licensing agreement from ARM Limited
 // (C) COPYRIGHT 2020-2022 ARM Limited
 // ALL RIGHTS RESERVED
 // The entire notice above must be reproduced on all authorised
 // copies and copies may only be made to the extent permitted
 // by a licensing agreement from ARM Limited.

 === Type Conversion

 ==== CAST

 Casts a tensor from one data type to another.

 *Arguments:*

 |===
 |Argument|Type|Name|Shape|Description

 |Input|in_t*|input|shape|Input tensor
 |Output|out_t*|output|shape|Output tensor
 |===

 *Operation Function:*

 [source,c++]
 ----
 // Element-wise conversion: every element is read as in_t, converted
 // according to the (in_t, out_t) pair, and written back at the same index.
 for_each(index in shape) {
     in_t in = tensor_read<in_t>(input, shape, index);
     out_t out;
     if (out_t == bool_t) {
         // Any non-zero input becomes true.
         out = (in != 0) ? true : false;
     } else if (in_t == bool_t) {
         // true maps to 1, false maps to 0.
         out = (in) ? 1 : 0;
     } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
         // Floating-point destination.
         out = round_to_nearest_float(in);
     } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
         // Floating-point source, integer destination: round to an integer,
         // then clip to the representable range of out_t.
         out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
     } else if (sizeof(out_t) >= sizeof(in_t)) {
         // Integer to an integer type of the same or larger size.
         out = sign_extend(in);
     } else {
         // Integer to a smaller integer type.
         out = truncate(in);
     }
     tensor_write<out_t>(output, shape, index, out);
 }
 ----

 *Supported Data Types:*

 |===
 |Profile|Mode|in_t|out_t

 |Any|bool to signed 8|bool_t|int8_t
 |Any|bool to signed 16|bool_t|int16_t
 |Any|bool to signed 32|bool_t|int32_t
 |Any|signed 8 to bool|int8_t|bool_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
 |MI, MT|signed 8 to fp16|int8_t|fp16_t
 |MI, MT|signed 8 to bf16|int8_t|bf16_t
 |MI, MT|signed 8 to fp32|int8_t|fp32_t
 |Any|signed 16 to bool|int16_t|bool_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 32|int16_t|int32_t
 |MI, MT|signed 16 to fp16|int16_t|fp16_t
 |MI, MT|signed 16 to bf16|int16_t|bf16_t
 |MI, MT|signed 16 to fp32|int16_t|fp32_t
 |Any|signed 32 to bool|int32_t|bool_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
 |MI, MT|signed 32 to fp16|int32_t|fp16_t
 |MI, MT|signed 32 to bf16|int32_t|bf16_t
 |MI, MT|signed 32 to fp32|int32_t|fp32_t
 |MI, MT|fp16 to signed 8|fp16_t|int8_t
 |MI, MT|fp16 to signed 16|fp16_t|int16_t
 |MI, MT|fp16 to signed 32|fp16_t|int32_t
 |MI, MT|bf16 to signed 8|bf16_t|int8_t
 |MI, MT|bf16 to signed 16|bf16_t|int16_t
 |MI, MT|bf16 to signed 32|bf16_t|int32_t
 |MI, MT|fp32 to signed 8|fp32_t|int8_t
 |MI, MT|fp32 to signed 16|fp32_t|int16_t
 |MI, MT|fp32 to signed 32|fp32_t|int32_t
 |===

 ==== RESCALE

 Rescale quantized values into a new domain. This function scales by factor: multiplier * 2^-shift^.

 *Arguments:*

 |===
 |Argument|Type|Name|Shape|Description

 |Input|in_t*|input|shape|Input tensor from 1 to 4 dims
 |Output|out_t*|output|shape|Output tensor with the same shape as input
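 |Attribute|in_t|input_zp|-|Input tensor zero point. int8/uint8 can have zero point within their valid range. uint16 zero point must be either 0 or 32768. All other types must have zero point equal to 0.
 |Attribute|out_t|output_zp|-|Output tensor zero point. int8/uint8 can have zero point within their valid range. uint16 zero point must be either 0 or 32768. All other types must have zero point equal to 0.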
 |Input (MT profile) Attribute (BI/MI profiles)|mul_t*|multiplier|[NC]|Scaling multiplier array
 |Input (MT profile) Attribute (BI/MI profiles)|uint6_t*|shift|[NC]|Scaling shift array
 |Attribute|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t
 |Attribute|bool_t|double_round|-|Select double round mode
 |Attribute|bool_t|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1
 |===

 *Operation Function:*

 [source,c++]
 ----
 for_each(index in shape) {
     // uint16 values can have zero_point 0 or 32768
     // int8/uint8 can have zero point within their valid range
     // No other types can have zero point != 0
     ERROR_IF(in_t != int8_t &&
              in_t != uint8_t &&
              in_t != uint16_t && input_zp != 0);
     ERROR_IF(out_t != int8_t &&
              out_t != uint8_t &&
              out_t != uint16_t && output_zp != 0);
     // A uint16 zero point is an error only when it is neither of the two
     // permitted values, so the two inequalities are combined with &&.
     ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768);
     ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768);
     // int48 input cannot be combined with 32-bit scaling.
     ERROR_IF(scale32 && in_t == int48_t);
     // double_round is only accepted together with scale32.
     ERROR_IF(!scale32 && double_round);
     // Remove the input zero point before scaling.
     int48_t value = tensor_read<in_t>(input, shape, index);
     value = value - input_zp;
     // Per-channel mode selects the scale by the index along the last
     // dimension; otherwise the single entry 0 is used for every element.
     int c = (per_channel) ? index[dims-1] : 0;
     int32_t result = (scale32) ?
         apply_scale_32(value, multiplier[c], shift[c], double_round) :
         apply_scale_16(value, multiplier[c], shift[c]);
     // Add the output zero point, then clip to the range of out_t.
     result = (out_t)apply_clip<int32_t>(result + output_zp, minimum<out_t>, maximum<out_t>);
     tensor_write<out_t>(output, shape, index, result);
 }
 ----

 *Supported Data Types:*

 |===
 |Profile|Mode|in_t|out_t

 |Any|signed 8 to signed 8|int8_t|int8_t
 |Any|signed 8 to signed 16|int8_t|int16_t
 |Any|signed 8 to signed 32|int8_t|int32_t
 |Any|signed 8 to unsigned 8|int8_t|uint8_t
 |Any|signed 16 to signed 8|int16_t|int8_t
 |Any|signed 16 to signed 16|int16_t|int16_t
 |Any|signed 16 to signed 32|int16_t|int32_t
 |Any|signed 16 to unsigned 8|int16_t|uint8_t
 |Any|signed 16 to unsigned 16|int16_t|uint16_t
 |Any|signed 32 to signed 8|int32_t|int8_t
 |Any|signed 32 to signed 16|int32_t|int16_t
 |Any|signed 32 to signed 32|int32_t|int32_t
 |Any|signed 48 to signed 8|int48_t|int8_t
 |Any|signed 48 to signed 16|int48_t|int16_t
 |Any|signed 48 to signed 32|int48_t|int32_t
 |Any|unsigned 8 to signed 8|uint8_t|int8_t
 |Any|unsigned 8 to signed 16|uint8_t|int16_t
 |Any|unsigned 16 to signed 16|uint16_t|int16_t
 |===
	//
	// This confidential and proprietary software may be used only as
	// authorised by a licensing agreement from ARM Limited
	// (C) COPYRIGHT 2020-2022 ARM Limited
	// ALL RIGHTS RESERVED
	// The entire notice above must be reproduced on all authorised
	// copies and copies may only be made to the extent permitted
	// by a licensing agreement from ARM Limited.

	=== Type Conversion

	==== CAST

	Casts a tensor from one data type to another.

	*Arguments:*

	|===
	|Argument|Type|Name|Shape|Description

	|Input|in_t*|input|shape|Input tensor
	|Output|out_t*|output|shape|Output tensor
	|===

	*Operation Function:*

	[source,c++]
	----
	// Element-wise conversion: every element is read as in_t, converted
	// according to the (in_t, out_t) pair, and written back at the same index.
	for_each(index in shape) {
	    in_t in = tensor_read<in_t>(input, shape, index);
	    out_t out;
	    if (out_t == bool_t) {
	        // Any non-zero input becomes true.
	        out = (in != 0) ? true : false;
	    } else if (in_t == bool_t) {
	        // true maps to 1, false maps to 0.
	        out = (in) ? 1 : 0;
	    } else if (out_t == fp16_t || out_t == bf16_t || out_t == fp32_t) {
	        // Floating-point destination.
	        out = round_to_nearest_float(in);
	    } else if (in_t == fp16_t || in_t == bf16_t || in_t == fp32_t) {
	        // Floating-point source, integer destination: round to an integer,
	        // then clip to the representable range of out_t.
	        out = apply_clip<out_t>(round_to_nearest_int(in), minimum<out_t>, maximum<out_t>);
	    } else if (sizeof(out_t) >= sizeof(in_t)) {
	        // Integer to an integer type of the same or larger size.
	        out = sign_extend(in);
	    } else {
	        // Integer to a smaller integer type.
	        out = truncate(in);
	    }
	    tensor_write<out_t>(output, shape, index, out);
	}
	----

	*Supported Data Types:*

	|===
	|Profile|Mode|in_t|out_t

	|Any|bool to signed 8|bool_t|int8_t
	|Any|bool to signed 16|bool_t|int16_t
	|Any|bool to signed 32|bool_t|int32_t
	|Any|signed 8 to bool|int8_t|bool_t
	|Any|signed 8 to signed 16|int8_t|int16_t
	|Any|signed 8 to signed 32|int8_t|int32_t
	|MI, MT|signed 8 to fp16|int8_t|fp16_t
	|MI, MT|signed 8 to bf16|int8_t|bf16_t
	|MI, MT|signed 8 to fp32|int8_t|fp32_t
	|Any|signed 16 to bool|int16_t|bool_t
	|Any|signed 16 to signed 8|int16_t|int8_t
	|Any|signed 16 to signed 32|int16_t|int32_t
	|MI, MT|signed 16 to fp16|int16_t|fp16_t
	|MI, MT|signed 16 to bf16|int16_t|bf16_t
	|MI, MT|signed 16 to fp32|int16_t|fp32_t
	|Any|signed 32 to bool|int32_t|bool_t
	|Any|signed 32 to signed 8|int32_t|int8_t
	|Any|signed 32 to signed 16|int32_t|int16_t
	|MI, MT|signed 32 to fp16|int32_t|fp16_t
	|MI, MT|signed 32 to bf16|int32_t|bf16_t
	|MI, MT|signed 32 to fp32|int32_t|fp32_t
	|MI, MT|fp16 to signed 8|fp16_t|int8_t
	|MI, MT|fp16 to signed 16|fp16_t|int16_t
	|MI, MT|fp16 to signed 32|fp16_t|int32_t
	|MI, MT|bf16 to signed 8|bf16_t|int8_t
	|MI, MT|bf16 to signed 16|bf16_t|int16_t
	|MI, MT|bf16 to signed 32|bf16_t|int32_t
	|MI, MT|fp32 to signed 8|fp32_t|int8_t
	|MI, MT|fp32 to signed 16|fp32_t|int16_t
	|MI, MT|fp32 to signed 32|fp32_t|int32_t
	|===

	==== RESCALE

	Rescale quantized values into a new domain. This function scales by factor: multiplier * 2^-shift^.

	*Arguments:*

	|===
	|Argument|Type|Name|Shape|Description

	|Input|in_t*|input|shape|Input tensor from 1 to 4 dims
	|Output|out_t*|output|shape|Output tensor with the same shape as input
	|Attribute|in_t|input_zp|-|Input tensor zero point. int8/uint8 can have zero point within their valid range. uint16 zero point must be either 0 or 32768. All other types must have zero point equal to 0.
	|Attribute|out_t|output_zp|-|Output tensor zero point. int8/uint8 can have zero point within their valid range. uint16 zero point must be either 0 or 32768. All other types must have zero point equal to 0.
	|Input (MT profile) Attribute (BI/MI profiles)|mul_t*|multiplier|[NC]|Scaling multiplier array
	|Input (MT profile) Attribute (BI/MI profiles)|uint6_t*|shift|[NC]|Scaling shift array
	|Attribute|bool_t|scale32|-|if (scale32) mul_t=int32_t else mul_t=int16_t
	|Attribute|bool_t|double_round|-|Select double round mode
	|Attribute|bool_t|per_channel|-|if (per_channel) NC=shape[dims-1] else NC=1
	|===

	*Operation Function:*

	[source,c++]
	----
	for_each(index in shape) {
	    // uint16 values can have zero_point 0 or 32768
	    // int8/uint8 can have zero point within their valid range
	    // No other types can have zero point != 0
	    ERROR_IF(in_t != int8_t &&
	             in_t != uint8_t &&
	             in_t != uint16_t && input_zp != 0);
	    ERROR_IF(out_t != int8_t &&
	             out_t != uint8_t &&
	             out_t != uint16_t && output_zp != 0);
	    // A uint16 zero point is an error only when it is neither of the two
	    // permitted values, so the two inequalities are combined with &&.
	    ERROR_IF(in_t == uint16_t && input_zp != 0 && input_zp != 32768);
	    ERROR_IF(out_t == uint16_t && output_zp != 0 && output_zp != 32768);
	    // int48 input cannot be combined with 32-bit scaling.
	    ERROR_IF(scale32 && in_t == int48_t);
	    // double_round is only accepted together with scale32.
	    ERROR_IF(!scale32 && double_round);
	    // Remove the input zero point before scaling.
	    int48_t value = tensor_read<in_t>(input, shape, index);
	    value = value - input_zp;
	    // Per-channel mode selects the scale by the index along the last
	    // dimension; otherwise the single entry 0 is used for every element.
	    int c = (per_channel) ? index[dims-1] : 0;
	    int32_t result = (scale32) ?
	        apply_scale_32(value, multiplier[c], shift[c], double_round) :
	        apply_scale_16(value, multiplier[c], shift[c]);
	    // Add the output zero point, then clip to the range of out_t.
	    result = (out_t)apply_clip<int32_t>(result + output_zp, minimum<out_t>, maximum<out_t>);
	    tensor_write<out_t>(output, shape, index, result);
	}
	----

	*Supported Data Types:*

	|===
	|Profile|Mode|in_t|out_t

	|Any|signed 8 to signed 8|int8_t|int8_t
	|Any|signed 8 to signed 16|int8_t|int16_t
	|Any|signed 8 to signed 32|int8_t|int32_t
	|Any|signed 8 to unsigned 8|int8_t|uint8_t
	|Any|signed 16 to signed 8|int16_t|int8_t
	|Any|signed 16 to signed 16|int16_t|int16_t
	|Any|signed 16 to signed 32|int16_t|int32_t
	|Any|signed 16 to unsigned 8|int16_t|uint8_t
	|Any|signed 16 to unsigned 16|int16_t|uint16_t
	|Any|signed 32 to signed 8|int32_t|int8_t
	|Any|signed 32 to signed 16|int32_t|int16_t
	|Any|signed 32 to signed 32|int32_t|int32_t
	|Any|signed 48 to signed 8|int48_t|int8_t
	|Any|signed 48 to signed 16|int48_t|int16_t
	|Any|signed 48 to signed 32|int48_t|int32_t
	|Any|unsigned 8 to signed 8|uint8_t|int8_t
	|Any|unsigned 8 to signed 16|uint8_t|int16_t
	|Any|unsigned 16 to signed 16|uint16_t|int16_t
	|===