blob: 5da4da559fd5507a5db20bfaec8b9430bd95aac5 [file] [log] [blame]
//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#pragma once
7
#include <cmath>
#include <cstring>
#include <ostream>
#include <stdint.h>
11
12namespace armnn
13{
/// 16-bit "brain floating point" value, stored as its raw bit pattern.
///
/// The encoding is the upper 16 bits of a 32-bit float:
/// 1 sign bit, 8 exponent bits and 7 mantissa bits.
class BFloat16
{
public:
    /// Constructs a value whose raw bits are all zero.
    BFloat16()
    : m_Value(0)
    {}

    /// Constructs a value directly from its raw 16-bit encoding (no numeric conversion).
    explicit BFloat16(uint16_t v)
    : m_Value(v)
    {}

    /// Constructs a value by converting a 32-bit float, see Float32ToBFloat16().
    explicit BFloat16(float v)
    : m_Value(Float32ToBFloat16(v).Val())
    {}

    /// Assigns the BFloat16 conversion of a 32-bit float, see Float32ToBFloat16().
    BFloat16& operator=(float v)
    {
        m_Value = Float32ToBFloat16(v).Val();
        return *this;
    }

    /// Compares the raw encodings bit for bit.
    /// Consequently two NaNs with identical bits compare equal, and +0 differs from -0.
    bool operator==(const BFloat16& r) const
    {
        return m_Value == r.Val();
    }

    /// Compares numerically against a 32-bit float after widening this value to float.
    bool operator==(const float& r) const
    {
        return ToFloat32() == r;
    }

    /// Converts a 32-bit float to BFloat16 using round-to-nearest, ties-to-even.
    ///
    /// Any NaN input yields the single encoding returned by Nan().
    static BFloat16 Float32ToBFloat16(const float v)
    {
        if (std::isnan(v))
        {
            return Nan();
        }

        // Float32:  S EEEEEEEE MMMMMMLRTTTTTTTTTTTTTTT
        // BFloat16: S EEEEEEEE MMMMMML
        // L: least significant bit kept in the BFloat16 mantissa
        // R: rounding bit (most significant discarded bit)
        // T: remaining discarded bits
        //
        // R = 0                      -> round down
        // R = 1, any T bit set       -> round up
        // R = 1, all T = 0, L = 0    -> tie, round down (result stays even)
        // R = 1, all T = 0, L = 1    -> tie, round up   (result becomes even)

        // memcpy is used to read the bit pattern: dereferencing a uint32_t* that
        // points at a float breaks the strict aliasing rule.
        uint32_t bits = 0;
        static_assert(sizeof(bits) == sizeof(v), "float is expected to be 32 bits wide");
        std::memcpy(&bits, &v, sizeof(bits));

        uint16_t u16 = static_cast<uint16_t>(bits >> 16u);
        // Least significant bit of the truncated result
        const uint16_t lsb = static_cast<uint16_t>(u16 & 0x0001u);
        // The 16 bits of the float that are being discarded
        const uint16_t error = static_cast<uint16_t>(bits & 0x0000FFFFu);
        if (error > 0x8000 || (error == 0x8000 && lsb == 1))
        {
            // A carry out of the mantissa moves into the exponent, which gives the
            // next larger magnitude (up to and including infinity).
            u16++;
        }
        return BFloat16(u16);
    }

    /// Widens this value to a 32-bit float; the low 16 bits of the float are zero.
    float ToFloat32() const
    {
        // Widen before shifting so that the shift is done on an unsigned 32-bit value
        // rather than on m_Value promoted to (signed) int.
        const uint32_t u32 = static_cast<uint32_t>(m_Value) << 16u;
        float f32 = 0.0f;
        static_assert(sizeof(u32) == sizeof(f32), "float is expected to be 32 bits wide");
        std::memcpy(&f32, &u32, sizeof(f32));
        return f32;
    }

    /// Returns the raw 16-bit encoding.
    uint16_t Val() const
    {
        return m_Value;
    }

    /// Returns the value with raw encoding 0x7F7F (largest finite positive value).
    static BFloat16 Max()
    {
        uint16_t max = 0x7F7F;
        return BFloat16(max);
    }

    /// Returns the NaN encoding 0x7FC0.
    static BFloat16 Nan()
    {
        uint16_t nan = 0x7FC0;
        return BFloat16(nan);
    }

    /// Returns positive infinity (raw encoding 0x7F80).
    static BFloat16 Inf()
    {
        uint16_t infVal = 0x7F80;
        return BFloat16(infVal);
    }

private:
    uint16_t m_Value;
};
114
115inline std::ostream& operator<<(std::ostream& os, const BFloat16& b)
116{
Narumol Prangnawarate9e68182020-03-11 11:34:55 +0000117 os << b.ToFloat32() << "(0x" << std::hex << b.Val() << ")";
Narumol Prangnawarate6644842020-03-05 17:27:45 +0000118 return os;
119}
120
Narumol Prangnawaratc3bf6ef2020-02-28 12:45:21 +0000121} //namespace armnn