blob: 173f2d16e2d4a3c639af9e9781963ba2364e9417 [file] [log] [blame]
Georgios Pinitase8291ac2020-02-26 09:58:13 +00001/*
Pablo Marquez Tellof73db972021-03-24 17:50:19 +00002 * Copyright (c) 2020-2021 Arm Limited.
Georgios Pinitase8291ac2020-02-26 09:58:13 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef ARM_COMPUTE_BFLOAT16_H
25#define ARM_COMPUTE_BFLOAT16_H
26
27#include <cstdint>
Pablo Marquez Tellof73db972021-03-24 17:50:19 +000028#include <cstring>
Georgios Pinitase8291ac2020-02-26 09:58:13 +000029
30namespace arm_compute
31{
32namespace
33{
/** Convert float to bfloat16
 *
 * The software path keeps the upper 16 bits of the IEEE-754 single-precision
 * bit pattern and rounds the discarded lower 16 bits to nearest, ties to even.
 * NaN inputs are returned as a quiet NaN with the sign bit preserved, so that
 * rounding can never turn a NaN into an infinity or a zero.
 *
 * @param[in] v Floating-point value to convert to bfloat
 *
 * @return Converted value
 */
inline uint16_t float_to_bf16(const float v)
{
    static_assert(sizeof(float) == sizeof(uint32_t), "float is expected to be 32 bits wide");
#if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16)
    const float *fromptr = &v;
    uint16_t     res;

    __asm __volatile(
        "ldr    s0, [%[fromptr]]\n"
        ".inst  0x1e634000\n" // BFCVT h0, s0
        "str    h0, [%[toptr]]\n"
        :
        : [fromptr] "r"(fromptr), [toptr] "r"(&res)
        : "v0", "memory");
#else  /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
    // Copy the bit pattern out with memcpy: reading a float through a uint32_t
    // pointer would break the strict aliasing rules.
    uint32_t bits;
    std::memcpy(&bits, &v, sizeof(bits));

    uint16_t res;
    if((bits & 0x7fffffffu) > 0x7f800000u)
    {
        // NaN (exponent all ones, non-zero mantissa): truncate and force the top
        // mantissa bit so the result stays a NaN even when the payload only
        // lived in the discarded lower 16 bits.
        res = static_cast<uint16_t>((bits >> 16) | 0x0040u);
    }
    else
    {
        res = static_cast<uint16_t>(bits >> 16);

        const uint32_t error = bits & 0x0000ffffu; // Bits lost by the truncation
        const uint32_t bf_l  = res & 0x0001u;      // Least significant bit that is kept

        // Round up when above the halfway point, or exactly halfway with an odd kept LSB
        if((error > 0x8000u) || ((error == 0x8000u) && (bf_l != 0)))
        {
            res = static_cast<uint16_t>(res + 1);
        }
    }
#endif /* defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) || defined(ARM_COMPUTE_FORCE_BF16) */
    return res;
}
64
/** Convert bfloat16 to float
 *
 * The 16 input bits become the upper half of the 32-bit float bit pattern and
 * the lower half is filled with zeros.
 *
 * @param[in] v Bfloat16 value to convert to float
 *
 * @return Converted value
 */
inline float bf16_to_float(const uint16_t &v)
{
    // Widen before shifting: a uint16_t operand is promoted to signed int, and
    // shifting a value with its top bit set would then reach the sign bit.
    const uint32_t lv = static_cast<uint32_t>(v) << 16;

    float fp;
    std::memcpy(&fp, &lv, sizeof(lv));
    return fp;
}
78}
79
80/** Brain floating point representation class */
Georgios Pinitasc7b183a2020-03-06 18:12:09 +000081class bfloat16 final
Georgios Pinitase8291ac2020-02-26 09:58:13 +000082{
83public:
84 /** Default Constructor */
85 bfloat16()
86 : value(0)
87 {
88 }
89 /** Constructor
90 *
91 * @param[in] v Floating-point value
92 */
93 explicit bfloat16(float v)
94 : value(float_to_bf16(v))
95 {
96 }
97 /** Assignment operator
98 *
99 * @param[in] v Floating point value to assign
100 *
101 * @return The updated object
102 */
103 bfloat16 &operator=(float v)
104 {
105 value = float_to_bf16(v);
106 return *this;
107 }
108 /** Floating point conversion operator
109 *
110 * @return Floating point representation of the value
111 */
112 operator float() const
113 {
114 return bf16_to_float(value);
115 }
116 /** Lowest representative value
117 *
118 * @return Returns the lowest finite value representable by bfloat16
119 */
120 static bfloat16 lowest()
121 {
122 bfloat16 val;
123 val.value = 0xFF7F;
124 return val;
125 }
126 /** Largest representative value
127 *
128 * @return Returns the largest finite value representable by bfloat16
129 */
130 static bfloat16 max()
131 {
132 bfloat16 val;
133 val.value = 0x7F7F;
134 return val;
135 }
136
137private:
138 uint16_t value;
139};
140} // namespace arm_compute
Pablo Marquez Tellof73db972021-03-24 17:50:19 +0000141#endif /* ARM_COMPUTE_BFLOAT16_H */