IVGCVSW-4511 Add BFloat16 to RefLayerSupport and unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ifaae4d5aac468ba927b2c6a4bf31b8c8522aeb2e
diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index 5da4da5..16ceb52 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -27,6 +27,17 @@
m_Value = Float32ToBFloat16(v).Val();
}
+ operator float() const // implicit (non-explicit) conversion; the result is whatever ToFloat32() returns
+ {
+ return ToFloat32();
+ }
+
+ // Defaulted on purpose: the hand-written body only copied m_Value (via Val()),
+ // which is what the compiler-generated member-wise copy does anyway.
+ // A user-provided operator= is never trivial, so it would stop BFloat16 from being
+ // trivially copyable. NOTE(review): assumes m_Value is the only data member.
+ BFloat16& operator=(const BFloat16& other) = default;
+
BFloat16& operator=(float v)
{
m_Value = Float32ToBFloat16(v).Val();
@@ -38,11 +49,6 @@
return m_Value == r.Val();
}
- bool operator==(const float& r) const
- {
- return ToFloat32() == r;
- }
-
static BFloat16 Float32ToBFloat16(const float v)
{
if (std::isnan(v))
diff --git a/src/armnnUtils/QuantizeHelper.hpp b/src/armnnUtils/QuantizeHelper.hpp
index 6fd13fd..596ec98 100644
--- a/src/armnnUtils/QuantizeHelper.hpp
+++ b/src/armnnUtils/QuantizeHelper.hpp
@@ -8,6 +8,7 @@
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/TypesUtils.hpp>
+#include <BFloat16.hpp>
#include <Half.hpp>
#include <initializer_list>
@@ -65,6 +66,22 @@
}
};
+template<>
+struct SelectiveQuantizer<armnn::BFloat16, false> // specialization for armnn::BFloat16: values are converted, never scaled
+{
+ static armnn::BFloat16 Quantize(float value, float scale, int32_t offset) // float -> BFloat16; scale and offset are not used
+ {
+ armnn::IgnoreUnused(scale, offset); // both parameters are deliberately ignored
+ return armnn::BFloat16(value); // constructs the BFloat16 directly from the float
+ }
+
+ static float Dequantize(armnn::BFloat16 value, float scale, int32_t offset) // BFloat16 -> float; scale and offset are not used
+ {
+ armnn::IgnoreUnused(scale, offset); // both parameters are deliberately ignored
+ return value; // converted implicitly through the operator float() added in BFloat16.hpp
+ }
+};
+
template<typename T>
T SelectiveQuantize(float value, float scale, int32_t offset)
{