IVGCVSW-4517 Implement BFloat16 Encoder and Decoder
* Add ConvertFloat32ToBFloat16
* Add ConvertBFloat16ToFloat32
* Add BFloat16Encoder
* Add BFloat16Decoder
* Unit tests
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: I198888384c923aba28cfbed09a02edc6f8194b3e
diff --git a/src/armnnUtils/BFloat16.hpp b/src/armnnUtils/BFloat16.hpp
index bb56b7d..965fc31 100644
--- a/src/armnnUtils/BFloat16.hpp
+++ b/src/armnnUtils/BFloat16.hpp
@@ -6,7 +6,7 @@
#pragma once
#include <ostream>
-#include <math.h>
+#include <cmath>
#include <stdint.h>
namespace armnn
diff --git a/src/armnnUtils/FloatingPointConverter.cpp b/src/armnnUtils/FloatingPointConverter.cpp
index 3bdde11..e9b338a 100644
--- a/src/armnnUtils/FloatingPointConverter.cpp
+++ b/src/armnnUtils/FloatingPointConverter.cpp
@@ -5,6 +5,7 @@
#include <armnnUtils/FloatingPointConverter.hpp>
+#include "BFloat16.hpp"
#include "Half.hpp"
#include <boost/assert.hpp>
@@ -42,4 +43,34 @@
}
}
+void FloatingPointConverter::ConvertFloat32ToBFloat16(const float* srcFloat32Buffer, // Converts numElements float values to armnn::BFloat16, one element at a time.
+ size_t numElements, // Count of elements read from the source and written to the destination.
+ void* dstBFloat16Buffer) // Untyped output buffer; accessed below as an array of armnn::BFloat16.
+{
+ BOOST_ASSERT(srcFloat32Buffer != nullptr); // Both pointers are asserted unconditionally, i.e. even when numElements is 0.
+ BOOST_ASSERT(dstBFloat16Buffer != nullptr);
+
+ armnn::BFloat16* bf16 = reinterpret_cast<armnn::BFloat16*>(dstBFloat16Buffer); // View the raw destination as BFloat16 elements.
+
+ for (size_t i = 0; i < numElements; i++) // Writes indices 0 .. numElements-1 of the destination.
+ {
+ bf16[i] = armnn::BFloat16(srcFloat32Buffer[i]); // Narrowing is delegated to the BFloat16(float) constructor. NOTE(review): rounding behaviour lives in BFloat16.hpp - confirm there.
+ }
+}
+
+void FloatingPointConverter::ConvertBFloat16ToFloat32(const void* srcBFloat16Buffer, // Converts numElements armnn::BFloat16 values to float; the untyped input is accessed below as BFloat16 elements.
+ size_t numElements, // Count of elements read from the source and written to the destination.
+ float* dstFloat32Buffer) // Output buffer that receives one float per source element.
+{
+ BOOST_ASSERT(srcBFloat16Buffer != nullptr); // Both pointers are asserted unconditionally, i.e. even when numElements is 0.
+ BOOST_ASSERT(dstFloat32Buffer != nullptr);
+
+ const armnn::BFloat16* bf16 = reinterpret_cast<const armnn::BFloat16*>(srcBFloat16Buffer); // View the raw source as read-only BFloat16 elements.
+
+ for (size_t i = 0; i < numElements; i++) // Writes indices 0 .. numElements-1 of the destination.
+ {
+ dstFloat32Buffer[i] = bf16[i].toFloat32(); // Widening is delegated to BFloat16::toFloat32().
+ }
+}
+
} //namespace armnnUtils