IVGCVSW-8289 Add Numpy Support to execute network

  * Creates a numpy parser helper header
  * Adds ability to parse numpy file in PopulateTensorWithData
  * Reuse ExNet compare-output flag to compare against npy golden files.
  * Reuse ExNet write-outputs-to-file flag to write output to npy file.

Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Signed-off-by: Ryan OShea <ryan.oshea3@arm.com>
Change-Id: I52df822f72f6ce9d191ac4bd26ca5718ccaa7a4a
diff --git a/delegate/classic/src/ClassicDelegateUtils.hpp b/delegate/classic/src/ClassicDelegateUtils.hpp
index f5ff32a..78eb662 100644
--- a/delegate/classic/src/ClassicDelegateUtils.hpp
+++ b/delegate/classic/src/ClassicDelegateUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -359,50 +359,6 @@
     return reshapeLayer;
 }
 
-armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor)
-{
-    switch (tfLiteTensor.type)
-    {
-        case kTfLiteBool:
-            return armnn::DataType::Boolean;
-        case kTfLiteFloat32:
-            return armnn::DataType::Float32;
-        case kTfLiteFloat16:
-            return armnn::DataType::Float16;
-        case kTfLiteUInt8:
-            return armnn::DataType::QAsymmU8;
-        case kTfLiteInt8:
-        {
-            auto quantizationInfo = tfLiteTensor.quantization;
-            if (quantizationInfo.type == kTfLiteAffineQuantization)
-            {
-                auto* quantization =
-                    reinterpret_cast<TfLiteAffineQuantization*>(tfLiteTensor.quantization.params);
-                if (quantization->zero_point != nullptr && quantization->zero_point->size == 1)
-                {
-                    return armnn::DataType::QAsymmS8;
-                }
-                else
-                {
-                    return armnn::DataType::QSymmS8;
-                }
-            }
-            else
-            {
-                return armnn::DataType::QAsymmS8;
-            }
-        }
-        case kTfLiteInt16:
-            return armnn::DataType::QSymmS16;
-        case kTfLiteInt32:
-            return armnn::DataType::Signed32;
-        case kTfLiteInt64:
-            return armnn::DataType::Signed64;
-        default:
-            throw armnn::Exception(&"TfLiteArmnnDelegate: Unsupported data type: " [ tfLiteTensor.type]);
-    }
-}
-
 armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor, bool isOutput = false)
 {
     armnn::DataType type = GetDataType(tfLiteTensor);
diff --git a/delegate/common/src/DelegateUtils.hpp b/delegate/common/src/DelegateUtils.hpp
index a2cdc83..96767ff 100644
--- a/delegate/common/src/DelegateUtils.hpp
+++ b/delegate/common/src/DelegateUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -26,6 +26,50 @@
 namespace
 {
 
+armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor)
+{
+    switch (tfLiteTensor.type)
+    {
+        case kTfLiteBool:
+            return armnn::DataType::Boolean;
+        case kTfLiteFloat32:
+            return armnn::DataType::Float32;
+        case kTfLiteFloat16:
+            return armnn::DataType::Float16;
+        case kTfLiteUInt8:
+            return armnn::DataType::QAsymmU8;
+        case kTfLiteInt8:
+        {
+            auto quantizationInfo = tfLiteTensor.quantization;
+            if (quantizationInfo.type == kTfLiteAffineQuantization)
+            {
+                auto* quantization =
+                    reinterpret_cast<TfLiteAffineQuantization*>(tfLiteTensor.quantization.params);
+                if (quantization->zero_point != nullptr && quantization->zero_point->size == 1)
+                {
+                    return armnn::DataType::QAsymmS8;
+                }
+                else
+                {
+                    return armnn::DataType::QSymmS8;
+                }
+            }
+            else
+            {
+                return armnn::DataType::QAsymmS8;
+            }
+        }
+        case kTfLiteInt16:
+            return armnn::DataType::QSymmS16;
+        case kTfLiteInt32:
+            return armnn::DataType::Signed32;
+        case kTfLiteInt64:
+            return armnn::DataType::Signed64;
+        default:
+            throw armnn::Exception(&"TfLiteArmnnDelegate: Unsupported data type: " [ tfLiteTensor.type]);
+    }
+}
+
 uint32_t NonNegative(int32_t value, int nodeIndex)
 {
     if (value < 0)
diff --git a/include/armnn/Numpy.hpp b/include/armnn/Numpy.hpp
new file mode 100644
index 0000000..7b0962d
--- /dev/null
+++ b/include/armnn/Numpy.hpp
@@ -0,0 +1,406 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+// Copyright © 2021 Leon Merten Lohse
+// SPDX-License-Identifier: MIT
+//
+
+#ifndef NUMPY_HPP
+#define NUMPY_HPP
+
+#include <fmt/format.h>
+#include "Types.hpp"
+#include "Tensor.hpp"
+
+#include <fstream>
+#include <iostream>
+
+namespace armnnNumpy
+{
+
+    /// @struct HeaderInfo: contains information from the numpy file to be parsed.
+    /// @var m_MajorVersion: Single byte containing the major version of numpy implementation
+    /// @var m_MinorVersion: Single byte containing the minor version of numpy implementation
+    /// @var m_MagicStringLength: unsigned 8bit int containing the length of the magic string.
+    /// @var m_MagicString: Char array containing the magic string at the beginning of every numpy file.
+    /// @var m_HeaderLen: 2 or 4byte unsigned int containing the length of the header, depending on version.
+    struct HeaderInfo
+    {
+        char m_MajorVersion;
+        char m_MinorVersion;
+        const uint8_t m_MagicStringLength = 6;
+        const char m_MagicString[7] = "\x93NUMPY";
+        char m_HeaderLenBytes[4];
+        uint32_t m_HeaderLen;
+    };
+
+    /// @struct Header: contains information from the numpy file to be parsed.
+    /// @var m_HeaderString: String containing header.
+    /// @var m_DescrString: String containing description.
+    /// @var m_FortranOrder: Boolean declaring if array is in Fortran Order.
+    /// @var m_Shape: Shape of the data.
+    struct Header
+    {
+        std::string m_HeaderString;
+        std::string m_DescrString;
+        bool m_FortranOrder;
+        std::vector<uint32_t> m_Shape;
+    };
+
+    inline void CreateHeaderInfo(std::ifstream &ifStream, HeaderInfo &headerInfo)
+    {
+        // A Numpy header consists of:
+        // a magic string "x93NUMPY"
+        // 1 byte for the major version
+        // 1 byte for the minor version
+        // 2 or 4 bytes for the header length
+        // More info: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
+        char buffer[headerInfo.m_MagicStringLength + 2lu];
+        ifStream.read(buffer, headerInfo.m_MagicStringLength + 2);
+
+        if (!ifStream)
+        {
+            throw armnn::Exception(
+                    fmt::format("Failed to create numpy header info at {}",
+                                CHECK_LOCATION().AsString()));
+        }
+        // Verify that the numpy is in the valid format by checking for the magic string
+        int compare_result = ::memcmp(buffer, headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
+        if (compare_result != 0) {
+            throw armnn::Exception(fmt::format("Numpy does not contain magic string. Can not parse invalid numpy {}",
+                                               CHECK_LOCATION().AsString()));
+        }
+
+        headerInfo.m_MajorVersion = buffer[headerInfo.m_MagicStringLength];
+        headerInfo.m_MinorVersion = buffer[headerInfo.m_MagicStringLength + 1];
+        if(headerInfo.m_MajorVersion == 1 && headerInfo.m_MinorVersion == 0)
+        {
+            ifStream.read(headerInfo.m_HeaderLenBytes, 2);
+            // Header len is written in little endian, so we do a quick test
+            // to check the machine's endianness
+            int i = 1;
+            if (*(reinterpret_cast<char *>(&i)) == 1)
+            {
+                headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0]) |
+                                         (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8));
+            }
+            else
+            {
+                headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1]) |
+                                         (static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 8));
+            }
+        }
+        else if (headerInfo.m_MajorVersion == 2 && headerInfo.m_MinorVersion == 0)
+        {
+            ifStream.read(headerInfo.m_HeaderLenBytes, 4);
+            // Header len is written in little endian, so we do a quick test
+            // to check the machine's endianness
+            int i = 1;
+            if (*(reinterpret_cast<char *>(&i)) == 1)
+            {
+                headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 0) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 8) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 16) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 24);
+            }
+            else
+            {
+                headerInfo.m_HeaderLen = static_cast<unsigned>(headerInfo.m_HeaderLenBytes[3] << 0) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[2] << 8) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[1] << 16) |
+                    static_cast<unsigned>(headerInfo.m_HeaderLenBytes[0] << 24);
+            }
+        }
+        else
+        {
+            throw armnn::ParseException(fmt::format("Unable to parse Numpy version {}.{} {}",
+                                                    headerInfo.m_MajorVersion,
+                                                    headerInfo.m_MinorVersion,
+                                                    CHECK_LOCATION().AsString()));
+        }
+    }
+
+    /// Primarily used to isolate values from header dictionary
+    inline std::string getSubstring(std::string fullString,
+                                    std::string substringStart,
+                                    std::string substringEnd,
+                                    bool removeStartChar = 0,
+                                    bool includeEndChar = 0)
+    {
+        size_t startPos = fullString.find(substringStart);
+        size_t endPos = fullString.find(substringEnd, startPos);
+        if (startPos == std::string::npos || endPos == std::string::npos)
+        {
+            throw armnn::ParseException(fmt::format("Unable to find {} in numpy file. {}",
+                                                    substringStart, CHECK_LOCATION().AsString()));
+        }
+
+        // std::string.substr takes the starting position and the length of the substring.
+        // To calculate the length we subtract the start position from the end position.
+        // We also add a boolean on whether or not we want to include the character used to find endPos
+        startPos+= removeStartChar;
+        endPos += includeEndChar;
+        return fullString.substr(startPos, endPos - startPos);
+    }
+
+    inline void parseShape(Header& header, std::string& shapeString)
+    {
+        std::istringstream shapeStringStream(shapeString);
+        std::string token;
+        while(getline(shapeStringStream, token, ','))
+        {
+            header.m_Shape.push_back(static_cast<uint32_t >(std::stoi(token)));
+        }
+    }
+
+    inline void CreateHeader(std::ifstream& ifStream, HeaderInfo& headerInfo, Header& header)
+    {
+        char stringBuffer[headerInfo.m_HeaderLen];
+        ifStream.read(stringBuffer, headerInfo.m_HeaderLen);
+
+        header.m_HeaderString = std::string(stringBuffer, headerInfo.m_HeaderLen);
+        // Remove new line character at the end of the string
+        if(header.m_HeaderString.back() == '\n')
+        {
+            header.m_HeaderString.pop_back();
+        }
+
+        // Remove whitespace from the string.
+        // std::remove shuffles the string by placing all whitespace at the end and
+        //     returning the start location of the shuffled whitespace.
+        // std::string.erase then deletes the whitespace by deleting characters
+        //     between the iterator returned from std::remove and the end of the std::string
+        std::string::iterator whitespaceSubstringStart = std::remove(header.m_HeaderString.begin(),
+                                                                     header.m_HeaderString.end(), ' ');
+        header.m_HeaderString.erase(whitespaceSubstringStart, header.m_HeaderString.end());
+
+        // The order of the dictionary should be alphabetical,
+        // however this is not guaranteed so we have to search for the string.
+        // Because of this we do some weird parsing using std::string.find and some magic patterns
+        //
+        // For the description value, we include the end character from the first substring
+        // to help us find the value in the second substring. This should return with a "," at the end.
+        // Since we previously left the "," at the end of the substring,
+        // we can use it to find the end of the description value and then remove it after.
+        std::string descrString = getSubstring(header.m_HeaderString, "'descr", ",", 0, 1);
+        header.m_DescrString = getSubstring(descrString, ":", ",", 1);
+
+        // Fortran order is a python boolean literal, we simply look for a comma to delimit this pair.
+        // Since this is a boolean, both true and false end in an "e" without appearing in between.
+        // It is not great, but it is the easiest way to find the end.
+        // We have to ensure we include this e in the substring.
+        // Since this is a boolean we can check if the string contains
+        // either true or false and set the variable as required
+        std::string fortranOrderString = getSubstring(header.m_HeaderString, "'fortran_order", ",");
+        fortranOrderString = getSubstring(fortranOrderString, ":", "e", 1, 1);
+        header.m_FortranOrder = fortranOrderString.find("True") != std::string::npos ? true : false;
+
+        // The shape is a python tuple so we search for the closing bracket of the tuple.
+        // We include the end character to help us isolate the value substring.
+        // We can extract the inside of the tuple by searching for opening and closing brackets.
+        // We can then remove the brackets isolating the inside of the tuple.
+        // We then need to parse the string into a vector of unsigned integers
+        std::string shapeString = getSubstring(header.m_HeaderString, "'shape", ")", 0, 1);
+        shapeString = getSubstring(shapeString, "(", ")", 1, 0);
+        parseShape(header, shapeString);
+    }
+
+    template<typename T>
+    inline void ReadData(std::ifstream& ifStream, T* tensor, const unsigned int& numElements)
+    {
+        ifStream.read(reinterpret_cast<char *>(tensor), sizeof(T) * numElements);
+    }
+
+
+    inline armnn::DataType getArmNNDataType(std::string& descr)
+    {
+        if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
+        {
+            return armnn::DataType::Float32;
+        }
+        else if (descr.find("f2") != std::string::npos)
+        {
+            return armnn::DataType::Float16;
+        }
+        else if (descr.find("i8") != std::string::npos)
+        {
+            return armnn::DataType::Signed64;
+        }
+        else if (descr.find("i4") != std::string::npos)
+        {
+            return armnn::DataType::Signed32;
+        }
+        else if (descr.find("i2") != std::string::npos)
+        {
+            return armnn::DataType::QSymmS16;
+        }
+        else if (descr.find("i1") != std::string::npos)
+        {
+            return armnn::DataType::QSymmS8;
+        }
+        else if (descr.find("u1") != std::string::npos)
+        {
+            return armnn::DataType::QAsymmU8;
+        }
+        else
+        {
+            throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
+                                               descr, CHECK_LOCATION().AsString()));
+        }
+    }
+
+    inline std::string getNumpyDescr(armnn::DataType dType)
+    {
+        switch(dType)
+        {
+            case armnn::DataType::Float32:
+                return "f" + std::to_string(sizeof(float)); // size of float can be 4 or 8
+            case armnn::DataType::Float16:
+                return "f2";
+            case armnn::DataType::Signed64:
+                return "i8";
+            case armnn::DataType::Signed32:
+                return "i4";
+            case armnn::DataType::QSymmS16:
+                return "i2";
+            case armnn::DataType::QSymmS8:
+            case armnn::DataType::QAsymmS8:
+                return "i1";
+            case armnn::DataType::QAsymmU8:
+                return "u1";
+            default:
+                throw armnn::Exception(fmt::format("ArmNN to Numpy data type:{} not supported. {}",
+                                       dType, CHECK_LOCATION().AsString()));
+        }
+    }
+
+    template <typename T>
+    inline bool compareCTypes(std::string& descr)
+    {
+        if(descr.find("f4") != std::string::npos || descr.find("f8") != std::string::npos)
+        {
+            return std::is_same<T, float>::value;
+        }
+        else if (descr.find("i8") != std::string::npos)
+        {
+            return std::is_same<T, int64_t>::value;
+        }
+        else if (descr.find("i4") != std::string::npos)
+        {
+            return std::is_same<T, int32_t>::value;
+        }
+        else if (descr.find("i2") != std::string::npos)
+        {
+            return std::is_same<T, int16_t>::value;
+        }
+        else if (descr.find("i1") != std::string::npos)
+        {
+            return std::is_same<T, int8_t>::value;
+        }
+        else if (descr.find("u1") != std::string::npos)
+        {
+            return std::is_same<T, uint8_t>::value;
+        }
+        else
+        {
+            throw armnn::Exception(fmt::format("Numpy data type:{} not supported. {}",
+                                               descr, CHECK_LOCATION().AsString()));
+        }
+    }
+
+    inline unsigned int getNumElements(Header& header)
+    {
+        unsigned int numEls = 1;
+        for (auto dim: header.m_Shape)
+        {
+            numEls *= dim;
+        }
+
+        return numEls;
+    }
+
+    // Material in WriteToNumpyFile() has been reused from https://github.com/llohse/libnpy/blob/master/include/npy.hpp
+    // Please see write_header() in the above file for more details.
+    template<typename T>
+    inline void WriteToNumpyFile(const std::string& outputTensorFileName,
+                                 const T* const array,
+                                 const unsigned int numElements,
+                                 armnn::DataType dataType,
+                                 const armnn::TensorShape& shape)
+    {
+        std::ofstream out(outputTensorFileName, std::ofstream::binary);
+
+        // write header
+        {
+            // Setup string of tensor shape in format (x0, x1, x2, ..)
+            std::string shapeStr = "(";
+            for (uint32_t i = 0; i < shape.GetNumDimensions()-1; i++)
+            {
+                shapeStr = shapeStr + std::to_string(shape[i]) + ", ";
+            }
+            shapeStr = shapeStr + std::to_string(shape[shape.GetNumDimensions()-1]) + ")";
+
+            int i = 1;
+            std::string endianChar = (*(reinterpret_cast<char *>(&i))) ? "<" : ">";
+            std::string dataTypeStr = getNumpyDescr(dataType);
+            std::string fortranOrder = "False";
+            std::string headerStr = "{'descr': '" + endianChar + dataTypeStr +
+                                    "', 'fortran_order': " + fortranOrder +
+                                    ", 'shape': " + shapeStr + ", }";
+
+            armnnNumpy::HeaderInfo headerInfo;
+
+            // Header is composed of:
+            //     - 6 byte magic string
+            //     - 2 byte major and minor version
+            //     - 2 byte (v1.0) / 4 byte (v2.0) little-endian unsigned int
+            //     - headerStr.length() bytes
+            //     - 1 byte for newline termination (\n)
+            size_t length = headerInfo.m_MagicStringLength + 2 + 2 + headerStr.length() + 1;
+            uint8_t major_version = 1;
+
+            // for numpy major version 2, add extra 2 bytes for little-endian int (total 4 bytes)
+            if (length >= 255 * 255)
+            {
+                length += 2;
+                major_version = 2;
+            }
+
+            // Pad with spaces so header length is modulo 16 bytes.
+            size_t padding_length = 16 - length % 16;
+            std::string padding(padding_length, ' ');
+
+            // write magic string
+            out.write(headerInfo.m_MagicString, headerInfo.m_MagicStringLength);
+            out.put(major_version);
+            out.put(0); // minor version
+
+            // write header length
+            if (major_version == 1)
+            {
+                auto header_len = static_cast<uint16_t>(headerStr.length() + padding.length() + 1);
+
+                std::array<uint8_t, 2> header_len_16{static_cast<uint8_t>((header_len >> 0) & 0xff),
+                                                     static_cast<uint8_t>((header_len >> 8) & 0xff)};
+                out.write(reinterpret_cast<char *>(header_len_16.data()), 2);
+            }
+            else
+            {
+                auto header_len = static_cast<uint32_t>(headerStr.length() + padding.length() + 1);
+
+                std::array<uint8_t, 4> header_len_32{
+                    static_cast<uint8_t>((header_len >> 0) & 0xff), static_cast<uint8_t>((header_len >> 8) & 0xff),
+                    static_cast<uint8_t>((header_len >> 16) & 0xff), static_cast<uint8_t>((header_len >> 24) & 0xff)};
+                out.write(reinterpret_cast<char *>(header_len_32.data()), 4);
+            }
+
+            out << headerStr << padding << '\n';
+        }
+
+        // write tensor data to file
+        out.write(reinterpret_cast<const char *>(array), sizeof(T) * numElements);
+    }
+}
+
+#endif // NUMPY_HPP
\ No newline at end of file
diff --git a/tests/ExecuteNetwork/FileComparisonExecutor.cpp b/tests/ExecuteNetwork/FileComparisonExecutor.cpp
index 844004b..26d535a 100644
--- a/tests/ExecuteNetwork/FileComparisonExecutor.cpp
+++ b/tests/ExecuteNetwork/FileComparisonExecutor.cpp
@@ -1,14 +1,18 @@
 //
-// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
 #include "FileComparisonExecutor.hpp"
 #include <NetworkExecutionUtils/NetworkExecutionUtils.hpp>
+#include <armnn/Numpy.hpp>
 #include <algorithm>
 #include <ghc/filesystem.hpp>
 #include <iterator>
 
+// File limit size of 1Mb
+constexpr uint32_t MAX_FILE_SIZE = 1048576;
+
 using namespace armnn;
 
 /**
@@ -173,11 +177,10 @@
     {
         throw FileNotFoundException("The file \"" + fileName + "\" could not be found.");
     }
-    // The format we are reading in is based on NetworkExecutionUtils::WriteToFile. This could potentially
-    // be an enormous tensor. We'll limit what we can read in to 1Mb.
-    std::uintmax_t maxFileSize = 1048576;
-    std::uintmax_t fileSize    = ghc::filesystem::file_size(fileName);
-    if (fileSize > maxFileSize)
+    // The format we are reading in is based on NetworkExecutionUtils::WriteToFile.
+    // This could potentially be an enormous tensor.
+    std::uintmax_t fileSize = ghc::filesystem::file_size(fileName);
+    if (fileSize > MAX_FILE_SIZE)
     {
         throw InvalidArgumentException("The file \"" + fileName + "\" exceeds max size of 1 Mb.");
     }
@@ -245,6 +248,75 @@
     return result;
 }
 
+/**
+ * Open the given file and read the data out of it to construct a Tensor. This could throw FileNotFoundException
+ * or InvalidArgumentException
+ *
+ * @param fileName the file to be read.
+ * @return a populated tensor.
+ */
+Tensor ReadTensorFromNumpyFile(const std::string fileName)
+{
+    if (!ghc::filesystem::exists(fileName))
+    {
+        throw FileNotFoundException("The file \"" + fileName + "\" could not be found.");
+    }
+    // The format we are reading in is based on NetworkExecutionUtils::WriteToFile. This could potentially
+    // be an enormous tensor. We'll limit what we can read in to 1Mb.
+    std::uintmax_t fileSize = ghc::filesystem::file_size(fileName);
+    if (fileSize > MAX_FILE_SIZE)
+    {
+        throw InvalidArgumentException("The file \"" + fileName + "\" exceeds max size of 1 Mb.");
+    }
+
+    std::ifstream ifStream(fileName, std::ios::binary);
+    armnnNumpy::HeaderInfo headerInfo;
+    armnnNumpy::Header header;
+
+    CreateHeaderInfo(ifStream, headerInfo);
+    CreateHeader(ifStream, headerInfo, header);
+    uint32_t numElements = armnnNumpy::getNumElements(header);
+
+    switch (armnnNumpy::getArmNNDataType(header.m_DescrString))
+    {
+        case DataType::Float32: {
+            float* floats = new float[numElements];
+            armnnNumpy::ReadData<float>(ifStream, floats, numElements);
+            TensorInfo info({ numElements, 1, 1, 1 }, DataType::Float32);
+            return Tensor(info, floats);
+        }
+        case DataType::Signed32: {
+            int* ints = new int[numElements];
+            armnnNumpy::ReadData<int>(ifStream, ints, numElements);
+            TensorInfo info({ numElements, 1, 1, 1 }, DataType::Signed32);
+            return Tensor(info, ints);
+        }
+        case DataType::QSymmS8: {
+            int8_t* ints = new int8_t[numElements];
+            armnnNumpy::ReadData<int8_t>(ifStream, ints, numElements);
+            TensorInfo info({ numElements, 1, 1, 1 }, DataType::QSymmS8);
+            return Tensor(info, ints);
+        }
+        case DataType::QAsymmS8: {
+            int8_t* ints = new int8_t[numElements];
+            armnnNumpy::ReadData<int8_t>(ifStream, ints, numElements);
+            TensorInfo info({ numElements, 1, 1, 1 }, DataType::QAsymmS8);
+            return Tensor(info, ints);
+        }
+        case DataType::QAsymmU8: {
+            uint8_t* ints = new uint8_t[numElements];
+            armnnNumpy::ReadData<uint8_t>(ifStream, ints, numElements);
+            TensorInfo info({ numElements, 1, 1, 1 }, DataType::QAsymmU8);
+            return Tensor(info, ints);
+        }
+        default:
+            throw InvalidArgumentException("The tensor data could not be read from \"" + fileName + "\"");
+    }
+
+    Tensor result;
+    return result;
+}
+
 FileComparisonExecutor::FileComparisonExecutor(const ExecuteNetworkParams& params)
     : m_Params(params)
 {}
@@ -284,7 +356,16 @@
     std::vector<const void*> results;
     for (auto file : fileNames)
     {
-        Tensor t = ReadTensorFromFile(file);
+        Tensor t;
+        if (file.find(".npy") == std::string::npos)
+        {
+            t = ReadTensorFromFile(file);
+        }
+        else
+        {
+            t = ReadTensorFromNumpyFile(file);
+        }
+
         outputs.push_back({ 0, Tensor(t.GetInfo(), t.GetMemoryArea()) });
         results.push_back(t.GetMemoryArea());
     }
diff --git a/tests/ExecuteNetwork/TfliteExecutor.cpp b/tests/ExecuteNetwork/TfliteExecutor.cpp
index 433d538..94ff573 100644
--- a/tests/ExecuteNetwork/TfliteExecutor.cpp
+++ b/tests/ExecuteNetwork/TfliteExecutor.cpp
@@ -9,7 +9,9 @@
 
 #include <tensorflow/lite/core/c/c_api.h>
 #include "TfliteExecutor.hpp"
-#include "tensorflow/lite/kernels/kernel_util.h"
+#include <tensorflow/lite/kernels/kernel_util.h>
+
+#include <../delegate/common/src/DelegateUtils.hpp>
 
 #include <chrono>
 #include <string>
@@ -256,29 +258,44 @@
                     }
                     else
                     {
-                        ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x + 1
+                        ARMNN_LOG(info) << "Writing output " << outputIndex << " of iteration: " << x + 1
                                         << " to file: '" << m_Params.m_OutputTensorFiles[outputIndex] << "'";
                     }
                 }
+
                 long outputSize = 1;
                 for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
                 {
                     outputSize *= outputDims->data[dim];
                 }
 
+                bool isNumpyOutput = m_Params.m_OutputTensorFiles[outputIndex].find(".npy") != std::string::npos;
+                armnn::TensorShape shape(static_cast<unsigned int>(outputDims->size), outputDims->data);
+                armnn::DataType dataType(GetDataType(*m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)));
+
                 std::cout << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name << ": ";
                 switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type)
                 {
-
                     case kTfLiteFloat32:
                     {
                         auto tfLiteDelegateOutputData = m_TfLiteInterpreter->typed_tensor<float>(
                                 tfLiteDelegateOutputId);
                         results.push_back(tfLiteDelegateOutputData);
 
-                        for (int i = 0; i < outputSize; ++i)
+                        if (isNumpyOutput)
                         {
-                            fprintf(outputTensorFile, "%f ", tfLiteDelegateOutputData[i]);
+                            armnnNumpy::WriteToNumpyFile(m_Params.m_OutputTensorFiles[outputIndex],
+                                                         tfLiteDelegateOutputData,
+                                                         outputSize,
+                                                         dataType,
+                                                         shape);
+                        }
+                        else
+                        {
+                            for (int i = 0; i < outputSize; ++i)
+                            {
+                                fprintf(outputTensorFile, "%f ", tfLiteDelegateOutputData[i]);
+                            }
                         }
                         break;
                     }
@@ -287,10 +304,23 @@
                         auto tfLiteDelegateOutputData = m_TfLiteInterpreter->typed_tensor<int32_t>(
                                 tfLiteDelegateOutputId);
                         results.push_back(tfLiteDelegateOutputData);
-                        for (int i = 0; i < outputSize; ++i)
+
+                        if (isNumpyOutput)
                         {
-                            fprintf(outputTensorFile, "%d ", tfLiteDelegateOutputData[i]);
+                            armnnNumpy::WriteToNumpyFile(m_Params.m_OutputTensorFiles[outputIndex],
+                                                         tfLiteDelegateOutputData,
+                                                         outputSize,
+                                                         dataType,
+                                                         shape);
                         }
+                        else
+                        {
+                            for (int i = 0; i < outputSize; ++i)
+                            {
+                                fprintf(outputTensorFile, "%d ", tfLiteDelegateOutputData[i]);
+                            }
+                        }
+
                         break;
                     }
                     case kTfLiteUInt8:
@@ -298,10 +328,23 @@
                         auto tfLiteDelegateOutputData = m_TfLiteInterpreter->typed_tensor<uint8_t>(
                                 tfLiteDelegateOutputId);
                         results.push_back(tfLiteDelegateOutputData);
-                        for (int i = 0; i < outputSize; ++i)
+
+                        if (isNumpyOutput)
                         {
-                            fprintf(outputTensorFile, "%u ", tfLiteDelegateOutputData[i]);
+                            armnnNumpy::WriteToNumpyFile(m_Params.m_OutputTensorFiles[outputIndex],
+                                                         tfLiteDelegateOutputData,
+                                                         outputSize,
+                                                         dataType,
+                                                         shape);
                         }
+                        else
+                        {
+                            for (int i = 0; i < outputSize; ++i)
+                            {
+                                fprintf(outputTensorFile, "%u ", tfLiteDelegateOutputData[i]);
+                            }
+                        }
+
                         break;
                     }
                     case kTfLiteInt8:
@@ -309,10 +352,23 @@
                         auto tfLiteDelegateOutputData = m_TfLiteInterpreter->typed_tensor<int8_t>(
                                 tfLiteDelegateOutputId);
                         results.push_back(tfLiteDelegateOutputData);
-                        for (int i = 0; i < outputSize; ++i)
+
+                        if (isNumpyOutput)
                         {
-                            fprintf(outputTensorFile, "%d ", tfLiteDelegateOutputData[i]);
+                            armnnNumpy::WriteToNumpyFile(m_Params.m_OutputTensorFiles[outputIndex],
+                                                         tfLiteDelegateOutputData,
+                                                         outputSize,
+                                                         dataType,
+                                                         shape);
                         }
+                        else
+                        {
+                            for (int i = 0; i < outputSize; ++i)
+                            {
+                                fprintf(outputTensorFile, "%d ", tfLiteDelegateOutputData[i]);
+                            }
+                        }
+
                         break;
                     }
                     case kTfLiteBool:
@@ -320,8 +376,21 @@
                         auto tfLiteDelegateOutputData = m_TfLiteInterpreter->typed_tensor<bool>(
                                 tfLiteDelegateOutputId);
                         results.push_back(tfLiteDelegateOutputData);
-                        for (int i = 0; i < outputSize; ++i) {
-                            fprintf(outputTensorFile, "%u ", tfLiteDelegateOutputData[i]);
+
+                        if (isNumpyOutput)
+                        {
+                            armnnNumpy::WriteToNumpyFile(m_Params.m_OutputTensorFiles[outputIndex],
+                                                         tfLiteDelegateOutputData,
+                                                         outputSize,
+                                                         dataType,
+                                                         shape);
+                        }
+                        else
+                        {
+                            for (int i = 0; i < outputSize; ++i)
+                            {
+                                fprintf(outputTensorFile, "%u ", tfLiteDelegateOutputData[i]);
+                            }
                         }
                         break;
                     }
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 0df3bf5..1b881ed 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022, 2023-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -13,6 +13,7 @@
 #include <armnn/Optional.hpp>             // for Optional, EmptyOptional
 #include <armnn/Tensor.hpp>               // for Tensor, TensorInfo
 #include <armnn/TypesUtils.hpp>           // for Dequantize
+#include <armnn/Numpy.hpp>                // for Numpy
 #include <chrono>                         // for duration
 #include <functional>                     // for function
 #include <fstream>
@@ -132,51 +133,75 @@
 {
     const bool readFromFile = dataFile.has_value() && !dataFile.value().empty();
 
-    std::ifstream inputTensorFile;
     if (!readFromFile)
     {
         std::fill(tensor, tensor + numElements, 0);
         return;
     }
-    else
-    {
-        inputTensorFile = std::ifstream(dataFile.value());
-    }
 
-    auto parseElementFunc = GetParseElementFunc<T>();
-    std::string line;
-    unsigned int index = 0;
-    while (std::getline(inputTensorFile, line))
+    if (dataFile.value().find(".npy") == std::string::npos)
     {
-        std::vector<std::string> tokens = armnn::stringUtils::StringTokenizer(line, "\t ,:");
-        for (const std::string& token : tokens)
+        std::ifstream inputTensorFile;
+        inputTensorFile = std::ifstream(dataFile.value());
+
+        auto parseElementFunc = GetParseElementFunc<T>();
+        std::string line;
+        unsigned int index = 0;
+        while (std::getline(inputTensorFile, line))
         {
-            if (!token.empty()) // See https://stackoverflow.com/questions/10437406/
+            std::vector<std::string> tokens = armnn::stringUtils::StringTokenizer(line, "\t ,:");
+            for (const std::string &token: tokens)
             {
-                try
+                if (!token.empty()) // See https://stackoverflow.com/questions/10437406/
                 {
-                    if (index == numElements)
+                    try
                     {
-                        ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << dataFile.value()
-                                         << "\" does not match number of elements: " << numElements
-                                         << " for input \"" << inputName << "\".";
+                        if (index == numElements)
+                        {
+                            ARMNN_LOG(error) << "Number of elements: " << (index + 1) << " in file \""
+                                             << dataFile.value()
+                                             << "\" does not match number of elements: " << numElements
+                                             << " for input \"" << inputName << "\".";
+                        }
+                        *(tensor + index) = parseElementFunc(token);
+                        index++;
                     }
-                    *(tensor + index) = parseElementFunc(token);
-                    index++;
-                }
-                catch (const std::exception&)
-                {
-                    ARMNN_LOG(error) << "'" << token << "' is not a valid number. It has been ignored.";
+                    catch (const std::exception &)
+                    {
+                        ARMNN_LOG(error) << "'" << token << "' is not a valid number. It has been ignored.";
+                    }
                 }
             }
         }
-    }
 
-    if (index != numElements)
+        if (index != numElements)
+        {
+            ARMNN_LOG(error) << "Number of elements: " << (index + 1) << " in file \"" << inputName
+                             << "\" does not match number of elements: " << numElements
+                             << " for input \"" << inputName << "\".";
+        }
+    }
+    else
     {
-        ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << inputName
-                         << "\" does not match number of elements: " << numElements
-                         << " for input \"" << inputName << "\".";
+        std::ifstream ifStream(dataFile.value(), std::ifstream::binary);
+
+        armnnNumpy::HeaderInfo headerInfo;
+        armnnNumpy::Header header;
+
+        CreateHeaderInfo(ifStream, headerInfo);
+        CreateHeader(ifStream, headerInfo, header);
+
+        if (!armnnNumpy::compareCTypes<T>(header.m_DescrString))
+        {
+            ARMNN_LOG(error) << "Data type in numpy file " << inputName << " does not match expected data type.";
+        }
+        else if(numElements != armnnNumpy::getNumElements(header))
+        {
+            ARMNN_LOG(error) << "Number of elements in numpy " << inputName
+                             << " does not match expected number of elements.";
+        }
+
+        armnnNumpy::ReadData<T>(ifStream, tensor, numElements);
     }
 }
 
@@ -220,11 +245,22 @@
 
     if (info.m_OutputTensorFile.has_value())
     {
-        WriteToFile(info.m_OutputTensorFile.value(),
-                    info.m_OutputName,
-                    array,
-                    info.m_Tensor.GetNumElements(),
-                    info.m_DataType);
+        if (info.m_OutputTensorFile.value().find(".npy") == std::string::npos)
+        {
+            WriteToFile(info.m_OutputTensorFile.value(),
+                        info.m_OutputName,
+                        array,
+                        info.m_Tensor.GetNumElements(),
+                        info.m_DataType);
+        }
+        else
+        {
+            armnnNumpy::WriteToNumpyFile(info.m_OutputTensorFile.value(),
+                                         array,
+                                         info.m_Tensor.GetNumElements(),
+                                         info.m_DataType,
+                                         info.m_Tensor.GetInfo().GetShape());
+        }
     }
 
     if (info.m_PrintTensor)
@@ -236,34 +272,6 @@
     }
 }
 
-template <typename T>
-void PrintQuantizedTensor(OutputWriteInfo& info)
-{
-    std::vector<float> dequantizedValues;
-    auto tensor = info.m_Tensor;
-    dequantizedValues = DequantizeArray<T>(tensor.GetMemoryArea(),
-                                           tensor.GetNumElements(),
-                                           tensor.GetInfo().GetQuantizationScale(),
-                                           tensor.GetInfo().GetQuantizationOffset());
-
-    if (info.m_OutputTensorFile.has_value())
-    {
-        WriteToFile(info.m_OutputTensorFile.value(),
-                    info.m_OutputName,
-                    dequantizedValues.data(),
-                    tensor.GetNumElements(),
-                    info.m_DataType);
-    }
-
-    if (info.m_PrintTensor)
-    {
-        std::for_each(dequantizedValues.begin(), dequantizedValues.end(), [&](float value)
-        {
-            printf("%f ", value);
-        });
-    }
-}
-
 template<typename T, typename TParseElementFunc>
 std::vector<T> ParseArrayImpl(std::istream& stream, TParseElementFunc parseElementFunc, const char* chars = "\t ,:")
 {