IVGCVSW-5636 'Implement NNAPI caching functions'

* Cached serialized ArmNN model.
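
For context, the NNAPI runtime drives these hooks once an application
enables compilation caching. Illustrative sketch only; the cache
directory below is a made-up example:

    // The app supplies a cache directory and a 32-byte token. The runtime
    // then calls the driver's getNumberOfCacheFilesNeeded(), passes cache
    // handles to prepareModel_1_2/_1_3, and on subsequent compilations
    // calls prepareModelFromCache/prepareModelFromCache_1_3.
    uint8_t token[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN] = {};
    ANeuralNetworksCompilation_setCaching(compilation, "/data/app_cache", token);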

!armnn:6384

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I78120a7f8ea892a28c0ff25f1b54e67a4f912574
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index a350d3f..c855b52 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -19,6 +19,8 @@
 #include "../1.0/ArmnnDriverImpl.hpp"
 #include "../1.0/HalPolicy.hpp"
 
+#include <armnn/BackendHelper.hpp>
+
 #include <log/log.h>
 
 namespace armnn_driver
@@ -135,20 +137,26 @@
     }
 
     Return<V1_0::ErrorStatus> prepareModelFromCache(
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-            const HidlToken&,
-            const android::sp<V1_2::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_0::ErrorStatus::GENERAL_FAILURE;
+        return ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+                                                      m_Options,
+                                                      modelCacheHandle,
+                                                      dataCacheHandle,
+                                                      token,
+                                                      cb);
     }
 
-    Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
-            const android::sp<V1_2::IPreparedModelCallback>& cb)
+    Return<V1_0::ErrorStatus> prepareModel_1_2(
+        const V1_2::Model& model, V1_1::ExecutionPreference preference,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
 
@@ -165,6 +173,9 @@
                                                       m_ClTunedParameters,
                                                       m_Options,
                                                       model,
+                                                      modelCacheHandle,
+                                                      dataCacheHandle,
+                                                      token,
                                                       cb,
                                                       model.relaxComputationFloat32toFloat16
                                                       && m_Options.GetFp16Enabled());
@@ -198,9 +209,12 @@
     Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
     {
         ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()");
-
-        // Set both numbers to be 0 for cache not supported.
-        cb(V1_0::ErrorStatus::NONE, 0, 0);
+        unsigned int numberOfCachedModelFiles = 0;
+        for (auto& backend : m_Options.GetBackends())
+        {
+            numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+        }
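+        // Report one cached model file per backend that supports caching, plus
+        // a single data cache file that holds the serialized Arm NN model.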
+        cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
         return Void();
     }
 };
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index 01b3ab5..b3bc5cd 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -8,7 +8,10 @@
 #include "../ModelToINetworkConverter.hpp"
 #include "../SystemPropertiesUtils.hpp"
 
+#include <armnnDeserializer/IDeserializer.hpp>
+
 #include <log/log.h>
+#include <sys/stat.h>
 
 namespace
 {
@@ -90,6 +93,9 @@
        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
        const DriverOptions& options,
        const V1_2::Model& model,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+       const HidlToken& token,
        const android::sp<V1_2::IPreparedModelCallback>& cb,
        bool float32ToFloat16)
 {
@@ -127,8 +133,13 @@
 
     // Serialize the network graph to a .armnn file if an output directory
     // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
+    bool serializeToFile = dataCacheHandle.size() > 0;
     auto serializedNetworkFileName =
-        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         serializeToFile);
 
     // Optimize the network
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
@@ -136,12 +147,41 @@
     OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
     OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
 
+    int cachedFd = -1;
+    bool saveCachedNetwork = options.SaveCachedNetwork();
+
+    unsigned int numberOfCachedModelFiles = 0;
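+    // Walk the backends in the same order as the model cache handles: when the
+    // GpuAcc backend supplies a cache file descriptor, pass it through the
+    // CachedFileDescriptor backend option so the compiled network is saved to it.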
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // and its entries should be in the same order as the backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                numberOfCachedModelFiles += numberOfCacheFiles;
+                if (modelCacheHandle[index]->numFds == 1)
+                {
+                    if (backend == armnn::Compute::GpuAcc)
+                    {
+                        cachedFd = modelCacheHandle[index]->data[0];
+                        saveCachedNetwork = true;
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
     armnn::BackendOptions gpuAcc("GpuAcc",
     {
         { "FastMathEnabled", options.IsFastMathEnabled() },
-        { "SaveCachedNetwork", options.SaveCachedNetwork() },
+        { "SaveCachedNetwork", saveCachedNetwork },
         { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
-        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+        { "CachedFileDescriptor", cachedFd }
     });
 
     armnn::BackendOptions cpuAcc("CpuAcc",
@@ -192,12 +232,16 @@
     std::string msg;
     armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                 MemorySource::Undefined,
-                                                MemorySource::Undefined);
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    auto numInputs  = getMainModel(model).inputIndexes.size();
+    auto numOutputs = getMainModel(model).outputIndexes.size();
     try
     {
         if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
         {
-            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
+            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
         }
     }
     catch (std::exception& e)
@@ -227,28 +271,344 @@
 
     // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
     // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
-    if (!preparedModel->ExecuteWithDummyInputs())
+    // Only run this if the GpuAcc backend has been added to options
+    if (std::find(options.GetBackends().begin(),
+                  options.GetBackends().end(),
+                  armnn::Compute::GpuAcc) != options.GetBackends().end())
     {
-        return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+        {
+            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        }
+
+        if (clTunedParameters &&
+            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+        {
+            // Now that we've done one inference the CL kernel parameters will have been tuned,
+            // so save the updated file.
+            try
+            {
+                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            }
+            catch (std::exception& error)
+            {
+                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+                      options.GetClTunedParametersFile().c_str(), error.what());
+            }
+        }
     }
 
-    if (clTunedParameters &&
-        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+    size_t hashValue = 0;
+    // Cache the model
+    if (dataCacheHandle.size() > 0)
     {
-        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
-        try
+        // Cache the Arm NN model; there should be exactly one data cache handle
+        if (dataCacheHandle.size() != 1)
         {
-            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+            return V1_0::ErrorStatus::NONE;
         }
-        catch (std::exception& error)
+
+        if (dataCacheHandle[0]->numFds != 1)
         {
-            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
-                  options.GetClTunedParametersFile().c_str(), error.what());
+            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+            NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+            return V1_0::ErrorStatus::NONE;
         }
+        int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+        if (dataCacheFileAccessMode != O_RDWR)
+        {
+            ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode.");
+            NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+            return V1_0::ErrorStatus::NONE;
+        }
+
+        write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+    }
+
+    if (modelCacheHandle.size() > 0)
+    {
+        if (modelCacheHandle.size() != numberOfCachedModelFiles)
+        {
+            NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+            return V1_0::ErrorStatus::NONE;
+        }
+        for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+        {
+            if (modelCacheHandle[i]->numFds == 1)
+            {
+                int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDONLY)
+                {
+                    struct stat statBuffer;
+                    if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+                    {
+                        long modelDataSize = statBuffer.st_size;
+                        if (modelDataSize > 0)
+                        {
+                            std::vector <uint8_t> modelData(modelDataSize);
+                            pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+                        }
+                    }
+                }
+            }
+        }
+    }
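+    // Remember the XOR-combined hash of the data and model caches, together with
+    // the serialized size, so prepareModelFromCache can validate the files later.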
+    if (hashValue != 0)
+    {
+        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
     }
 
     NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+    return V1_0::ErrorStatus::NONE;
+}
 
+Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache(
+    const armnn::IRuntimePtr& runtime,
+    const DriverOptions& options,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+    const HidlToken& token,
+    const android::sp<V1_2::IPreparedModelCallback>& cb,
+    bool float32ToFloat16)
+{
+    ALOGV("ArmnnDriverImpl::prepareModelFromCache()");
+
+    if (cb.get() == nullptr)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel");
+        return V1_0::ErrorStatus::INVALID_ARGUMENT;
+    }
+
+    if (!runtime)
+    {
+        return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
+    }
+
+    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+    {
+        FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb);
+        return V1_0::ErrorStatus::INVALID_ARGUMENT;
+    }
+
+    // DataCacheHandle size should always be 1
+    // Arm NN model
+    if (dataCacheHandle.size() != 1)
+    {
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Check that the number of cached model files matches the expected value
+    unsigned int numberOfCachedModelFiles = 0;
+    for (auto& backend : options.GetBackends())
+    {
+        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+    }
+    if (modelCacheHandle.size() != numberOfCachedModelFiles)
+    {
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    if (dataCacheHandle[0]->numFds != 1)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1.");
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+    if (dataCacheFileAccessMode != O_RDWR)
+    {
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+    if (dataSize == 0)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
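+    // The cache file may be larger than the size registered for this token; if
+    // so, assume the serialized network occupies the trailing bytes and read it
+    // from the computed offset.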
+    int offset = 0;
+    {
+        struct stat statBuffer;
+        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+        {
+            unsigned long bufferSize = statBuffer.st_size;
+            if (bufferSize <= 0)
+            {
+                ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+                FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+                return V1_0::ErrorStatus::GENERAL_FAILURE;
+            }
+            if (bufferSize > dataSize)
+            {
+                offset = bufferSize - dataSize;
+            }
+        }
+    }
+    std::vector<uint8_t> dataCacheData(dataSize);
+    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+    int gpuAccCachedFd = -1;
+    bool saveCachedNetwork = false;
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // and its entries should be in the same order as the backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                if (modelCacheHandle[index]->numFds != 1)
+                {
+                    ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1.");
+                    FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE,
+                                     "Cannot read from the model cache, numFds != 1.", cb);
+                    return V1_0::ErrorStatus::GENERAL_FAILURE;
+                }
+                auto cachedFd = modelCacheHandle[index]->data[0];
+
+                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDWR)
+                {
+                    FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+                    return V1_0::ErrorStatus::GENERAL_FAILURE;
+                }
+
+                struct stat statBuffer;
+                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+                {
+                    long modelDataSize = statBuffer.st_size;
+                    if (modelDataSize > 0)
+                    {
+                        std::vector<uint8_t> modelData(modelDataSize);
+                        pread(cachedFd, modelData.data(), modelData.size(), 0);
+                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+                        // For GpuAcc numberOfCacheFiles is 1
+                        if (backend == armnn::Compute::GpuAcc)
+                        {
+                            gpuAccCachedFd = cachedFd;
+                        }
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
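+    // Reject the cache if the recomputed hash does not match the value that was
+    // registered for this token when the model was originally prepared.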
+    if (!CacheDataHandlerInstance().Validate(token, hashValue))
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!");
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
+        return V1_0::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Deserialize the network.
+    auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+
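+    // Only the serialized INetwork is stored in the data cache, so the graph has
+    // to be re-optimized here; the GpuAcc backend can restore its compiled
+    // network through the CachedFileDescriptor option below.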
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+    armnn::OptimizerOptions OptOptions;
+    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
+
+    armnn::BackendOptions gpuAcc("GpuAcc",
+                                 {
+                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
+                                         {"SaveCachedNetwork",     saveCachedNetwork},
+                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
+                                         {"CachedFileDescriptor",  gpuAccCachedFd}
+                                 });
+
+    armnn::BackendOptions cpuAcc("CpuAcc",
+                                 {
+                                         {"FastMathEnabled", options.IsFastMathEnabled()},
+                                         {"NumberOfThreads", options.GetNumberOfThreads()}
+                                 });
+    OptOptions.m_ModelOptions.push_back(gpuAcc);
+    OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+    std::vector<std::string> errMessages;
+    try
+    {
+        optNet = armnn::Optimize(*network.get(),
+                                 options.GetBackends(),
+                                 runtime->GetDeviceSpec(),
+                                 OptOptions,
+                                 errMessages);
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from optimize.";
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_0::ErrorStatus::NONE;
+    }
+
+    // Check that the optimized network is valid.
+    if (!optNet)
+    {
+        std::stringstream message;
+        message << "Invalid optimized network";
+        for (const std::string& msg : errMessages)
+        {
+            message << "\n" << msg;
+        }
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_0::ErrorStatus::NONE;
+    }
+
+    // Export the optimized network graph to a dot file if an output dump directory
+    // has been specified in the drivers' arguments.
+    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+                                                               options.GetRequestInputsAndOutputsDumpDir());
+
+    // Load it into the runtime.
+    armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                MemorySource::Undefined,
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    try
+    {
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+        {
+            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_0::ErrorStatus::NONE;
+    }
+
+    std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
+            new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
+                    netId,
+                    runtime.get(),
+                    options.GetRequestInputsAndOutputsDumpDir(),
+                    options.IsGpuProfilingEnabled(),
+                    options.isAsyncModelExecutionEnabled(),
+                    options.getNoOfArmnnThreads(),
+                    true));
+
+    NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
     return V1_0::ErrorStatus::NONE;
 }
 
diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp
index eeb491b..70f46cb 100644
--- a/1.2/ArmnnDriverImpl.hpp
+++ b/1.2/ArmnnDriverImpl.hpp
@@ -7,10 +7,13 @@
 
 #include <HalInterfaces.h>
 
+#include "../CacheDataHandler.hpp"
 #include "../DriverOptions.hpp"
 
 #include <armnn/ArmNN.hpp>
 
+#include <NeuralNetworks.h>
+
 #ifdef ARMNN_ANDROID_R
 using namespace android::nn::hal;
 #endif
@@ -30,12 +33,27 @@
 class ArmnnDriverImpl
 {
 public:
-    static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
-                                                           const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
-                                                           const DriverOptions& options,
-                                                           const V1_2::Model& model,
-                                                           const android::sp<V1_2::IPreparedModelCallback>& cb,
-                                                           bool float32ToFloat16 = false);
+    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+    static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(
+        const armnn::IRuntimePtr& runtime,
+        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+        const DriverOptions& options,
+        const V1_2::Model& model,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb,
+        bool float32ToFloat16 = false);
+
+    static Return<V1_0::ErrorStatus> prepareModelFromCache(
+        const armnn::IRuntimePtr& runtime,
+        const DriverOptions& options,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb,
+        bool float32ToFloat16 = false);
 
     static Return<void> getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
                                             V1_2::IDevice::getCapabilities_1_2_cb cb);
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index fd4aa74..6d2e0b7 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -21,6 +21,8 @@
 #include "../1.0/ArmnnDriverImpl.hpp"
 #include "../1.0/HalPolicy.hpp"
 
+#include <armnn/BackendHelper.hpp>
+
 #include <log/log.h>
 
 namespace armnn_driver
@@ -31,6 +33,7 @@
 class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice
 {
 public:
+    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
 
     ArmnnDriver(DriverOptions options)
         : ArmnnDevice(std::move(options))
@@ -39,9 +42,7 @@
     }
     ~ArmnnDriver() {}
 
-    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
 
-public:
     Return<void> getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override
     {
         ALOGV("hal_1_3::ArmnnDriver::getCapabilities()");
@@ -131,10 +132,13 @@
                                                                                          cb);
     }
 
-    Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-            const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
-            const android::sp<V1_2::IPreparedModelCallback>& cb)
+    Return<V1_0::ErrorStatus> prepareModel_1_2(
+        const V1_2::Model& model,
+        V1_1::ExecutionPreference preference,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()");
 
@@ -151,6 +155,9 @@
                                                                m_ClTunedParameters,
                                                                m_Options,
                                                                model,
+                                                               modelCacheHandle,
+                                                               dataCacheHandle,
+                                                               token,
                                                                cb,
                                                                model.relaxComputationFloat32toFloat16
                                                                && m_Options.GetFp16Enabled());
@@ -174,14 +181,15 @@
                                                                                          cb);
     }
 
-    Return<V1_3::ErrorStatus> prepareModel_1_3(const V1_3::Model& model,
-                                               V1_1::ExecutionPreference preference,
-                                               V1_3::Priority priority,
-                                               const V1_3::OptionalTimePoint&,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-                                               const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-                                               const HidlToken&,
-                                               const android::sp<V1_3::IPreparedModelCallback>& cb)
+    Return<V1_3::ErrorStatus> prepareModel_1_3(
+        const V1_3::Model& model,
+        V1_1::ExecutionPreference preference,
+        V1_3::Priority priority,
+        const V1_3::OptionalTimePoint&,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCache,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCache,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()");
 
@@ -199,11 +207,13 @@
             return V1_3::ErrorStatus::INVALID_ARGUMENT;
         }
 
-
         return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime,
                                                       m_ClTunedParameters,
                                                       m_Options,
                                                       model,
+                                                      modelCache,
+                                                      dataCache,
+                                                      token,
                                                       cb,
                                                       model.relaxComputationFloat32toFloat16
                                                       && m_Options.GetFp16Enabled(),
@@ -219,10 +229,13 @@
 
     Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
     {
-        ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()");
-
-        // Set both numbers to be 0 for cache not supported.
-        cb(V1_0::ErrorStatus::NONE, 0, 0);
+        ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()");
+        unsigned int numberOfCachedModelFiles = 0;
+        for (auto& backend : m_Options.GetBackends())
+        {
+            numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+        }
+        cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
         return Void();
     }
 
@@ -250,26 +263,35 @@
     }
 
     Return<V1_0::ErrorStatus> prepareModelFromCache(
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const HidlToken&,
-        const android::sp<V1_2::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_2::IPreparedModelCallback>& cb)
     {
         ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_0::ErrorStatus::GENERAL_FAILURE;
+        return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+                                                               m_Options,
+                                                               modelCacheHandle,
+                                                               dataCacheHandle,
+                                                               token,
+                                                               cb);
     }
 
     Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
         const V1_3::OptionalTimePoint&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
-        const HidlToken&,
-        const android::sp<V1_3::IPreparedModelCallback>& callback)
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb)
     {
-        ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
-        callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
-        return V1_3::ErrorStatus::GENERAL_FAILURE;
+        ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()");
+
+        return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime,
+                                                          m_Options,
+                                                          modelCacheHandle,
+                                                          dataCacheHandle,
+                                                          token,
+                                                          cb);
     }
 
     Return<void> allocate(const V1_3::BufferDesc& /*desc*/,
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 3ecd2f8..e1d65f9 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -8,8 +8,12 @@
 #include "../ModelToINetworkConverter.hpp"
 #include "../SystemPropertiesUtils.hpp"
 
+#include <armnnDeserializer/IDeserializer.hpp>
+
 #include <log/log.h>
 
+#include <sys/stat.h>
+
 namespace
 {
 const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
@@ -100,6 +104,9 @@
        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
        const DriverOptions& options,
        const V1_3::Model& model,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+       const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+       const HidlToken& token,
        const android::sp<V1_3::IPreparedModelCallback>& cb,
        bool float32ToFloat16,
        V1_3::Priority priority)
@@ -138,8 +145,13 @@
 
     // Serialize the network graph to a .armnn file if an output directory
     // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
+    bool serializeToFile = dataCacheHandle.size() > 0;
     auto serializedNetworkFileName =
-        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         serializeToFile);
 
     // Optimize the network
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
@@ -147,12 +159,42 @@
     OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
     OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
 
+    int cachedFd = -1;
+    bool saveCachedNetwork = options.SaveCachedNetwork();
+
+    unsigned int numberOfCachedModelFiles = 0;
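+    // As in the 1.2 prepare path: pick up the GpuAcc cache file descriptor, in
+    // backend order, so the compiled network can be saved through it.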
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // and its entries should be in the same order as the backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                numberOfCachedModelFiles += numberOfCacheFiles;
+                if (modelCacheHandle[index]->numFds == 1)
+                {
+                    // For GpuAcc numberOfCacheFiles is 1
+                    if (backend == armnn::Compute::GpuAcc)
+                    {
+                        cachedFd = modelCacheHandle[index]->data[0];
+                        saveCachedNetwork = true;
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
     armnn::BackendOptions gpuAcc("GpuAcc",
     {
         { "FastMathEnabled", options.IsFastMathEnabled() },
-        { "SaveCachedNetwork", options.SaveCachedNetwork() },
+        { "SaveCachedNetwork", saveCachedNetwork },
         { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
-        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+        { "CachedFileDescriptor", cachedFd }
     });
 
     armnn::BackendOptions cpuAcc("CpuAcc",
@@ -203,7 +245,11 @@
     std::string msg;
     armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                 MemorySource::Undefined,
-                                                MemorySource::Undefined);
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    auto numInputs  = getMainModel(model).inputIndexes.size();
+    auto numOutputs = getMainModel(model).outputIndexes.size();
     try
     {
         if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
@@ -239,28 +285,345 @@
 
     // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
     // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
-    if (!preparedModel->ExecuteWithDummyInputs())
+    // Only run this if the GpuAcc backend has been added to options
+    if (std::find(options.GetBackends().begin(),
+                  options.GetBackends().end(),
+                  armnn::Compute::GpuAcc) != options.GetBackends().end())
     {
-        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+        {
+            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+        }
+
+        if (clTunedParameters &&
+            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+        {
+            // Now that we've done one inference the CL kernel parameters will have been tuned,
+            // so save the updated file.
+            try
+            {
+                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            }
+            catch (std::exception& error)
+            {
+                ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+                      options.GetClTunedParametersFile().c_str(), error.what());
+            }
+        }
+    }
+    size_t hashValue = 0;
+    // Cache the model
+    if (dataCacheHandle.size() > 0)
+    {
+        // Cache the Arm NN model
+        if (dataCacheHandle.size() != 1)
+        {
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+
+        if (dataCacheHandle[0]->numFds != 1)
+        {
+            ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+        int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+        if (dataCacheFileAccessMode != O_RDWR)
+        {
+            ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode.");
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
+        }
+
+        write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+        hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
     }
 
-    if (clTunedParameters &&
-        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+    // Cache the model data
+    if (modelCacheHandle.size() > 0)
     {
-        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
-        try
+        if (modelCacheHandle.size() != numberOfCachedModelFiles)
         {
-            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+            NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+            return V1_3::ErrorStatus::NONE;
         }
-        catch (std::exception& error)
+
+        for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
         {
-            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
-                  options.GetClTunedParametersFile().c_str(), error.what());
+            if (modelCacheHandle[i]->numFds == 1)
+            {
+                int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDONLY)
+                {
+                    struct stat statBuffer;
+                    if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+                    {
+                        long modelDataSize = statBuffer.st_size;
+                        if (modelDataSize > 0)
+                        {
+                            std::vector<uint8_t> modelData(modelDataSize);
+                            pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+                            hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+                        }
+                    }
+                }
+            }
         }
     }
+    if (hashValue != 0)
+    {
+        CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+    }
 
     NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+    return V1_3::ErrorStatus::NONE;
+}
 
+Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
+    const armnn::IRuntimePtr& runtime,
+    const DriverOptions& options,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+    const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+    const HidlToken& token,
+    const android::sp<V1_3::IPreparedModelCallback>& cb)
+{
+    ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
+
+    if (cb.get() == nullptr)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
+        return V1_3::ErrorStatus::INVALID_ARGUMENT;
+    }
+
+    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    if (!runtime)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
+        return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
+    }
+
+    // DataCacheHandle size should always be 1
+    // Arm NN model
+    if (dataCacheHandle.size() != 1)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Check that the number of cached model files matches the expected value
+    unsigned int numberOfCachedModelFiles = 0;
+    for (auto& backend : options.GetBackends())
+    {
+        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+    }
+    if (modelCacheHandle.size() != numberOfCachedModelFiles)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    if (dataCacheHandle[0]->numFds != 1)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+    if (dataCacheFileAccessMode != O_RDWR)
+    {
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+    if (dataSize == 0)
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    int offset = 0;
+    {
+        struct stat statBuffer;
+        if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+        {
+            unsigned long bufferSize = statBuffer.st_size;
+            if (bufferSize <= 0)
+            {
+                ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+                cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                return V1_3::ErrorStatus::GENERAL_FAILURE;
+            }
+            if (bufferSize > dataSize)
+            {
+                offset = bufferSize - dataSize;
+            }
+        }
+    }
+    std::vector<uint8_t> dataCacheData(dataSize);
+    pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+    auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+    int gpuAccCachedFd = -1;
+    bool saveCachedNetwork = false;
+    if (modelCacheHandle.size() > 0)
+    {
+        unsigned int index = 0;
+        for (auto& backend : options.GetBackends())
+        {
+            // modelCacheHandle size should be equal to numberOfCachedModelFiles
+            // and its entries should be in the same order as the backends
+            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+            if (numberOfCacheFiles > 0)
+            {
+                if (modelCacheHandle[index]->numFds != 1)
+                {
+                    ALOGW(
+                       "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
+                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                    return V1_3::ErrorStatus::GENERAL_FAILURE;
+                }
+                auto cachedFd = modelCacheHandle[index]->data[0];
+
+                int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+                if (modelCacheFileAccessMode != O_RDWR)
+                {
+                    cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+                    return V1_3::ErrorStatus::GENERAL_FAILURE;
+                }
+
+                struct stat statBuffer;
+                if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+                {
+                    long modelDataSize = statBuffer.st_size;
+                    if (modelDataSize > 0)
+                    {
+                        std::vector<uint8_t> modelData(modelDataSize);
+                        pread(cachedFd, modelData.data(), modelData.size(), 0);
+                        hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+                        // For GpuAcc numberOfCacheFiles is 1
+                        if (backend == armnn::Compute::GpuAcc)
+                        {
+                            gpuAccCachedFd = cachedFd;
+                        }
+                    }
+                }
+                index += numberOfCacheFiles;
+            }
+        }
+    }
+
+    if (!CacheDataHandlerInstance().Validate(token, hashValue))
+    {
+        ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
+        cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+        return V1_3::ErrorStatus::GENERAL_FAILURE;
+    }
+
+    // Deserialize the network.
+    auto network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+    armnn::OptimizerOptions OptOptions;
+    OptOptions.m_ReduceFp32ToFp16 = options.GetFp16Enabled();
+    OptOptions.m_ProfilingEnabled = options.IsGpuProfilingEnabled();
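+    // The model's relaxComputationFloat32toFloat16 flag is not cached, so FP16
+    // reduction is driven solely by the driver's Fp16Enabled option here.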
+
+    armnn::BackendOptions gpuAcc("GpuAcc",
+                                 {
+                                         {"FastMathEnabled",       options.IsFastMathEnabled()},
+                                         {"SaveCachedNetwork",     saveCachedNetwork},
+                                         {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+                                         {"MLGOTuningFilePath",    options.GetClMLGOTunedParametersFile()},
+                                         {"CachedFileDescriptor",  gpuAccCachedFd}
+                                 });
+
+    armnn::BackendOptions cpuAcc("CpuAcc",
+                                 {
+                                         {"FastMathEnabled", options.IsFastMathEnabled()},
+                                         {"NumberOfThreads", options.GetNumberOfThreads()}
+                                 });
+    OptOptions.m_ModelOptions.push_back(gpuAcc);
+    OptOptions.m_ModelOptions.push_back(cpuAcc);
+
+    std::vector<std::string> errMessages;
+    try
+    {
+        optNet = armnn::Optimize(*network.get(),
+                                 options.GetBackends(),
+                                 runtime->GetDeviceSpec(),
+                                 OptOptions,
+                                 errMessages);
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from optimize.";
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
+    // Check that the optimized network is valid.
+    if (!optNet)
+    {
+        std::stringstream message;
+        message << "Invalid optimized network";
+        for (const std::string& msg : errMessages)
+        {
+            message << "\n" << msg;
+        }
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
+    // Export the optimized network graph to a dot file if an output dump directory
+    // has been specified in the drivers' arguments.
+    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+                                                               options.GetRequestInputsAndOutputsDumpDir());
+
+    // Load it into the runtime.
+    armnn::NetworkId netId = 0;
+    std::string msg;
+    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+                                                MemorySource::Undefined,
+                                                MemorySource::Undefined,
+                                                options.IsGpuProfilingEnabled());
+
+    try
+    {
+        if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+        {
+            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
+        }
+    }
+    catch (std::exception& e)
+    {
+        std::stringstream message;
+        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+        return V1_3::ErrorStatus::NONE;
+    }
+
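+    // The original execution preference and priority are not cached, so a model
+    // restored from cache is loaded with the default V1_3::Priority::MEDIUM.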
+    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
+            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
+                                                           runtime.get(),
+                                                           options.GetRequestInputsAndOutputsDumpDir(),
+                                                           options.IsGpuProfilingEnabled(),
+                                                           V1_3::Priority::MEDIUM,
+                                                           options.isAsyncModelExecutionEnabled(),
+                                                           options.getNoOfArmnnThreads(),
+                                                           true));
+
+    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
     return V1_3::ErrorStatus::NONE;
 }
 
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index 3c094fe..a482eda 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -7,6 +7,7 @@
 
 #include <HalInterfaces.h>
 
+#include "../CacheDataHandler.hpp"
 #include "../DriverOptions.hpp"
 
 #include <armnn/ArmNN.hpp>
@@ -31,13 +32,27 @@
 class ArmnnDriverImpl
 {
 public:
-    static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime,
-                                                           const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
-                                                           const DriverOptions& options,
-                                                           const V1_3::Model& model,
-                                                           const android::sp<V1_3::IPreparedModelCallback>& cb,
-                                                           bool float32ToFloat16 = false,
-                                                           V1_3::Priority priority = V1_3::Priority::MEDIUM);
+    using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+    static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(
+        const armnn::IRuntimePtr& runtime,
+        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+        const DriverOptions& options,
+        const V1_3::Model& model,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb,
+        bool float32ToFloat16 = false,
+        V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+    static Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
+        const armnn::IRuntimePtr& runtime,
+        const DriverOptions& options,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+        const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+        const HidlToken& token,
+        const android::sp<V1_3::IPreparedModelCallback>& cb);
 
     static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                             V1_3::IDevice::getCapabilities_1_3_cb cb);
diff --git a/Android.mk b/Android.mk
index 1d61930..d0f4493 100644
--- a/Android.mk
+++ b/Android.mk
@@ -469,6 +469,7 @@
         ArmnnDriverImpl.cpp \
         ArmnnPreparedModel.cpp \
         ArmnnPreparedModel_1_2.cpp \
+        CacheDataHandler.cpp \
         ConversionUtils.cpp \
         DriverOptions.cpp \
         ModelToINetworkConverter.cpp \
@@ -599,6 +600,7 @@
         ArmnnPreparedModel.cpp \
         ArmnnPreparedModel_1_2.cpp \
         ArmnnPreparedModel_1_3.cpp \
+        CacheDataHandler.cpp \
         ConversionUtils.cpp \
         DriverOptions.cpp \
         ModelToINetworkConverter.cpp \
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index 78ef12f..0b3b919 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -102,8 +102,12 @@
 
     // Serialize the network graph to a .armnn file if an output directory
     // has been specified in the drivers' arguments.
+    std::vector<uint8_t> dataCacheData;
     auto serializedNetworkFileName =
-        SerializeNetwork(*modelConverter.GetINetwork(), options.GetRequestInputsAndOutputsDumpDir());
+        SerializeNetwork(*modelConverter.GetINetwork(),
+                         options.GetRequestInputsAndOutputsDumpDir(),
+                         dataCacheData,
+                         false);
 
     // Optimize the network
     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
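
On this pre-caching (HAL 1.0/1.1) path dataCachingActive is false, so dataCacheData stays empty and SerializeNetwork only writes a .armnn dump when a dump directory has been configured. A minimal sketch of that call shape (the wrapper name is hypothetical):

    #include <cstdint>
    #include <string>
    #include <vector>

    #include <armnn/INetwork.hpp>

    #include "Utils.hpp"

    // Dump-to-file only: with dataCachingActive == false the byte vector
    // is never filled, matching the HAL 1.0/1.1 behaviour above.
    std::string DumpNetworkOnly(const armnn::INetwork& network,
                                const std::string& dumpDir)
    {
        std::vector<uint8_t> unusedCacheData;
        return armnn_driver::SerializeNetwork(network, dumpDir,
                                              unusedCacheData,
                                              /*dataCachingActive=*/false);
    }
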
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index 2e37880..7cc7547 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -159,6 +159,47 @@
     , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
     , m_GpuProfilingEnabled(gpuProfilingEnabled)
     , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+    , m_PreparedFromCache(false)
+{
+    // Enable profiling if required.
+    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+    if (m_AsyncModelExecutionEnabled)
+    {
+        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+        for (unsigned int i = 0; i < numberOfThreads; ++i)
+        {
+            memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+        }
+
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
+        m_WorkingMemHandle = memHandles.back();
+    }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+                                                           armnn::IRuntime* runtime,
+                                                           const std::string& requestInputsAndOutputsDumpDir,
+                                                           const bool gpuProfilingEnabled,
+                                                           const bool asyncModelExecutionEnabled,
+                                                           const unsigned int numberOfThreads,
+                                                           const bool preparedFromCache)
+    : m_NetworkId(networkId)
+    , m_Runtime(runtime)
+    , m_RequestCount(0)
+    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+    , m_GpuProfilingEnabled(gpuProfilingEnabled)
+    , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+    , m_PreparedFromCache(preparedFromCache)
 {
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
@@ -384,7 +425,10 @@
                                                                       V1_2::MeasureTiming measureTiming,
                                                                       executeSynchronously_cb cb)
 {
-    ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
     if (cb == nullptr)
@@ -400,7 +444,7 @@
         driverStart = Now();
     }
 
-    if (!android::nn::validateRequest(request, m_Model))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
     {
         ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid request model");
         cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming);
@@ -530,11 +574,11 @@
 }
 
 template<typename HalVersion>
-bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
 {
     std::vector<std::vector<char>> storage;
     armnn::InputTensors inputTensors;
-    for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+    for (unsigned int i = 0; i < numInputs; i++)
     {
         const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
         storage.emplace_back(inputTensorInfo.GetNumBytes());
@@ -544,7 +588,7 @@
     }
 
     armnn::OutputTensors outputTensors;
-    for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+    for (unsigned int i = 0; i < numOutputs; i++)
     {
         const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
         storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -576,10 +620,13 @@
         ctx.driverStart = Now();
     }
 
-    ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
-    if (!android::nn::validateRequest(request, m_Model))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
     {
         callback(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
         return V1_0::ErrorStatus::INVALID_ARGUMENT;
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index c64c891..255fc18 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -49,6 +49,14 @@
                            const bool asyncModelExecutionEnabled = false,
                            const unsigned int numberOfThreads = 1);
 
+    ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+                           armnn::IRuntime* runtime,
+                           const std::string& requestInputsAndOutputsDumpDir,
+                           const bool gpuProfilingEnabled,
+                           const bool asyncModelExecutionEnabled = false,
+                           const unsigned int numberOfThreads = 1,
+                           const bool preparedFromCache = false);
+
     virtual ~ArmnnPreparedModel_1_2();
 
     virtual Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
@@ -76,7 +84,7 @@
 
     /// Executes this model with dummy inputs (e.g. all zeroes).
     /// \return false on failure, otherwise true
-    bool ExecuteWithDummyInputs();
+    bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
 
 private:
 
@@ -156,6 +164,7 @@
     static std::unique_ptr<armnn::Threadpool> m_Threadpool;
     std::shared_ptr<IWorkingMemHandle>        m_WorkingMemHandle;
     const bool                                m_AsyncModelExecutionEnabled;
+    const bool                                m_PreparedFromCache;
 };
 
 }
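
ExecuteWithDummyInputs now takes the tensor counts explicitly because a model prepared from cache carries no populated HIDL model, so getMainModel(m_Model) cannot supply them; the caller passes counts it has recovered itself. A hedged warm-up sketch (the wrapper is illustrative, not part of the driver):

    #include "ArmnnPreparedModel_1_2.hpp"

    // Warm-up run with zero-filled tensors, e.g. after preparing from
    // cache, where input/output counts cannot be read from the HIDL model.
    template <typename HalVersion>
    bool WarmUp(armnn_driver::ArmnnPreparedModel_1_2<HalVersion>& model,
                unsigned int numInputs, unsigned int numOutputs)
    {
        return model.ExecuteWithDummyInputs(numInputs, numOutputs);
    }
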
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index a503236..e963d4e 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -181,6 +181,49 @@
     , m_GpuProfilingEnabled(gpuProfilingEnabled)
     , m_ModelPriority(priority)
     , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+    , m_PreparedFromCache(false)
+{
+    // Enable profiling if required.
+    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+    if (m_AsyncModelExecutionEnabled)
+    {
+        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+        for (unsigned int i = 0; i < numberOfThreads; ++i)
+        {
+            memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+        }
+
+        if (!m_Threadpool)
+        {
+            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+        }
+        else
+        {
+            m_Threadpool->LoadMemHandles(memHandles);
+        }
+
+        m_WorkingMemHandle = memHandles.back();
+    }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+                                                           armnn::IRuntime* runtime,
+                                                           const std::string& requestInputsAndOutputsDumpDir,
+                                                           const bool gpuProfilingEnabled,
+                                                           V1_3::Priority priority,
+                                                           const bool asyncModelExecutionEnabled,
+                                                           const unsigned int numberOfThreads,
+                                                           const bool preparedFromCache)
+    : m_NetworkId(networkId)
+    , m_Runtime(runtime)
+    , m_RequestCount(0)
+    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+    , m_GpuProfilingEnabled(gpuProfilingEnabled)
+    , m_ModelPriority(priority)
+    , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+    , m_PreparedFromCache(preparedFromCache)
 {
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
@@ -343,7 +386,7 @@
         ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported.");
     }
 
-    if (!android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
     {
         ALOGV("ArmnnPreparedModel_1_3::executeFenced outputs must be specified for fenced execution ");
         cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
@@ -357,7 +400,10 @@
         ctx.driverStart = Now();
     }
 
-    ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
     if (!m_RequestInputsAndOutputsDumpDir.empty())
@@ -587,7 +633,7 @@
         cbCtx.ctx.driverStart = Now();
     }
 
-    if (!android::nn::validateRequest(convertToV1_3(request), m_Model))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(convertToV1_3(request), m_Model))
     {
         ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
         cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -597,7 +643,7 @@
         return Void();
     }
 
-    if (!android::nn::validateRequest(request, m_Model))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
     {
         ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
         cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -634,7 +680,10 @@
                                                                       V1_2::MeasureTiming measureTiming,
                                                                       executeSynchronously_cb cb)
 {
-    ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
     if (cb == nullptr)
@@ -667,7 +716,10 @@
         const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
         executeSynchronously_1_3_cb cb)
 {
-    ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
     if (cb == nullptr)
@@ -854,11 +906,11 @@
 }
 
 template<typename HalVersion>
-bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
 {
     std::vector<std::vector<char>> storage;
     armnn::InputTensors inputTensors;
-    for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+    for (unsigned int i = 0; i < numInputs; i++)
     {
         const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
         storage.emplace_back(inputTensorInfo.GetNumBytes());
@@ -868,7 +920,7 @@
     }
 
     armnn::OutputTensors outputTensors;
-    for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+    for (unsigned int i = 0; i < numOutputs; i++)
     {
         const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
         storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -902,10 +954,13 @@
         ctx.driverStart = Now();
     }
 
-    ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+    if (!m_PreparedFromCache)
+    {
+        ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+    }
     m_RequestCount++;
 
-    if (!android::nn::validateRequest(request, m_Model))
+    if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
     {
         callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute");
         return V1_3::ErrorStatus::INVALID_ARGUMENT;
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index a245cc4..cd5fc0e 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -57,6 +57,15 @@
                            const bool asyncModelExecutionEnabled = false,
                            const unsigned int numberOfThreads = 1);
 
+    ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+                           armnn::IRuntime* runtime,
+                           const std::string& requestInputsAndOutputsDumpDir,
+                           const bool gpuProfilingEnabled,
+                           V1_3::Priority priority = V1_3::Priority::MEDIUM,
+                           const bool asyncModelExecutionEnabled = false,
+                           const unsigned int numberOfThreads = 1,
+                           const bool preparedFromCache = false);
+
     virtual ~ArmnnPreparedModel_1_3();
 
     Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
@@ -108,7 +117,7 @@
 
     /// Executes this model with dummy inputs (e.g. all zeroes).
     /// \return false on failure, otherwise true
-    bool ExecuteWithDummyInputs();
+    bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
 
     V1_3::Priority GetModelPriority();
 
@@ -192,6 +201,7 @@
     static std::unique_ptr<armnn::Threadpool>      m_Threadpool;
     std::shared_ptr<IWorkingMemHandle>             m_WorkingMemHandle;
     const bool                                     m_AsyncModelExecutionEnabled;
+    const bool                                     m_PreparedFromCache;
 };
 
 }
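
The constructor overload taking preparedFromCache is the one a prepare-from-cache path would use: no V1_3::Model exists, so the flag switches off the m_Model-based logging and request validation seen in ArmnnPreparedModel_1_3.cpp above. A construction sketch under the assumption that networkId and runtime come from IRuntime::LoadNetwork on the deserialized graph (the helper name is hypothetical):

    #include <memory>

    #include "ArmnnPreparedModel_1_3.hpp"

    // Wrap a network loaded from cached bytes rather than from a HIDL
    // model; preparedFromCache = true suppresses model-based validation.
    template <typename HalVersion>
    std::unique_ptr<armnn_driver::ArmnnPreparedModel_1_3<HalVersion>>
    MakeCachedPreparedModel(armnn::NetworkId networkId, armnn::IRuntime* runtime)
    {
        return std::make_unique<armnn_driver::ArmnnPreparedModel_1_3<HalVersion>>(
            networkId, runtime, /*requestInputsAndOutputsDumpDir=*/"",
            /*gpuProfilingEnabled=*/false, V1_3::Priority::MEDIUM,
            /*asyncModelExecutionEnabled=*/false, /*numberOfThreads=*/1,
            /*preparedFromCache=*/true);
    }
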
diff --git a/CacheDataHandler.cpp b/CacheDataHandler.cpp
new file mode 100644
index 0000000..3688162
--- /dev/null
+++ b/CacheDataHandler.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CacheDataHandler.hpp"
+
+#include <log/log.h>
+
+namespace armnn_driver
+{
+
+CacheDataHandler& CacheDataHandlerInstance()
+{
+    static CacheDataHandler instance;
+    return instance;
+}
+
+void CacheDataHandler::Register(const HidlToken token, const size_t hashValue, const size_t cacheSize)
+{
+    if (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end())
+    {
+        ALOGV("CacheHandler::Register() Token has been already registered.");
+        return;
+    }
+    CacheHandle cacheHandle(token, cacheSize);
+    m_CacheDataMap.insert({hashValue, cacheHandle});
+}
+
+bool CacheDataHandler::Validate(const HidlToken token, const size_t hashValue) const
+{
+    return (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+            && m_CacheDataMap.at(hashValue).GetToken() == token);
+}
+
+size_t CacheDataHandler::Hash(const std::vector<uint8_t>& cacheData)
+{
+    std::size_t hash = cacheData.size();
+    for (auto& i : cacheData)
+    {
+        hash ^= std::hash<unsigned int>{}(i);
+    }
+    return hash;
+}
+
+size_t CacheDataHandler::GetCacheSize(HidlToken token)
+{
+    for (auto i = m_CacheDataMap.begin(); i != m_CacheDataMap.end(); ++i)
+    {
+        if (i->second.GetToken() == token)
+        {
+            return i->second.GetCacheSize();
+        }
+    }
+    return 0;
+}
+
+void CacheDataHandler::Clear()
+{
+    m_CacheDataMap.clear();
+}
+
+} // armnn_driver
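
Hash() seeds with the byte count and XORs in the std::hash of each byte. Since XOR is commutative, the result is insensitive to byte order; it is a cheap integrity check for the cached blob, not a collision-resistant digest. A standalone restatement of the same scheme:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <vector>

    // Same scheme as CacheDataHandler::Hash: seed with the size, then
    // fold in the std::hash of each byte with XOR.
    std::size_t HashCacheData(const std::vector<std::uint8_t>& cacheData)
    {
        std::size_t hash = cacheData.size();
        for (std::uint8_t byte : cacheData)
        {
            hash ^= std::hash<unsigned int>{}(byte);
        }
        return hash;
    }
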
diff --git a/CacheDataHandler.hpp b/CacheDataHandler.hpp
new file mode 100644
index 0000000..cea73d2
--- /dev/null
+++ b/CacheDataHandler.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <HalInterfaces.h>
+
+#include <vector>
+#include <unordered_map>
+
+#include <NeuralNetworks.h>
+
+namespace armnn_driver
+{
+
+using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+class CacheHandle
+{
+public:
+    CacheHandle(const HidlToken token, const size_t cacheSize)
+    : m_HidlToken(token), m_CacheSize(cacheSize) {}
+
+    ~CacheHandle() {}
+
+    HidlToken GetToken() const
+    {
+        return m_HidlToken;
+    }
+
+    size_t GetCacheSize() const
+    {
+        return m_CacheSize;
+    }
+
+private:
+    const HidlToken m_HidlToken;
+    const size_t m_CacheSize;
+};
+
+class CacheDataHandler
+{
+public:
+    CacheDataHandler() {}
+    ~CacheDataHandler() {}
+
+    void Register(const HidlToken token, const size_t hashValue, const size_t cacheSize);
+
+    bool Validate(const HidlToken token, const size_t hashValue) const;
+
+    size_t Hash(const std::vector<uint8_t>& cacheData);
+
+    size_t GetCacheSize(HidlToken token);
+
+    void Clear();
+
+private:
+    CacheDataHandler(const CacheDataHandler&) = delete;
+    CacheDataHandler& operator=(const CacheDataHandler&) = delete;
+
+    std::unordered_map<size_t, CacheHandle> m_CacheDataMap;
+};
+
+CacheDataHandler& CacheDataHandlerInstance();
+
+} // armnn_driver
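
Typical use of the process-wide instance: at compile time the driver hashes the serialized graph and registers it under the NNAPI cache token; at prepare-from-cache time it re-hashes whatever was read back and accepts it only if both token and hash match. A small illustrative round trip (the function name is hypothetical):

    #include <cstdint>
    #include <vector>

    #include "CacheDataHandler.hpp"

    // Register/Validate round trip on the singleton handler.
    bool CheckCachedBlob(const armnn_driver::HidlToken& token,
                         std::vector<uint8_t>& blob)
    {
        auto& handler = armnn_driver::CacheDataHandlerInstance();
        const size_t hashValue = handler.Hash(blob);

        // prepareModel path: remember which token produced this blob.
        handler.Register(token, hashValue, blob.size());

        // prepareModelFromCache path: accept the blob only if this hash
        // was registered under the same token.
        return handler.Validate(token, hashValue);
    }
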
diff --git a/Utils.cpp b/Utils.cpp
index 9b52f5e..f910cd4 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -554,37 +554,59 @@
     return fileName;
 }
 
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir)
+std::string SerializeNetwork(const armnn::INetwork& network,
+                             const std::string& dumpDir,
+                             std::vector<uint8_t>& dataCacheData,
+                             bool dataCachingActive)
 {
     std::string fileName;
-    // The dump directory must exist in advance.
+    bool bSerializeToFile = true;
     if (dumpDir.empty())
     {
-        return fileName;
+        bSerializeToFile = false;
     }
-
-    std::string timestamp = GetFileTimestamp();
-    if (timestamp.empty())
+    else
+    {
+        std::string timestamp = GetFileTimestamp();
+        if (timestamp.empty())
+        {
+            bSerializeToFile = false;
+        }
+    }
+    if (!bSerializeToFile && !dataCachingActive)
     {
         return fileName;
     }
 
     auto serializer(armnnSerializer::ISerializer::Create());
-
     // Serialize the Network
     serializer->Serialize(network);
-
-    // Set the name of the output .armnn file.
-    fs::path dumpPath = dumpDir;
-    fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
-    fileName = tempFilePath.string();
-
-    // Save serialized network to a file
-    std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
-    bool serialized = serializer->SaveSerializedToStream(serializedFile);
-    if (!serialized)
+    if (dataCachingActive)
     {
-        ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+        std::stringstream stream;
+        auto serialized = serializer->SaveSerializedToStream(stream);
+        if (serialized)
+        {
+            std::string const serializedString{stream.str()};
+            std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(dataCacheData));
+        }
+    }
+
+    if (bSerializeToFile)
+    {
+        // Set the name of the output .armnn file.
+        fs::path dumpPath = dumpDir;
+        std::string timestamp = GetFileTimestamp();
+        fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
+        fileName = tempFilePath.string();
+
+        // Save serialized network to a file
+        std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+        auto serialized = serializer->SaveSerializedToStream(serializedFile);
+        if (!serialized)
+        {
+            ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+        }
     }
     return fileName;
 }
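
The in-memory branch uses the serializer's stream interface directly: serialize to a std::stringstream, then copy the bytes into the cache vector. A sketch producing the same kind of byte vector as dataCacheData, built around a trivial pass-through network (the helper name is illustrative):

    #include <cstdint>
    #include <iterator>
    #include <sstream>
    #include <string>
    #include <vector>

    #include <armnn/INetwork.hpp>
    #include <armnnSerializer/ISerializer.hpp>

    // Serialize a one-input/one-output network into a byte vector, the
    // same way the dataCachingActive branch above fills dataCacheData.
    std::vector<uint8_t> SerializeToBytes()
    {
        armnn::INetworkPtr network = armnn::INetwork::Create();
        armnn::IConnectableLayer* input  = network->AddInputLayer(0);
        armnn::IConnectableLayer* output = network->AddOutputLayer(0);
        input->GetOutputSlot(0).Connect(output->GetInputSlot(0));
        input->GetOutputSlot(0).SetTensorInfo(
            armnn::TensorInfo({ 1, 4 }, armnn::DataType::Float32));

        auto serializer = armnnSerializer::ISerializer::Create();
        serializer->Serialize(*network);

        std::vector<uint8_t> bytes;
        std::stringstream stream;
        if (serializer->SaveSerializedToStream(stream))
        {
            const std::string data = stream.str();
            std::copy(data.begin(), data.end(), std::back_inserter(bytes));
        }
        return bytes;
    }
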
diff --git a/Utils.hpp b/Utils.hpp
index da10153..9bd28ba 100644
--- a/Utils.hpp
+++ b/Utils.hpp
@@ -139,7 +139,10 @@
 std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork,
                                         const std::string& dumpDir);
 
-std::string SerializeNetwork(const armnn::INetwork& network, const std::string& dumpDir);
+std::string SerializeNetwork(const armnn::INetwork& network,
+                             const std::string& dumpDir,
+                             std::vector<uint8_t>& dataCacheData,
+                             bool dataCachingActive = true);
 
 void RenameExportedFiles(const std::string& existingSerializedFileName,
                          const std::string& existingDotFileName,