IVGCVSW-5686 Add GpuAcc MLGO tuning file configuration argument
Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: I3f320499c379162f9d1b00cc8816bd144cd7eee4
diff --git a/delegate/src/armnn_external_delegate.cpp b/delegate/src/armnn_external_delegate.cpp
index 4dba07d..edf46ef 100644
--- a/delegate/src/armnn_external_delegate.cpp
+++ b/delegate/src/armnn_external_delegate.cpp
@@ -125,6 +125,11 @@
armnn::BackendOptions option("GpuAcc", {{"TuningLevel", atoi(options_values[i])}});
options.AddBackendOption(option);
}
+ else if (std::string(options_keys[i]) == std::string("gpu-mlgo-tuning-file"))
+ {
+ armnn::BackendOptions option("GpuAcc", {{"MLGOTuningFilePath", std::string(options_values[i])}});
+ options.AddBackendOption(option);
+ }
else if (std::string(options_keys[i]) == std::string("gpu-tuning-file"))
{
armnn::BackendOptions option("GpuAcc", {{"TuningFile", std::string(options_values[i])}});
diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp
index 125f01b..9c5cca9 100644
--- a/src/backends/cl/ClBackendContext.cpp
+++ b/src/backends/cl/ClBackendContext.cpp
@@ -21,8 +21,9 @@
struct ClBackendContext::ClContextControlWrapper
{
ClContextControlWrapper(arm_compute::CLTuner* tuner,
+ arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
bool profilingEnabled)
- : m_ClContextControl(tuner, profilingEnabled)
+ : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled)
{}
bool Sync()
@@ -143,6 +144,7 @@
bool kernelProfiling = options.m_EnableGpuProfiling;
arm_compute::CLTuner* tuner = nullptr;
+ arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
if (useLegacyTunerAPI)
{
@@ -197,6 +199,10 @@
{
tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
}
+ else if (name == "MLGOTuningFilePath")
+ {
+ m_MLGOTuningFile = ParseFile(value, "");
+ }
});
// Create the tuner, in tuning mode initially.
@@ -216,13 +222,31 @@
ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file.";
}
}
+
+ if (!m_MLGOTuningFile.empty()) // Skip MLGO loading entirely when no tuning file path was set.
+ {
+ try
+ {
+ ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_MLGOTuningFile;
+ if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str()))
+ {
+ mlgoTuner = &m_MLGOTuner; // Only pass the handle on after a successful load; otherwise it stays nullptr.
+ }
+ }
+ catch (const std::exception& e)
+ {
+ ARMNN_LOG(warning) << "Could not load GpuAcc MLGO tuner data file.";
+ }
+ }
+
tuner = m_Tuner.get();
}
m_ClContextControlWrapper = std::make_unique<ClContextControlWrapper>(
tuner,
+ mlgoTuner,
kernelProfiling
- );
+ );
}
bool ClBackendContext::BeforeLoadNetwork(NetworkId)
diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp
index bcac0d2..af988a9 100644
--- a/src/backends/cl/ClBackendContext.hpp
+++ b/src/backends/cl/ClBackendContext.hpp
@@ -9,6 +9,7 @@
#include <mutex>
#include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
namespace armnn
{
@@ -35,6 +36,10 @@
std::unique_ptr<arm_compute::CLTuner> m_Tuner;
std::string m_TuningFile;
+
+protected:
+ arm_compute::CLGEMMHeuristicsHandle m_MLGOTuner;
+ std::string m_MLGOTuningFile;
};
} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp
index 7ab825f..fd2d0f5 100644
--- a/src/backends/cl/ClContextControl.cpp
+++ b/src/backends/cl/ClContextControl.cpp
@@ -28,8 +28,10 @@
{
ClContextControl::ClContextControl(arm_compute::CLTuner *tuner,
+ arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
bool profilingEnabled)
: m_Tuner(tuner)
+ , m_HeuristicsHandle(heuristicsHandle)
, m_ProfilingEnabled(profilingEnabled)
{
// Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled.
@@ -156,7 +158,7 @@
// Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute.
arm_compute::CLKernelLibrary::get().init(".", context, device);
- arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner);
+ arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle);
}
void ClContextControl::ClearClCache()
diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp
index 2ed43bc..4a640cd 100644
--- a/src/backends/cl/ClContextControl.hpp
+++ b/src/backends/cl/ClContextControl.hpp
@@ -7,6 +7,7 @@
#include "armnn/IRuntime.hpp"
#include <arm_compute/runtime/CL/CLTuner.h>
+#include <arm_compute/runtime/CL/CLGEMMHeuristicsHandle.h>
namespace armnn
{
@@ -17,6 +18,7 @@
public:
ClContextControl(arm_compute::CLTuner* = nullptr,
+ arm_compute::CLGEMMHeuristicsHandle* = nullptr,
bool profilingEnabled = false);
virtual ~ClContextControl();
@@ -35,6 +37,7 @@
void DoLoadOpenClRuntime(bool updateTunedParameters);
arm_compute::CLTuner* m_Tuner;
+ arm_compute::CLGEMMHeuristicsHandle* m_HeuristicsHandle;
bool m_ProfilingEnabled;
};
@@ -51,6 +54,7 @@
TuningLevel m_TuningLevel;
arm_compute::CLTuner m_Tuner;
+ arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle;
};
} // namespace armnn
diff --git a/src/backends/cl/test/ClContextControlFixture.hpp b/src/backends/cl/test/ClContextControlFixture.hpp
index 0371c69..14970be 100644
--- a/src/backends/cl/test/ClContextControlFixture.hpp
+++ b/src/backends/cl/test/ClContextControlFixture.hpp
@@ -13,7 +13,7 @@
{
// Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case
ClContextControlFixtureBase()
- : m_ClContextControl(nullptr, ProfilingEnabled) {}
+ : m_ClContextControl(nullptr, nullptr, ProfilingEnabled) {}
armnn::ClContextControl m_ClContextControl;
};
diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
index 2797080..dddc5aa 100644
--- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp
+++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp
@@ -10,6 +10,10 @@
#include <test/GraphUtils.hpp>
#include <cl/ClWorkloadFactory.hpp>
+#include <cl/ClBackendContext.hpp>
+
+#include <Filesystem.hpp>
+
#include <boost/test/unit_test.hpp>
@@ -130,4 +134,113 @@
BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true);
}
+BOOST_AUTO_TEST_CASE(CheckMLGOTuningFile) // Reloads MLGO data from a valid file, a malformed file and a missing path.
+{
+ class ClBackendContextTestClass : public armnn::ClBackendContext // Exposes the protected MLGO members to the test.
+ {
+ public:
+ ClBackendContextTestClass(const armnn::IRuntime::CreationOptions &options) : ClBackendContext(options)
+ {}
+
+ bool call_reload_from_file() // Forwards the result of reload_from_file() for the stored MLGO file path.
+ {
+ return m_MLGOTuner.reload_from_file(m_MLGOTuningFile);
+ }
+ };
+
+ const std::string validText{
+ "<header>\n"
+ "gemm-version, [1,2,1]\n"
+ "ip-type,gpu\n"
+ "</header>\n"
+ "<heuristics-table>\n"
+ "0, g71 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]\n"
+ "1, g71 , 8, f32, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n"
+ "2, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]\n"
+ "3, g71 , 8, qasymm8, best-performance, static, gemm-type, [m,n,k,n]\n"
+ "4, g71 , 8, qasymm8, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n"
+ "5, g71 , 8, qasymm8, best-performance, static, gemm-config-native, [m,n,k,n]\n"
+ "</heuristics-table>\n"
+ "<heuristic, 0>\n"
+ "b , 0, var, r_mn, >=, num, 2., 1, 2\n"
+ "l , 1, gemm-type, reshaped\n"
+ "l , 2, gemm-type, reshaped-only-rhs\n"
+ "</heuristic>\n"
+ "<heuristic, 1>\n"
+ "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n"
+ "</heuristic>\n"
+ "<heuristic, 2>\n"
+ "l ,0,gemm-config-reshaped,[4,2,8,16,16,1,0,1,0]\n"
+ "</heuristic>\n"
+ "<heuristic, 3>\n"
+ "l , 0, gemm-type, native\n"
+ "</heuristic>\n"
+ "<heuristic, 4>\n"
+ "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n"
+ "</heuristic>\n"
+ "<heuristic, 5>\n"
+ "l ,0,gemm-config-native,[4,2,8]\n"
+ "</heuristic>\n"};
+
+ const std::string invalidText{"ʕノ•ᴥ•ʔノ ︵ ┻━┻"};
+
+ fs::path validFile = armnnUtils::Filesystem::NamedTempFile("validFile.mlgo");
+ fs::path invalidFile = armnnUtils::Filesystem::NamedTempFile("invalidFile.mlgo");
+
+ // std::ofstream reports open/write failures through its state flags rather than by throwing, so the
+ // stream state is checked explicitly; the test stops here if a fixture file could not be written.
+ {
+ std::ofstream ofs1{validFile};
+ ofs1 << validText << std::endl;
+ ofs1.close();
+ BOOST_REQUIRE_MESSAGE(!ofs1.fail(), "Unable to write to file at location [" << validFile.string() << "]");
+ }
+
+ {
+ std::ofstream ofs2{invalidFile};
+ ofs2 << invalidText << std::endl;
+ ofs2.close();
+ BOOST_REQUIRE_MESSAGE(!ofs2.fail(), "Unable to write to file at location [" << invalidFile.string() << "]");
+ }
+
+ armnn::IRuntime::CreationOptions creationOptions1;
+ armnn::BackendOptions validOptions
+ {
+ "GpuAcc",
+ {
+ {"MLGOTuningFilePath", validFile.c_str()}
+ }
+ };
+
+ creationOptions1.m_BackendOptions.emplace_back(validOptions);
+ ClBackendContextTestClass clBackendContext1(creationOptions1);
+ BOOST_TEST(clBackendContext1.call_reload_from_file()); // Well-formed MLGO text is expected to load.
+
+ armnn::BackendOptions invalidOptions
+ {
+ "GpuAcc",
+ {
+ {"MLGOTuningFilePath", invalidFile.c_str()}
+ }
+ };
+
+ armnn::IRuntime::CreationOptions creationOptions2;
+ creationOptions2.m_BackendOptions.emplace_back(invalidOptions);
+ ClBackendContextTestClass clBackendContext2(creationOptions2);
+ BOOST_TEST(clBackendContext2.call_reload_from_file() == false); // Malformed content is expected to be rejected.
+
+ armnn::BackendOptions invalidPathOptions
+ {
+ "GpuAcc",
+ {
+ {"MLGOTuningFilePath", "not_a_real_file_path"}
+ }
+ };
+
+ armnn::IRuntime::CreationOptions creationOptions3;
+ creationOptions3.m_BackendOptions.emplace_back(invalidPathOptions);
+ ClBackendContextTestClass clBackendContext3(creationOptions3);
+ BOOST_TEST(clBackendContext3.call_reload_from_file() == false); // A path that does not exist is expected to fail.
+}
+
BOOST_AUTO_TEST_SUITE_END();
diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp
index 68a356a..0e1f28e 100644
--- a/src/backends/cl/test/OpenClTimerTest.cpp
+++ b/src/backends/cl/test/OpenClTimerTest.cpp
@@ -32,7 +32,7 @@
// Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case.
// NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution
// times from OpenClTimer.
- OpenClFixture() : m_ClContextControl(nullptr, true) {}
+ OpenClFixture() : m_ClContextControl(nullptr, nullptr, true) {}
~OpenClFixture() {}
ClContextControl m_ClContextControl;
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index c19f519..e3ca22e 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -309,6 +309,7 @@
inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
+ inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
for(const std::string& inputName: params.m_InputNames)
{
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index 830270a..a30ce57 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -46,6 +46,7 @@
double m_ThresholdTime;
int m_TuningLevel;
std::string m_TuningPath;
+ std::string m_MLGOTuningFilePath;
// Ensures that the parameters for ExecuteNetwork fit together
void ValidateParams();
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index 73da1f1..0eaf8da 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -310,7 +310,11 @@
("tuning-path",
"Path to tuning file. Enables use of CL tuning",
- cxxopts::value<std::string>(m_ExNetParams.m_TuningPath));
+ cxxopts::value<std::string>(m_ExNetParams.m_TuningPath))
+
+ ("MLGOTuningFilePath",
+ "Path to tuning file. Enables use of CL MLGO tuning",
+ cxxopts::value<std::string>(m_ExNetParams.m_MLGOTuningFilePath));
m_CxxOptions.add_options("d) Profiling")
("a,enable-external-profiling",
@@ -427,7 +431,8 @@
{
{"TuningLevel", m_ExNetParams.m_TuningLevel},
{"TuningFile", m_ExNetParams.m_TuningPath.c_str()},
- {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling}
+ {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling},
+ {"MLGOTuningFilePath", m_ExNetParams.m_MLGOTuningFilePath}
}
}
);
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index d20bb22..7996262 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -100,6 +100,8 @@
bool m_SaveCachedNetwork;
std::string m_CachedNetworkFilePath;
unsigned int m_NumberOfThreads;
+ std::string m_MLGOTuningFilePath;
+
Params()
: m_ComputeDevices{}
@@ -115,6 +117,7 @@
, m_SaveCachedNetwork(false)
, m_CachedNetworkFilePath("")
, m_NumberOfThreads(0)
+ , m_MLGOTuningFilePath("")
{}
};
@@ -434,8 +437,10 @@
{
{ "FastMathEnabled", params.m_EnableFastMath },
{ "SaveCachedNetwork", params.m_SaveCachedNetwork },
- { "CachedNetworkFilePath", params.m_CachedNetworkFilePath }
+ { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
+ { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
});
+
armnn::BackendOptions cpuAcc("CpuAcc",
{
{ "FastMathEnabled", params.m_EnableFastMath },