IVGCVSW-5482 'Add a ClCompileContext parameter to each ClWorkload Constructor'
* Injected a CLCompileContext object into each CL workload.
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I4837dbd3d5b56cf743b3b89c944e3cdf8b11a42a
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 41b779f..35186f2 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -107,9 +107,8 @@
void ClWorkloadFactory::InitializeCLCompileContext()
{
// Initialize our m_CLCompileContext using default device and context
- cl::Device device = cl::Device::getDefault();
- cl::Context context = cl::Context(device);
-
+ auto context = arm_compute::CLKernelLibrary::get().context();
+ auto device = arm_compute::CLKernelLibrary::get().get_device();
m_CLCompileContext = arm_compute::CLCompileContext(context, device);
if (m_ModelContextPtr)
@@ -200,64 +199,64 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClActivationWorkload>(descriptor, info);
+ return MakeWorkload<ClActivationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClAdditionWorkload>(descriptor, info);
+ return MakeWorkload<ClAdditionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClArgMinMaxWorkload>(descriptor, info);
+ return std::make_unique<ClArgMinMaxWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchNormalization(
const BatchNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info);
+ return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClComparisonWorkload>(descriptor, info);
+ return MakeWorkload<ClComparisonWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConcatWorkload>(descriptor, info);
+ return MakeWorkload<ClConcatWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConstantWorkload>(descriptor, info);
+ return MakeWorkload<ClConstantWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp16ToFp32(
const ConvertFp16ToFp32QueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info);
+ return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
const ConvertFp32ToFp16QueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info);
+ return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
@@ -278,45 +277,46 @@
return MakeWorkload<ClConvolution2dWorkload>(descriptor,
info,
m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext,
isFastMathEnabled);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info);
+ return MakeWorkload<ClDepthToSpaceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDepthwiseConvolution2d(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info);
+ return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDequantizeWorkload>(descriptor, info);
+ return MakeWorkload<ClDequantizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDetectionPostProcess(
const DetectionPostProcessQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClDivisionFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor,
@@ -330,22 +330,22 @@
absQueueDescriptor.m_Inputs = descriptor.m_Inputs;
absQueueDescriptor.m_Outputs = descriptor.m_Outputs;
- return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info);
+ return std::make_unique<ClAbsWorkload>(absQueueDescriptor, info, m_CLCompileContext);
}
case UnaryOperation::Exp:
- return std::make_unique<ClExpWorkload>(descriptor, info);
+ return std::make_unique<ClExpWorkload>(descriptor, info, m_CLCompileContext);
case UnaryOperation::Neg:
- return std::make_unique<ClNegWorkload>(descriptor, info);
+ return std::make_unique<ClNegWorkload>(descriptor, info, m_CLCompileContext);
case UnaryOperation::Rsqrt:
{
RsqrtQueueDescriptor rsqrtQueueDescriptor;
rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs;
rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs;
- return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info);
+ return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor, info, m_CLCompileContext);
}
case UnaryOperation::LogicalNot:
- return std::make_unique<ClLogicalNotWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalNotWorkload>(descriptor, info, m_CLCompileContext);
default:
return nullptr;
}
@@ -365,25 +365,28 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClFillWorkload>(descriptor, info);
+ return std::make_unique<ClFillWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClFullyConnectedWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClGatherWorkload>(descriptor, info);
+ return MakeWorkload<ClGatherWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateGreater(const GreaterQueueDescriptor& descriptor,
@@ -407,13 +410,13 @@
const InstanceNormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info);
+ return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor,
@@ -422,9 +425,9 @@
switch(descriptor.m_Parameters.m_Operation)
{
case LogicalBinaryOperation::LogicalAnd:
- return std::make_unique<ClLogicalAndWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalAndWorkload>(descriptor, info, m_CLCompileContext);
case LogicalBinaryOperation::LogicalOr:
- return std::make_unique<ClLogicalOrWorkload>(descriptor, info);
+ return std::make_unique<ClLogicalOrWorkload>(descriptor, info, m_CLCompileContext);
default:
return nullptr;
}
@@ -433,25 +436,28 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClLogSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMaximumWorkload>(descriptor, info);
+ return MakeWorkload<ClMaximumWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMeanWorkload>(descriptor, info);
+ return MakeWorkload<ClMeanWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
@@ -485,19 +491,19 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMinimumWorkload>(descriptor, info);
+ return MakeWorkload<ClMinimumWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClMultiplicationWorkload>(descriptor, info);
+ return MakeWorkload<ClMultiplicationWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor,
@@ -509,61 +515,61 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPadWorkload>(descriptor, info);
+ return MakeWorkload<ClPadWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPermuteWorkload>(descriptor, info);
+ return MakeWorkload<ClPermuteWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClPooling2dWorkload>(descriptor, info);
+ return MakeWorkload<ClPooling2dWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+ return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor,
const WorkloadInfo &info) const
{
- return MakeWorkload<ClPreluWorkload>(descriptor, info);
+ return MakeWorkload<ClPreluWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClQLstmWorkload>(descriptor, info);
+ return std::make_unique<ClQLstmWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClQuantizeWorkload>(descriptor, info);
+ return MakeWorkload<ClQuantizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info);
+ return MakeWorkload<ClQuantizedLstmWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClReshapeWorkload>(descriptor, info);
+ return MakeWorkload<ClReshapeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClResizeWorkload>(descriptor, info);
+ return MakeWorkload<ClResizeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor,
@@ -595,62 +601,68 @@
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSliceWorkload>(descriptor, info);
+ return MakeWorkload<ClSliceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return std::make_unique<ClSoftmaxWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return std::make_unique<ClSoftmaxWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info);
+ return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info);
+ return MakeWorkload<ClSpaceToDepthWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSplitterWorkload>(descriptor, info);
+ return MakeWorkload<ClSplitterWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClStackWorkload>(descriptor, info);
+ return MakeWorkload<ClStackWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClStridedSliceWorkload>(descriptor, info);
+ return MakeWorkload<ClStridedSliceWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClSubtractionWorkload>(descriptor, info);
+ return MakeWorkload<ClSubtractionWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClTransposeWorkload>(descriptor, info);
+ return MakeWorkload<ClTransposeWorkload>(descriptor, info, m_CLCompileContext);
}
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateTransposeConvolution2d(
const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
- return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+ return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
+ info,
+ m_MemoryManager->GetIntraLayerManager(),
+ m_CLCompileContext);
}
} // namespace armnn
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 4bd3d3a..765409a 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -10,6 +10,8 @@
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/MemCopyWorkload.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
#include <aclCommon/test/CreateWorkloadClNeon.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
@@ -334,6 +336,98 @@
ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::WINOGRAD);
}
+BOOST_AUTO_TEST_CASE(CreateConvolution2dClCompiledContextWorkload)
+{
+ using namespace armnn;
+
+ const DataType inputType = DataType::QAsymmU8;
+ const DataType kernelType = DataType::QSymmS8;
+ const DataType biasType = DataType::Signed32;
+
+ TensorInfo inputInfo ({ 1, 3, 1, 2 }, inputType, 0.5f, 128);
+ TensorInfo outputInfo({ 1, 3, 1, 3 }, inputType, 1.0f, 128);
+
+ const std::vector<float> quantScales{ 0.5f, 0.75f, 1.0f };
+ constexpr unsigned int quantDimension = 0;
+
+ TensorInfo kernelInfo({ 3, 1, 1, 2 }, kernelType, quantScales, quantDimension);
+
+ const std::vector<float> biasQuantScales{ 0.25f, 0.375f, 0.5f };
+ TensorInfo biasInfo({ 3 }, biasType, biasQuantScales, quantDimension);
+
+ std::vector<uint8_t> inputData =
+ {
+ 138, 108, 138, 108, 138, 108
+ };
+
+ std::vector<int8_t> kernelData =
+ {
+ 1, 2, 1, 2, 1, 2
+ };
+
+ std::vector<int32_t> biasData =
+ {
+ 4, 4, 4
+ };
+
+ std::vector<uint8_t> expectedOutputData =
+ {
+ 121, 118, 115, 121, 118, 115, 121, 118, 115
+ };
+
+
+ Convolution2dDescriptor descriptor;
+ descriptor.m_StrideX = 1;
+ descriptor.m_StrideY = 1;
+ descriptor.m_PadLeft = 0;
+ descriptor.m_PadRight = 0;
+ descriptor.m_PadTop = 0;
+ descriptor.m_PadBottom = 0;
+ descriptor.m_BiasEnabled = true;
+ descriptor.m_DataLayout = DataLayout::NHWC;
+
+ auto memoryManager = ClWorkloadFactoryHelper::GetMemoryManager();
+ auto clMemoryManager = armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager);
+ auto tensorHandleFactory = ClWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
+
+ std::unique_ptr<ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
+ std::unique_ptr<ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputInfo);
+
+
+ WorkloadInfo workloadInfo;
+ ScopedCpuTensorHandle weightTensor(kernelInfo);
+ ScopedCpuTensorHandle biasTensor(biasInfo);
+
+ AllocateAndCopyDataToITensorHandle(&weightTensor, kernelData.data());
+ AllocateAndCopyDataToITensorHandle(&biasTensor, biasData.data());
+
+ Convolution2dQueueDescriptor queueDescriptor;
+ queueDescriptor.m_Parameters = descriptor;
+ queueDescriptor.m_Weight = &weightTensor;
+ queueDescriptor.m_Bias = &biasTensor;
+
+ AddInputToWorkload(queueDescriptor, workloadInfo, inputInfo, inputHandle.get());
+ AddOutputToWorkload(queueDescriptor, workloadInfo, outputInfo, outputHandle.get());
+
+ // Initialize our m_CLCompileContext using default device and context
+ auto context = arm_compute::CLKernelLibrary::get().context();
+ auto device = arm_compute::CLKernelLibrary::get().get_device();
+ auto clCompileContext = arm_compute::CLCompileContext(context, device);
+
+
+
+ // Check built programs are empty in context
+ BOOST_TEST(clCompileContext.get_built_programs().empty());
+
+ auto workload = std::make_unique<ClConvolution2dWorkload>(queueDescriptor,
+ workloadInfo,
+ clMemoryManager->GetIntraLayerManager(),
+ clCompileContext);
+ ARMNN_ASSERT(workload != nullptr);
+ // Check built programs are not empty in context
+ BOOST_TEST(!clCompileContext.get_built_programs().empty());
+}
+
template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType>
static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
{
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 858ef5b..4682c64 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -24,7 +24,9 @@
return arm_compute::CLAbsLayer::validate(&aclInput, &aclOutput);
}
-ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<AbsQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClAbsWorkload", 1, 1);
@@ -32,7 +34,7 @@
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_AbsLayer.configure(&input, &output);
+ m_AbsLayer.configure(clCompileContext, &input, &output);
}
void ClAbsWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAbsWorkload.hpp b/src/backends/cl/workloads/ClAbsWorkload.hpp
index 763cafc..d0f7d16 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.hpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.hpp
@@ -18,7 +18,9 @@
class ClAbsWorkload : public BaseWorkload<AbsQueueDescriptor>
{
public:
- ClAbsWorkload(const AbsQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClAbsWorkload(const AbsQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index 6856520..8997a97 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -30,7 +30,8 @@
}
ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1);
@@ -40,7 +41,7 @@
arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+ m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
}
void ClActivationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClActivationWorkload.hpp b/src/backends/cl/workloads/ClActivationWorkload.hpp
index 3516633..6b71e86 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.hpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.hpp
@@ -18,7 +18,9 @@
class ClActivationWorkload : public BaseWorkload<ActivationQueueDescriptor>
{
public:
- ClActivationWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClActivationWorkload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 7e75a04..0ab7446 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -19,7 +19,8 @@
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<AdditionQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1);
@@ -30,7 +31,7 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClAdditionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp
index 372c4bc..cd25be1 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.hpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp
@@ -15,7 +15,9 @@
class ClAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor>
{
public:
- ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 5910080..8974930 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -53,7 +53,8 @@
ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
{
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
@@ -69,7 +70,11 @@
}
else
{
- m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MIN);
+ m_ArgMinMaxLayer.configure(clCompileContext,
+ &input,
+ aclAxis,
+ &output,
+ arm_compute::ReductionOperation::ARG_IDX_MIN);
}
}
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
index 54f28e6..3ec137d 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.hpp
@@ -20,7 +20,9 @@
class ClArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor>
{
public:
- ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index c595e20..daaed17 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -52,7 +52,9 @@
}
ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
- const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
m_Mean = std::make_unique<arm_compute::CLTensor>();
@@ -78,7 +80,8 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input,
+ m_Layer.configure(clCompileContext,
+ &input,
&output,
m_Mean.get(),
m_Variance.get(),
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index ef57783..c9f1f7f 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -25,7 +25,9 @@
class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
{
public:
- ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index 1a7a8dc..8978c5a 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -18,8 +18,9 @@
using namespace armcomputetensorutils;
ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
- const WorkloadInfo& info)
- : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
@@ -35,7 +36,7 @@
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, blockWidth, blockHeight, &output);
+ m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
}
void ClBatchToSpaceNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
index 881b294..2262f33 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -18,7 +18,9 @@
class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
{
public:
- ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index 30b336d..20e5669 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -39,7 +39,9 @@
return aclStatus;
}
-ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1);
@@ -50,7 +52,7 @@
const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(m_Data.m_Parameters);
- m_ComparisonLayer.configure(&input0, &input1, &output, comparisonOperation);
+ m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
}
void ClComparisonWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.hpp b/src/backends/cl/workloads/ClComparisonWorkload.hpp
index e842152..4a92e6b 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.hpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.hpp
@@ -20,7 +20,9 @@
class ClComparisonWorkload : public BaseWorkload<ComparisonQueueDescriptor>
{
public:
- ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index da0d675..3c5f237 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -11,7 +11,6 @@
#include <cl/ClLayerSupport.hpp>
#include <arm_compute/core/Types.h>
-#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h>
namespace armnn
{
@@ -46,7 +45,9 @@
return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
}
-ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
{
bool allInputsAreSubtensors = true;
@@ -83,7 +84,7 @@
// Configure input and output tensors
size_t aclAxis = CalcAxis(descriptor.m_Parameters);
- layer->configure(aclInputs, &output, aclAxis);
+ layer->configure(clCompileContext, aclInputs, &output, aclAxis);
// Prepare
layer->prepare();
diff --git a/src/backends/cl/workloads/ClConcatWorkload.hpp b/src/backends/cl/workloads/ClConcatWorkload.hpp
index 772bc09..3120b42 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.hpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.hpp
@@ -9,6 +9,7 @@
#include <arm_compute/core/Error.h>
#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h>
namespace armnn
{
@@ -20,7 +21,9 @@
class ClConcatWorkload : public BaseWorkload<ConcatQueueDescriptor>
{
public:
- ClConcatWorkload(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp
index d6b5c57..40acb8e 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.cpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.cpp
@@ -41,7 +41,9 @@
}
}
-ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext&)
: BaseWorkload<ConstantQueueDescriptor>(descriptor, info)
, m_RanOnce(false)
{
diff --git a/src/backends/cl/workloads/ClConstantWorkload.hpp b/src/backends/cl/workloads/ClConstantWorkload.hpp
index e5a1d44..8fa5d63 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.hpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.hpp
@@ -8,6 +8,8 @@
#include <arm_compute/core/Error.h>
#include <backendsCommon/Workload.hpp>
+#include <arm_compute/core/CL/CLCompileContext.h>
+
namespace armnn
{
arm_compute::Status ClConstantWorkloadValidate(const TensorInfo& output);
@@ -15,7 +17,9 @@
class ClConstantWorkload : public BaseWorkload<ConstantQueueDescriptor>
{
public:
- ClConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConstantWorkload(const ConstantQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index d2e86f8..aaffd83 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -15,7 +15,9 @@
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
- const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext) :
Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1);
@@ -23,7 +25,7 @@
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
}
void ClConvertFp16ToFp32Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
index ef5c9b6..e8e6c98 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
@@ -16,7 +16,9 @@
{
public:
- ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index 3f528a1..a9f1d91 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -15,7 +15,9 @@
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
- const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext) :
Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
@@ -23,7 +25,7 @@
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
}
void ClConvertFp32ToFp16Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
index 6e04e39..17eac7d 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
@@ -16,7 +16,9 @@
{
public:
- ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 50cb9de..99a981b 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -65,6 +65,7 @@
ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext,
const bool isFastMathEnabled)
: BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
@@ -97,7 +98,8 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_ConvolutionLayer.configure(&input,
+ m_ConvolutionLayer.configure(clCompileContext,
+ &input,
m_KernelTensor.get(),
m_BiasTensor.get(),
&output,
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 70170b5..d0f7a5b 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -32,6 +32,7 @@
ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext,
const bool isFastMathEnabled = false);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index 43c81dc..d42b261 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -37,7 +37,8 @@
}
ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1);
@@ -54,7 +55,7 @@
PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, &output, blockSize);
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
}
void ClDepthToSpaceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
index de8b496..6cb8bb5 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
@@ -21,7 +21,9 @@
class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
{
public:
- ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 53f1684..655f0c9 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -75,7 +75,8 @@
ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
// Allocate a buffer for the swizzling of the weight tensor
@@ -124,6 +125,7 @@
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+ clCompileContext,
&input,
m_KernelTensor.get(),
m_BiasTensor.get(),
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
index c759137..d490012 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp
@@ -27,7 +27,8 @@
using BaseWorkload<DepthwiseConvolution2dQueueDescriptor>::m_Data;
ClDepthwiseConvolutionWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info);
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index eb63900..52d8fab 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -28,7 +28,8 @@
}
ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor,
- const WorkloadInfo& workloadInfo)
+ const WorkloadInfo& workloadInfo,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DequantizeQueueDescriptor>(descriptor, workloadInfo)
{
m_Data.ValidateInputsOutputs("ClDequantizeWorkload", 1, 1);
@@ -40,7 +41,7 @@
m_Data.m_Outputs[0])->GetTensor();
m_Layer.reset(new arm_compute::CLDequantizationLayer());
- m_Layer->configure(&input, &output);
+ m_Layer->configure(clCompileContext, &input, &output);
m_Layer->prepare();
}
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.hpp b/src/backends/cl/workloads/ClDequantizeWorkload.hpp
index 6e61da2..628ea20 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.hpp
@@ -17,7 +17,9 @@
class ClDequantizeWorkload : public BaseWorkload<DequantizeQueueDescriptor>
{
public:
- ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& workloadInfo);
+ ClDequantizeWorkload(const DequantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& workloadInfo,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
index c79e55e..3df8d52 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
@@ -32,7 +32,8 @@
ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<DivisionQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1);
@@ -43,7 +44,7 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_ArithmeticDivision.configure(&input0, &input1, &output, activationInfo);
+ m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo);
}
void ClDivisionFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
index 71d27ed..481b8b0 100644
--- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
@@ -20,8 +20,9 @@
class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
{
public:
- ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const
- WorkloadInfo& info);
+ ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index abf4181..60c383f 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -23,7 +23,9 @@
return arm_compute::CLExpLayer::validate(&aclInput, &aclOutput);
}
-ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1);
@@ -31,7 +33,7 @@
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ExpLayer.configure(&input, &output);
+ m_ExpLayer.configure(clCompileContext, &input, &output);
}
void ClExpWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClExpWorkload.hpp b/src/backends/cl/workloads/ClExpWorkload.hpp
index c35aebb..407fb5e 100644
--- a/src/backends/cl/workloads/ClExpWorkload.hpp
+++ b/src/backends/cl/workloads/ClExpWorkload.hpp
@@ -18,7 +18,9 @@
class ClExpWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index 47a70bc..a2204fa 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -15,7 +15,9 @@
{
using namespace armcomputetensorutils;
-ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<FillQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1);
@@ -23,7 +25,7 @@
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
arm_compute::PixelValue pixelValue = GetPixelValue(output, descriptor.m_Parameters.m_Value);
- m_Layer.configure(&output, pixelValue);
+ m_Layer.configure(clCompileContext, &output, pixelValue);
}
void ClFillWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFillWorkload.hpp b/src/backends/cl/workloads/ClFillWorkload.hpp
index 8919445..8539501 100644
--- a/src/backends/cl/workloads/ClFillWorkload.hpp
+++ b/src/backends/cl/workloads/ClFillWorkload.hpp
@@ -14,7 +14,9 @@
class ClFillWorkload : public BaseWorkload<FillQueueDescriptor>
{
public:
- ClFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClFillWorkload(const FillQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index f38342e..3915270 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -20,7 +20,9 @@
return arm_compute::CLFloor::validate(&aclInput, &aclOutput);
}
-ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<FloorQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1);
@@ -28,7 +30,7 @@
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClFloorFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
index 1ddaddf..1c755c5 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
@@ -18,7 +18,9 @@
class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor>
{
public:
- ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index eaec639..9135d27 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -45,8 +45,11 @@
fullyConnectedLayerInfo);
}
-ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClFullyConnectedWorkload::ClFullyConnectedWorkload(
+ const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
, m_FullyConnectedLayer(memoryManager)
{
@@ -69,7 +72,12 @@
arm_compute::FullyConnectedLayerInfo fc_info =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
- m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+ m_FullyConnectedLayer.configure(clCompileContext,
+ &input,
+ m_WeightsTensor.get(),
+ m_BiasesTensor.get(),
+ &output,
+ fc_info);
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
index 311b594..45394da 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -27,7 +27,8 @@
public:
ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor,
const armnn::WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index c76b9c7..98dfe7b 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -27,7 +27,8 @@
}
ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<GatherQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1);
@@ -38,7 +39,7 @@
int aclAxis = ComputeAclAxis(descriptor.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_Layer.configure(&input, &indices, &output, aclAxis);
+ m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis);
};
void ClGatherWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClGatherWorkload.hpp b/src/backends/cl/workloads/ClGatherWorkload.hpp
index df71a99..8199aaf 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.hpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.hpp
@@ -19,7 +19,9 @@
class ClGatherWorkload : public BaseWorkload<GatherQueueDescriptor>
{
public:
- ClGatherWorkload(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClGatherWorkload(const GatherQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index 50cf345..a0e8e7b 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -31,7 +31,8 @@
ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
const InstanceNormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1);
@@ -43,7 +44,8 @@
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input,
+ m_Layer.configure(clCompileContext,
+ &input,
&output,
descriptor.m_Parameters.m_Gamma,
descriptor.m_Parameters.m_Beta,
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
index 0e37bdc..957ba34 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.hpp
@@ -19,7 +19,9 @@
class ClInstanceNormalizationWorkload : public BaseWorkload<InstanceNormalizationQueueDescriptor>
{
public:
- ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index e1a6127..bd38219 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -27,7 +27,8 @@
}
ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
@@ -41,7 +42,7 @@
int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
- m_Layer.configure(&input, &output, axis, m_Data.m_Parameters.m_Eps);
+ m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps);
}
void ClL2NormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
index 26aea9f..8648da4 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
@@ -19,7 +19,9 @@
class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
{
public:
- ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 6d53523..1a255f1 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -25,8 +25,10 @@
return arm_compute::CLLogSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
}
-ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
, m_LogSoftmaxLayer(memoryManager)
{
@@ -36,7 +38,7 @@
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_LogSoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
}
void ClLogSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
index 9b531ad..a283588 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.hpp
@@ -25,7 +25,8 @@
{
public:
ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index 9418d73..f04cede 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -32,7 +32,8 @@
}
ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1);
@@ -41,7 +42,7 @@
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalAndLayer.configure(&input0, &input1, &output);
+ m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClLogicalAndWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
index 3bf6afe..c7cbf5a 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.hpp
@@ -20,7 +20,9 @@
class ClLogicalAndWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor>
{
public:
- ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index eb90caf..475e57f 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -29,7 +29,8 @@
}
ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1);
@@ -37,7 +38,7 @@
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalNotLayer.configure(&input, &output);
+ m_LogicalNotLayer.configure(clCompileContext, &input, &output);
}
void ClLogicalNotWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
index f1225c7..9c6c3d1 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.hpp
@@ -18,7 +18,9 @@
class ClLogicalNotWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index e9895bf..355310e 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -32,7 +32,8 @@
}
ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1);
@@ -41,7 +42,7 @@
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalOrLayer.configure(&input0, &input1, &output);
+ m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClLogicalOrWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
index 8faabde..a6db990 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.hpp
@@ -20,7 +20,9 @@
class ClLogicalOrWorkload : public BaseWorkload<LogicalBinaryQueueDescriptor>
{
public:
- ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index fe9b45e..58cc735 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -19,7 +19,9 @@
{
using namespace armcomputetensorutils;
-ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
+ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
+ const WorkloadInfo &info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<LstmQueueDescriptor>(descriptor, info)
{
arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
@@ -185,11 +187,12 @@
throw armnn::Exception("Wrong Type of Activation Function!");
}
- m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
- m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
- m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
- m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
- &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+ m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(),
+ m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
+ m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(),
+ m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(),
+ m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in,
+ &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
&cell_state_out, &output, lstm_param, activationLayerInfo,
cell_threshold, projection_threshold);
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
index b7cb408..b084750 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
@@ -18,7 +18,9 @@
class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor>
{
public:
- ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index cd3192d..85bea47 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -37,7 +37,8 @@
}
ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MaximumQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMaximumWorkload", 2, 1);
@@ -46,7 +47,7 @@
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MaximumLayer.configure(&input0, &input1, &output);
+ m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClMaximumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.hpp b/src/backends/cl/workloads/ClMaximumWorkload.hpp
index 18f67cd..f6681c7 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.hpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.hpp
@@ -19,7 +19,9 @@
class ClMaximumWorkload : public BaseWorkload<MaximumQueueDescriptor>
{
public:
- ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 470b6a8..4cc0f7c 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -28,7 +28,9 @@
return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
}
-ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MeanQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
@@ -40,7 +42,7 @@
info.m_InputTensorInfos[0].GetNumDimensions(),
m_Data.m_Parameters.m_Axis);
- m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+ m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output);
}
void ClMeanWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
index 127c054..04e9fe2 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.hpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -19,7 +19,9 @@
class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
public:
- ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMeanWorkload(const MeanQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 5f8dfdb..07a78b5 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -37,7 +37,8 @@
}
ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MinimumQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMinimumWorkload", 2, 1);
@@ -46,7 +47,7 @@
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MinimumLayer.configure(&input0, &input1, &output);
+ m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output);
}
void ClMinimumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.hpp b/src/backends/cl/workloads/ClMinimumWorkload.hpp
index 55d7eea..34e7bb8 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.hpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.hpp
@@ -19,7 +19,9 @@
class ClMinimumWorkload : public BaseWorkload<MinimumQueueDescriptor>
{
public:
- ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index 46a1c4b..31e9d02 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -45,7 +45,8 @@
ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MultiplicationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMultiplicationWorkload", 2, 1);
@@ -62,7 +63,8 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
// Construct
- m_PixelWiseMultiplication.configure(&input0,
+ m_PixelWiseMultiplication.configure(clCompileContext,
+ &input0,
&input1,
&output,
1.0f,
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
index 461449c..424f3d7 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp
@@ -20,7 +20,9 @@
class ClMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
{
public:
- ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
using BaseWorkload<MultiplicationQueueDescriptor>::BaseWorkload;
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index 2788662..7505ab6 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -23,7 +23,9 @@
return arm_compute::CLNegLayer::validate(&aclInput, &aclOutput);
}
-ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClNegWorkload", 1, 1);
@@ -31,7 +33,7 @@
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_NegLayer.configure(&input, &output);
+ m_NegLayer.configure(clCompileContext, &input, &output);
}
void ClNegWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNegWorkload.hpp b/src/backends/cl/workloads/ClNegWorkload.hpp
index 9dbfa07..7ee35cb 100644
--- a/src/backends/cl/workloads/ClNegWorkload.hpp
+++ b/src/backends/cl/workloads/ClNegWorkload.hpp
@@ -18,7 +18,9 @@
class ClNegWorkload : public BaseWorkload<ElementwiseUnaryQueueDescriptor>
{
public:
- ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index 5f2fd7a..290d29a 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -29,7 +29,8 @@
}
ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
@@ -43,7 +44,7 @@
arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
- m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+ m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo);
};
void ClNormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
index a6d4f25..dd309b4 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
@@ -19,7 +19,9 @@
class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor>
{
public:
- ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index 8a8c34a..533855c 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -15,7 +15,9 @@
{
using namespace armcomputetensorutils;
-ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PadQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
@@ -33,7 +35,7 @@
arm_compute::PixelValue pixelValue = GetPixelValue(input, descriptor.m_Parameters.m_PadValue);
- m_Layer.configure(&input, &output, padList, pixelValue);
+ m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue);
}
void ClPadWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp
index e87cbb7..ac9b4b7 100644
--- a/src/backends/cl/workloads/ClPadWorkload.hpp
+++ b/src/backends/cl/workloads/ClPadWorkload.hpp
@@ -14,7 +14,9 @@
class ClPadWorkload : public BaseWorkload<PadQueueDescriptor>
{
public:
- ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPadWorkload(const PadQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
index 41bce1d..5aadc76 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.cpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -27,7 +27,8 @@
}
ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
{
using armcomputetensorutils::BuildArmComputePermutationVector;
@@ -39,7 +40,7 @@
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
// Run the layer.
- m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+ m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings));
}
void ClPermuteWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp
index 8b5f4c6..2df2b26 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.hpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp
@@ -29,7 +29,9 @@
return name;
}
- ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
index 6838804..c7cc102 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
@@ -28,7 +28,9 @@
}
ClPooling2dWorkload::ClPooling2dWorkload(
- const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1);
@@ -48,7 +50,7 @@
arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision);
// Run the layer.
- m_PoolingLayer.configure(&input, &output, layerInfo);
+ m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
}
void ClPooling2dWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.hpp b/src/backends/cl/workloads/ClPooling2dWorkload.hpp
index ce67db2..f4117ac 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.hpp
@@ -22,7 +22,8 @@
using BaseWorkload<Pooling2dQueueDescriptor>::m_Data;
ClPooling2dWorkload(const Pooling2dQueueDescriptor& descriptor,
- const WorkloadInfo& info);
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index 1813105..73fa887 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -27,7 +27,8 @@
}
ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PreluQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClPreluWorkload", 1, 1);
@@ -36,7 +37,7 @@
arm_compute::ICLTensor& alpha = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_PreluLayer.configure(&input, &alpha, &output);
+ m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
}
void ClPreluWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPreluWorkload.hpp b/src/backends/cl/workloads/ClPreluWorkload.hpp
index 9061416..ac8932c 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.hpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.hpp
@@ -18,7 +18,9 @@
class ClPreluWorkload : public BaseWorkload<PreluQueueDescriptor>
{
public:
- ClPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClPreluWorkload(const PreluQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index 7ece05f..0ae3715 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -14,7 +14,9 @@
{
using namespace armcomputetensorutils;
-ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
{
arm_compute::LSTMParams<arm_compute::ICLTensor> qLstmParams;
@@ -150,8 +152,9 @@
m_Data.m_Parameters.m_CellIntermediateScale,
m_Data.m_Parameters.m_OutputIntermediateScale);
- // QLSTM NEON configure
- m_QLstmLayer.configure(&input,
+ // QLSTM CL configure
+ m_QLstmLayer.configure(clCompileContext,
+ &input,
m_InputToForgetWeightsTensor.get(),
m_InputToCellWeightsTensor.get(),
m_InputToOutputWeightsTensor.get(),
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.hpp b/src/backends/cl/workloads/ClQLstmWorkload.hpp
index f98c9b3..6758abc 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.hpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.hpp
@@ -19,7 +19,9 @@
class ClQLstmWorkload : public BaseWorkload<QLstmQueueDescriptor>
{
public:
- ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index 263065a..5c945e0 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -29,7 +29,9 @@
&aclOutputInfo);
}
-ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<QuantizeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClQuantizeWorkload", 1, 1);
@@ -37,7 +39,7 @@
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClQuantizeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.hpp b/src/backends/cl/workloads/ClQuantizeWorkload.hpp
index f4a7ec6..9bb1572 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.hpp
@@ -18,7 +18,9 @@
class ClQuantizeWorkload : public BaseWorkload<QuantizeQueueDescriptor>
{
public:
- ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index 688ebf9..636bdec 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -62,7 +62,8 @@
}
ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor &descriptor,
- const WorkloadInfo &info):
+ const WorkloadInfo &info,
+ const arm_compute::CLCompileContext& clCompileContext):
BaseWorkload<QuantizedLstmQueueDescriptor>(descriptor, info)
{
m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
@@ -108,7 +109,8 @@
arm_compute::ICLTensor& cellStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
- m_QuantizedLstmLayer.configure(&inputTensor, m_InputToInputWeightsTensor.get(), m_InputToForgetWeightsTensor.get(),
+ m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
+ m_InputToForgetWeightsTensor.get(),
m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
index 580db49..6561850 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.hpp
@@ -22,7 +22,9 @@
class ClQuantizedLstmWorkload : public BaseWorkload<QuantizedLstmQueueDescriptor>
{
public:
- ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClQuantizedLstmWorkload(const QuantizedLstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index d752290..0988bab 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -21,7 +21,9 @@
return arm_compute::CLReshapeLayer::validate(&aclInputInfo, &aclOutputInfo);
}
-ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ReshapeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClReshapeWorkload", 1, 1);
@@ -29,7 +31,7 @@
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output);
+ m_Layer.configure(clCompileContext, &input, &output);
}
void ClReshapeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.hpp b/src/backends/cl/workloads/ClReshapeWorkload.hpp
index d836f1e..70d7287 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.hpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.hpp
@@ -18,7 +18,9 @@
class ClReshapeWorkload : public BaseWorkload<ReshapeQueueDescriptor>
{
public:
- ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 744a915..e477406 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -46,8 +46,10 @@
descriptor.m_AlignCorners));
}
-ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) :
- BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
+ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
+ : BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1);
@@ -65,7 +67,8 @@
? arm_compute::SamplingPolicy::CENTER
: arm_compute::SamplingPolicy::TOP_LEFT;
- m_ResizeLayer.configure(&input,
+ m_ResizeLayer.configure(clCompileContext,
+ &input,
&output,
arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
arm_compute::BorderMode::REPLICATE,
diff --git a/src/backends/cl/workloads/ClResizeWorkload.hpp b/src/backends/cl/workloads/ClResizeWorkload.hpp
index ab5b943..9549a32 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.hpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.hpp
@@ -19,7 +19,9 @@
class ClResizeWorkload : public BaseWorkload<ResizeQueueDescriptor>
{
public:
- ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index 48fd1e0..a3a04c1 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -23,7 +23,9 @@
return arm_compute::CLRsqrtLayer::validate(&aclInput, &aclOutput);
}
-ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<RsqrtQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClRsqrtWorkload", 1, 1);
@@ -31,7 +33,7 @@
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_RsqrtLayer.configure(&input, &output);
+ m_RsqrtLayer.configure(clCompileContext, &input, &output);
}
void ClRsqrtWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.hpp b/src/backends/cl/workloads/ClRsqrtWorkload.hpp
index 8fb6229..35f8414 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.hpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.hpp
@@ -18,7 +18,9 @@
class ClRsqrtWorkload : public BaseWorkload<RsqrtQueueDescriptor>
{
public:
- ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index d7b1dbb..1627196 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -30,7 +30,9 @@
return arm_compute::CLSlice::validate(&aclInput, &aclOutput, starts, ends);
}
-ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SliceQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
@@ -43,7 +45,7 @@
std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size);
- m_SliceFunction.configure(&input, &output, starts, ends);
+ m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
}
void ClSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSliceWorkload.hpp b/src/backends/cl/workloads/ClSliceWorkload.hpp
index 3460b77..67836c2 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.hpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.hpp
@@ -20,7 +20,9 @@
class ClSliceWorkload : public BaseWorkload<SliceQueueDescriptor>
{
public:
- ClSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSliceWorkload(const SliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
virtual void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 8bc2a76..4547c68 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -25,8 +25,10 @@
return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta, aclAxis);
}
-ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
, m_SoftmaxLayer(memoryManager)
{
@@ -36,7 +38,7 @@
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
}
void ClSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
index 158bf46..1742c60 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.hpp
@@ -23,8 +23,10 @@
class ClSoftmaxWorkload : public BaseWorkload<SoftmaxQueueDescriptor>
{
public:
- ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 443c56b..7b29cde 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -45,7 +45,9 @@
}
ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
- const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info)
+ const SpaceToBatchNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
@@ -68,7 +70,8 @@
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_SpaceToBatchLayer.configure(&input,
+ m_SpaceToBatchLayer.configure(clCompileContext,
+ &input,
blockWidth,
blockHeight,
paddingLeftTop,
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
index 7500b5a..06d243a 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
@@ -22,7 +22,9 @@
class ClSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescriptor>
{
public:
- ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index f35fe0e..7a590d2 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -18,7 +18,8 @@
using namespace armcomputetensorutils;
ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
{
m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1);
@@ -33,7 +34,7 @@
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(&input, &output, blockSize);
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
}
void ClSpaceToDepthWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
index 57ce5d4..b782bbe 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
@@ -19,7 +19,9 @@
class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
{
public:
- ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index 045fbb7..70a8178 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -9,7 +9,6 @@
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
-#include <arm_compute/runtime/CL/functions/CLSplit.h>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <backendsCommon/CpuTensorHandle.hpp>
#include <cl/ClTensorHandle.hpp>
@@ -53,7 +52,9 @@
return arm_compute::CLSplit::validate(&aclInputInfo, aclOutputPtr, aclAxis);
}
-ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext&)
: BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
{
bool allOutputsAreSubtensors = true;
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.hpp b/src/backends/cl/workloads/ClSplitterWorkload.hpp
index 82211f5..c59aa02 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.hpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.hpp
@@ -9,6 +9,7 @@
#include <arm_compute/core/Error.h>
#include <arm_compute/runtime/IFunction.h>
+#include <arm_compute/runtime/CL/functions/CLSplit.h>
#include <functional>
@@ -22,7 +23,9 @@
class ClSplitterWorkload : public BaseWorkload<SplitterQueueDescriptor>
{
public:
- ClSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSplitterWorkload(const SplitterQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index c0b88b1..749282f 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -44,7 +44,9 @@
return arm_compute::CLStackLayer::validate(aclInputPtrs, aclAxis, &aclOutputInfo);
}
-ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info)
+ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StackQueueDescriptor>(descriptor, info)
{
std::vector<arm_compute::ICLTensor*> aclInputs;
@@ -58,7 +60,7 @@
m_Layer.reset(new arm_compute::CLStackLayer());
int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions());
- m_Layer->configure(aclInputs, aclAxis, &output);
+ m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
}
void ClStackWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClStackWorkload.hpp b/src/backends/cl/workloads/ClStackWorkload.hpp
index f27d6cd..3f1e642 100644
--- a/src/backends/cl/workloads/ClStackWorkload.hpp
+++ b/src/backends/cl/workloads/ClStackWorkload.hpp
@@ -18,7 +18,9 @@
class ClStackWorkload : public BaseWorkload<StackQueueDescriptor>
{
public:
- ClStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClStackWorkload(const StackQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index b094a91..92e860f 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -53,7 +53,8 @@
}
ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1);
@@ -78,7 +79,8 @@
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_StridedSliceLayer.configure(&input,
+ m_StridedSliceLayer.configure(clCompileContext,
+ &input,
&output,
starts,
ends,
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
index bce3fe1..1229599 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.hpp
@@ -22,7 +22,9 @@
class ClStridedSliceWorkload : public BaseWorkload<StridedSliceQueueDescriptor>
{
public:
- ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private:
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index 865dceb..31e0bec 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -19,7 +19,8 @@
static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SubtractionQueueDescriptor>(descriptor, info)
{
this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1);
@@ -30,7 +31,7 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
}
void ClSubtractionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
index 9f51de6..28440b0 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.hpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
@@ -15,7 +15,9 @@
class ClSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor>
{
public:
- ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index 20b2104..ff0fd5c 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -56,7 +56,8 @@
ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) :
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext) :
BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info),
m_Layer(memoryManager)
{
@@ -82,7 +83,7 @@
output.info()->set_data_layout(aclDataLayout);
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
- m_Layer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
+ m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
if (m_BiasesTensor)
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
index b7320bf..8a24e6d 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.hpp
@@ -29,7 +29,8 @@
public:
ClTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index b276b22..7ef502e 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -27,7 +27,8 @@
}
ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -36,7 +37,9 @@
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
// Run the layer.
- m_PermuteFunction.configure(&input, &output,
+ m_PermuteFunction.configure(clCompileContext,
+ &input,
+ &output,
armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
}
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.hpp b/src/backends/cl/workloads/ClTransposeWorkload.hpp
index c1bed93..4677bdc 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.hpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.hpp
@@ -29,7 +29,9 @@
return name;
}
- ClTransposeWorkload(const TransposeQueueDescriptor& descriptor, const WorkloadInfo& info);
+ ClTransposeWorkload(const TransposeQueueDescriptor& descriptor,
+ const WorkloadInfo& info,
+ const arm_compute::CLCompileContext& clCompileContext);
void Execute() const override;
private: