IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in Neon
Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: I7be77712a9f790928219ce91222d46cc766ab9dd
diff --git a/src/backends/neon/workloads/NeonAbsWorkload.cpp b/src/backends/neon/workloads/NeonAbsWorkload.cpp
index ea14ac3..bd476be 100644
--- a/src/backends/neon/workloads/NeonAbsWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAbsWorkload.cpp
@@ -35,7 +35,7 @@
void NeonAbsWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAbsWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAbsWorkload_Execute", this->GetGuid());
m_AbsLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp
index 4b2169a..dd4c97d 100644
--- a/src/backends/neon/workloads/NeonActivationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp
@@ -33,6 +33,12 @@
const WorkloadInfo& info)
: BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonActivationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonActivationWorkload", 1, 1);
const arm_compute::ActivationLayerInfo activationLayerInfo =
@@ -49,7 +55,7 @@
void NeonActivationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonActivationWorkload_Execute", this->GetGuid());
m_ActivationLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index 5891677..dfbb992 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -56,7 +56,7 @@
void NeonAdditionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAdditionWorkload_Execute", this->GetGuid());
m_AddLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
index cc85791..7e9d2c7 100644
--- a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
@@ -56,6 +56,12 @@
const WorkloadInfo& info)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonArgMinMaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -79,7 +85,7 @@
void NeonArgMinMaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonArgMinMaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonArgMinMaxWorkload_Execute", this->GetGuid());
m_ArgMinMaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index 5da7cca..3d0a90b 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -60,6 +60,12 @@
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonBatchNormalizationWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -107,7 +113,7 @@
void NeonBatchNormalizationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchNormalizationWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
index 3d479ff..2a35475 100644
--- a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
@@ -19,14 +19,14 @@
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const BatchToSpaceNdDescriptor& desc)
+ const BatchToSpaceNdDescriptor& descriptor)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, desc.m_DataLayout);
- const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, desc.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
// ArmNN blockShape is [H, W] Cl asks for W, H
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
const arm_compute::Status aclStatus = arm_compute::NEBatchToSpaceLayer::validate(&aclInputInfo,
blockWidth,
@@ -35,10 +35,16 @@
return aclStatus;
}
-NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchToSpaceNdWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonBatchToSpaceNdWorkload", 1, 1);
arm_compute::ITensor& input =
@@ -51,8 +57,8 @@
output.info()->set_data_layout(aclDataLayout);
// ArmNN blockShape is [H, W] Cl asks for W, H
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[1]);
m_Layer.reset(new arm_compute::NEBatchToSpaceLayer());
m_Layer->configure(&input, blockWidth, blockHeight, &output);
@@ -63,7 +69,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchToSpaceNdWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonCastWorkload.cpp b/src/backends/neon/workloads/NeonCastWorkload.cpp
index 4727fe1..50e212e 100644
--- a/src/backends/neon/workloads/NeonCastWorkload.cpp
+++ b/src/backends/neon/workloads/NeonCastWorkload.cpp
@@ -37,7 +37,7 @@
void NeonCastWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonCastWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonCastWorkload_Execute", this->GetGuid());
m_CastLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonComparisonWorkload.cpp b/src/backends/neon/workloads/NeonComparisonWorkload.cpp
index 01a6a0c..129921a 100644
--- a/src/backends/neon/workloads/NeonComparisonWorkload.cpp
+++ b/src/backends/neon/workloads/NeonComparisonWorkload.cpp
@@ -34,6 +34,12 @@
NeonComparisonWorkload::NeonComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonComparisonWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonComparisonWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@
void NeonComparisonWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonComparisonWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonComparisonWorkload_Execute", this->GetGuid());
m_ComparisonLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonConcatWorkload.cpp b/src/backends/neon/workloads/NeonConcatWorkload.cpp
index 5cd906d..657a940 100644
--- a/src/backends/neon/workloads/NeonConcatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConcatWorkload.cpp
@@ -18,9 +18,9 @@
namespace
{
-size_t CalcAxis(const armnn::OriginsDescriptor& desc)
+size_t CalcAxis(const armnn::OriginsDescriptor& descriptor)
{
- return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+ return (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
}
} //namespace
@@ -50,6 +50,12 @@
const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConcatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
bool allInputsAreSubtensors = true;
// Check that all inputs are sub-tensors
@@ -93,7 +99,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConcatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConcatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonConstantWorkload.cpp b/src/backends/neon/workloads/NeonConstantWorkload.cpp
index 77e4420..16bb211 100644
--- a/src/backends/neon/workloads/NeonConstantWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConstantWorkload.cpp
@@ -53,7 +53,7 @@
void NeonConstantWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConstantWorkload_Execute", this->GetGuid());
using namespace armcomputetensorutils;
diff --git a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
index 79d1f22..e8cc125 100644
--- a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
@@ -24,7 +24,7 @@
void NeonConvertBf16ToFp32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertBf16ToFp32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertBf16ToFp32Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
index 01f09a5..0d6bb04 100644
--- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
@@ -24,7 +24,7 @@
void NeonConvertFp16ToFp32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp16ToFp32Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
index e1aceec..84d3c78 100644
--- a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
@@ -25,7 +25,7 @@
void NeonConvertFp32ToBf16Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToBf16Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToBf16Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
index 62f39be..7f6d4d6 100644
--- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
@@ -25,7 +25,7 @@
void NeonConvertFp32ToFp16Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToFp16Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index a6ae99b..0b0a72c 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -131,7 +131,7 @@
}
// Report Profiling Details
- ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Execute",
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Construct",
descriptor.m_Parameters,
detailsInfo,
this->GetGuid());
diff --git a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
index 2c4a651..76829f3 100644
--- a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
@@ -29,10 +29,16 @@
return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
}
-NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
+NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
+ : BaseWorkload<DepthToSpaceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthToSpaceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonDepthToSpaceWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -41,7 +47,7 @@
PolymorphicPointerDowncast<IAclTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ITensor& output =
PolymorphicPointerDowncast<IAclTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
@@ -53,7 +59,7 @@
void NeonDepthToSpaceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthToSpaceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthToSpaceWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index 589a951..138c237 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -33,20 +33,20 @@
const Optional<TensorInfo>& biases,
const ActivationDescriptor* activationDescriptor)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
// ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M]
// Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
unsigned int aclDepthMultiplier;
TensorInfo weightsPermuted;
- std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
+ std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
// Convert the weights into the compute library format
const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
- arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+ arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
if (descriptor.m_BiasEnabled)
{
@@ -58,10 +58,10 @@
arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
- descriptor.m_DilationX,descriptor.m_DilationY);
+ descriptor.m_DilationX, descriptor.m_DilationY);
const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
- activationDescriptor);
+ activationDescriptor);
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -85,9 +85,9 @@
unsigned int depthMultiplier;
std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]);
std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
- info.m_InputTensorInfos[0],
- m_Data.m_Parameters.m_DataLayout,
- permuteBuffer.get());
+ info.m_InputTensorInfos[0],
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
// Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::Tensor>();
@@ -100,14 +100,14 @@
}
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
- m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
+ m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
- IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
+ IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
IAclTensorHandle* outputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
- arm_compute::ITensor& input = inputTensorHandle->GetTensor();
+ arm_compute::ITensor& input = inputTensorHandle->GetTensor();
arm_compute::ITensor& output = outputTensorHandle->GetTensor();
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -129,6 +129,23 @@
activationInfo,
aclDilationInfo);
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthwiseConvolutionWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
ScopedTensorHandle weightsPermutedHandle(weightsPermuted);
@@ -145,7 +162,7 @@
void NeonDepthwiseConvolutionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthwiseConvolutionWorkload_Execute", this->GetGuid());
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
m_pDepthwiseConvolutionLayer->run();
diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
index 07323d1..32c1134 100644
--- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
@@ -44,7 +44,7 @@
void NeonDequantizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDequantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDequantizeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
index 36f1cd9..a9cb5c4 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
@@ -14,19 +14,19 @@
namespace armnn
{
-arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& desc)
+arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& descriptor)
{
- return arm_compute::DetectionPostProcessLayerInfo(desc.m_MaxDetections,
- desc.m_MaxClassesPerDetection,
- desc.m_NmsScoreThreshold,
- desc.m_NmsIouThreshold,
- desc.m_NumClasses,
- { desc.m_ScaleX,
- desc.m_ScaleY,
- desc.m_ScaleW,
- desc.m_ScaleH },
- desc.m_UseRegularNms,
- desc.m_DetectionsPerClass);
+ return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
+ descriptor.m_MaxClassesPerDetection,
+ descriptor.m_NmsScoreThreshold,
+ descriptor.m_NmsIouThreshold,
+ descriptor.m_NumClasses,
+ { descriptor.m_ScaleX,
+ descriptor.m_ScaleY,
+ descriptor.m_ScaleW,
+ descriptor.m_ScaleH },
+ descriptor.m_UseRegularNms,
+ descriptor.m_DetectionsPerClass);
}
arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings,
@@ -36,9 +36,9 @@
const TensorInfo& detectionClasses,
const TensorInfo& detectionScores,
const TensorInfo& numDetections,
- const DetectionPostProcessDescriptor &desc)
+ const DetectionPostProcessDescriptor &descriptor)
{
- arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(desc);
+ arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
const arm_compute::TensorInfo aclBoxEncodings =
armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
@@ -77,6 +77,12 @@
const WorkloadInfo& info)
: BaseWorkload<DetectionPostProcessQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDetectionPostProcessWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Anchors = std::make_unique<arm_compute::Tensor>();
BuildArmComputeTensor(*m_Anchors, descriptor.m_Anchors->GetTensorInfo());
@@ -104,7 +110,7 @@
void NeonDetectionPostProcessWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDetectionPostProcessWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDetectionPostProcessWorkload_Execute", this->GetGuid());
m_Func.run();
}
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
index 29876ff..82ef1e2 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
@@ -20,7 +20,7 @@
const TensorInfo& detectionClasses,
const TensorInfo& detectionScores,
const TensorInfo& numDetections,
- const DetectionPostProcessDescriptor &desc);
+ const DetectionPostProcessDescriptor &descriptor);
class NeonDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcessQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fa61a10..8c5d2b8 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -50,7 +50,7 @@
void NeonDivisionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDivisionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDivisionWorkload_Execute", this->GetGuid());
m_DivLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonExpWorkload.cpp b/src/backends/neon/workloads/NeonExpWorkload.cpp
index 7baaa84..aff8e72 100644
--- a/src/backends/neon/workloads/NeonExpWorkload.cpp
+++ b/src/backends/neon/workloads/NeonExpWorkload.cpp
@@ -25,6 +25,12 @@
NeonExpWorkload::NeonExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonExpWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonExpWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -35,7 +41,7 @@
void NeonExpWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonExpWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonExpWorkload_Execute", this->GetGuid());
m_ExpLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonFillWorkload.cpp b/src/backends/neon/workloads/NeonFillWorkload.cpp
index 5965d20..0a3c7f0 100644
--- a/src/backends/neon/workloads/NeonFillWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFillWorkload.cpp
@@ -19,6 +19,12 @@
NeonFillWorkload::NeonFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<FillQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFillWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonFillWorkload", 1, 1);
arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -31,7 +37,7 @@
void NeonFillWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFillWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFillWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
index c49df33..d728e00 100644
--- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
@@ -29,7 +29,7 @@
void NeonFloorFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFloorFloatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 713771b..94dc077 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -19,6 +19,7 @@
namespace armnn
{
using namespace armcomputetensorutils;
+using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>;
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
@@ -32,10 +33,10 @@
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
arm_compute::TensorInfo aclBiases;
- arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
- aclBiases = BuildArmComputeTensorInfo(biases);
+ aclBiases = BuildArmComputeTensorInfo(biases);
optionalAclBiases = &aclBiases;
}
@@ -50,7 +51,8 @@
}
NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ ACLMemManagerOnDemand& memoryManager)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
@@ -69,8 +71,8 @@
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
@@ -98,6 +100,23 @@
}
}
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFullyConnectedWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
// Force Compute Library to perform the necessary copying and reshaping, after which
// delete all the input tensors that will no longer be needed
m_FullyConnectedLayer->prepare();
@@ -106,7 +125,7 @@
void NeonFullyConnectedWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFullyConnectedWorkload_Execute", this->GetGuid());
m_FullyConnectedLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonGatherWorkload.cpp b/src/backends/neon/workloads/NeonGatherWorkload.cpp
index 2c94cb5..f5c8d34 100644
--- a/src/backends/neon/workloads/NeonGatherWorkload.cpp
+++ b/src/backends/neon/workloads/NeonGatherWorkload.cpp
@@ -28,6 +28,12 @@
const WorkloadInfo& info)
: BaseWorkload<GatherQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonGatherWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonGatherWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@
void NeonGatherWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonGatherWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherWorkload_Execute", this->GetGuid());
m_Layer.run();
}
} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
index 1bfd1e4..a68ea65 100644
--- a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
@@ -35,6 +35,12 @@
const WorkloadInfo& info)
: BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonInstanceNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonInstanceNormalizationWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -53,7 +59,7 @@
void NeonInstanceNormalizationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonInstanceNormalizationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonInstanceNormalizationWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
index d54607d..33b4609 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
@@ -32,6 +32,12 @@
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonL2NormalizationFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -50,7 +56,7 @@
void NeonL2NormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonL2NormalizationFloatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
index ba5c900..8a97432 100644
--- a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
@@ -35,6 +35,12 @@
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogSoftmaxWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -48,7 +54,7 @@
void NeonLogSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogSoftmaxWorkload_Execute", this->GetGuid());
m_LogSoftmaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonLogWorkload.cpp b/src/backends/neon/workloads/NeonLogWorkload.cpp
index 460f5b3..0fb8f8a 100644
--- a/src/backends/neon/workloads/NeonLogWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogWorkload.cpp
@@ -35,7 +35,7 @@
void NeonLogWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogWorkload_Execute", this->GetGuid());
m_LogLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
index d85e05c..179e495 100644
--- a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
@@ -33,6 +33,12 @@
const WorkloadInfo& info)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalAndWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalAndWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@
void NeonLogicalAndWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalAndWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalAndWorkload_Execute", this->GetGuid());
m_LogicalAndLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
index cff5eaf..16bf4e8 100644
--- a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
@@ -31,6 +31,12 @@
const WorkloadInfo& info)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalNotWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalNotWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@
void NeonLogicalNotWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalNotWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalNotWorkload_Execute", this->GetGuid());
m_LogicalNotLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
index c3f21e1..301f432 100644
--- a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
@@ -30,9 +30,15 @@
}
NeonLogicalOrWorkload::NeonLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalOrWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalOrWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@
void NeonLogicalOrWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalOrWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalOrWorkload_Execute", this->GetGuid());
m_LogicalOrLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
index 175e908..f80da03 100644
--- a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
@@ -19,6 +19,12 @@
NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
: FloatWorkload<LstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLstmFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ITensor> lstm_param;
// Basic parameters
@@ -267,6 +273,7 @@
void NeonLstmFloatWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLstmFloatWorkload_Execute", this->GetGuid());
m_LstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMaximumWorkload.cpp b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
index c4500d8..0f95af5 100644
--- a/src/backends/neon/workloads/NeonMaximumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
@@ -39,7 +39,7 @@
void NeonMaximumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMaximumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMaximumWorkload_Execute", this->GetGuid());
m_MaxLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.cpp b/src/backends/neon/workloads/NeonMeanWorkload.cpp
index bb0870d..5d8d1c4 100644
--- a/src/backends/neon/workloads/NeonMeanWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMeanWorkload.cpp
@@ -17,21 +17,27 @@
arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc)
+ const MeanDescriptor& descriptor)
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_Axis);
+ descriptor.m_Axis);
- return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+ return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
}
NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<MeanQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonMeanWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonMeanWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -46,7 +52,7 @@
void NeonMeanWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMeanWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMeanWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.hpp b/src/backends/neon/workloads/NeonMeanWorkload.hpp
index 055b52a..5d16588 100644
--- a/src/backends/neon/workloads/NeonMeanWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMeanWorkload.hpp
@@ -14,7 +14,7 @@
arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc);
+ const MeanDescriptor& descriptor);
class NeonMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonMinimumWorkload.cpp b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
index 519b3c4..5212947 100644
--- a/src/backends/neon/workloads/NeonMinimumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
@@ -40,7 +40,7 @@
void NeonMinimumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMinimumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMinimumWorkload_Execute", this->GetGuid());
m_MinLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index e4ed195..0ec5508 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -77,7 +77,7 @@
void NeonMultiplicationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMultiplicationWorkload_Execute", this->GetGuid());
m_PixelWiseMultiplication->run();
}
diff --git a/src/backends/neon/workloads/NeonNegWorkload.cpp b/src/backends/neon/workloads/NeonNegWorkload.cpp
index 06c1467..e7705e6 100644
--- a/src/backends/neon/workloads/NeonNegWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNegWorkload.cpp
@@ -35,7 +35,7 @@
void NeonNegWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNegWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNegWorkload_Execute", this->GetGuid());
m_NegLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
index 77fc429..92d4997 100644
--- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
@@ -19,6 +19,7 @@
namespace
{
+using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>;
bool IsNeonNormalizationDescriptorSupported(const NormalizationDescriptor& parameters,
Optional<std::string&> reasonIfUnsupported)
@@ -58,10 +59,16 @@
}
NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ ACLMemManagerOnDemand& memoryManager)
: FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonNormalizationFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1);
std::string reasonIfUnsupported;
if (!IsNeonNormalizationDescriptorSupported(m_Data.m_Parameters, Optional<std::string&>(reasonIfUnsupported)))
@@ -99,7 +106,7 @@
void NeonNormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNormalizationFloatWorkload_Execute", this->GetGuid());
m_NormalizationLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonPadWorkload.cpp b/src/backends/neon/workloads/NeonPadWorkload.cpp
index 19cdefc..b378d5f 100644
--- a/src/backends/neon/workloads/NeonPadWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPadWorkload.cpp
@@ -19,6 +19,12 @@
NeonPadWorkload::NeonPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<PadQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPadWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonPadWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@
void NeonPadWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPadWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPadWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.cpp b/src/backends/neon/workloads/NeonPermuteWorkload.cpp
index a5ecbcb..9e18f7f 100644
--- a/src/backends/neon/workloads/NeonPermuteWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPermuteWorkload.cpp
@@ -28,6 +28,12 @@
const WorkloadInfo& info)
: BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPermuteWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
using armcomputetensorutils::BuildArmComputePermutationVector;
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -42,7 +48,7 @@
void NeonPermuteWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid());
m_PermuteFunction.run();
}
diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
index 968d5ce..2115e93 100644
--- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
@@ -37,6 +37,12 @@
const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPooling2dWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonPooling2dWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -55,7 +61,7 @@
void NeonPooling2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPooling2dWorkload_Execute", this->GetGuid());
m_PoolingLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp
index 8e6ea30..af03e79 100644
--- a/src/backends/neon/workloads/NeonPreluWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp
@@ -45,7 +45,7 @@
void NeonPreluWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPreluWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPreluWorkload_Execute", this->GetGuid());
m_PreluLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonQLstmWorkload.cpp b/src/backends/neon/workloads/NeonQLstmWorkload.cpp
index fd979d6..c25262a 100644
--- a/src/backends/neon/workloads/NeonQLstmWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQLstmWorkload.cpp
@@ -17,6 +17,12 @@
NeonQLstmWorkload::NeonQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonQLstmWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ITensor> qLstmParams;
// Mandatory params
@@ -230,6 +236,7 @@
void NeonQLstmWorkload::Execute() const
{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQLstmWorkload_Execute", this->GetGuid());
m_QLstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
index 14fbdf3..f50ca81 100644
--- a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
@@ -43,7 +43,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonQuantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
index d809017..e36fde4 100644
--- a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
@@ -124,6 +124,7 @@
void NeonQuantizedLstmWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizedLstmWorkload_Execute", this->GetGuid());
m_QuantizedLstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 1436cd1..bf7ce98 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -18,28 +18,28 @@
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc)
+ const ReduceDescriptor& descriptor)
{
- if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty())
+ if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty())
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_vAxis);
+ descriptor.m_vAxis);
return arm_compute::NEReductionOperation::validate(&aclInputInfo,
&aclOutputInfo,
static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+ ConvertReductionOperationToAcl(descriptor),
+ descriptor.m_KeepDims);
}
else
{
// Validate layer if there are multiple axes.
arm_compute::Status status;
- IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, desc, status);
+ IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
return status;
}
}
@@ -47,6 +47,12 @@
NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonReduceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -65,7 +71,7 @@
void NeonReduceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReduceWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp
index 0472091..ddeac12 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.hpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp
@@ -14,7 +14,7 @@
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc);
+ const ReduceDescriptor& descriptor);
class NeonReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
index 8b11da7..7f2f225 100644
--- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
@@ -39,7 +39,7 @@
void NeonReshapeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReshapeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonResizeWorkload.cpp b/src/backends/neon/workloads/NeonResizeWorkload.cpp
index ab01e30..ecb43ae 100644
--- a/src/backends/neon/workloads/NeonResizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonResizeWorkload.cpp
@@ -53,6 +53,12 @@
const WorkloadInfo& info)
: BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonResizeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonResizeWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -83,7 +89,7 @@
void NeonResizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonResizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonResizeWorkload_Execute", this->GetGuid());
m_ResizeLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
index 44980df..13615f9 100644
--- a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
+++ b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
@@ -36,7 +36,7 @@
void NeonRsqrtWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonRsqrtWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonRsqrtWorkload_Execute", this->GetGuid());
m_RsqrtLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonSinWorkload.cpp b/src/backends/neon/workloads/NeonSinWorkload.cpp
index ac2bd49..4602a9f 100644
--- a/src/backends/neon/workloads/NeonSinWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSinWorkload.cpp
@@ -35,7 +35,7 @@
void NeonSinWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSinWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSinWorkload_Execute", this->GetGuid());
m_SinLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonSliceWorkload.cpp b/src/backends/neon/workloads/NeonSliceWorkload.cpp
index 32cc042..86ae303 100644
--- a/src/backends/neon/workloads/NeonSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSliceWorkload.cpp
@@ -37,6 +37,13 @@
const WorkloadInfo& info)
: BaseWorkload<SliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
+
m_Data.ValidateInputsOutputs("NeonSliceWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -52,7 +59,7 @@
void NeonSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSliceWorkload_Execute", this->GetGuid());
m_SliceFunction.run();
}
diff --git a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
index 505844e..da20479 100644
--- a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
@@ -34,6 +34,12 @@
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonSoftmaxWorkload", 1, 1);
// The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
@@ -48,7 +54,7 @@
void NeonSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSoftmaxWorkload_Execute", this->GetGuid());
m_SoftmaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
index 42dd49c..d7880e0 100644
--- a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
@@ -41,10 +41,16 @@
&aclOutputInfo);
}
-NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& desc,
+NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<SpaceToBatchNdQueueDescriptor>(desc, info)
+ : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToBatchNdWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NESpaceToBatchNdWorkload", 1, 1);
arm_compute::ITensor& input =
@@ -79,7 +85,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToBatchNdWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
index 43c991c..b96b7d0 100644
--- a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
@@ -29,10 +29,16 @@
return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
}
-NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+ : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToDepthWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonSpaceToDepthWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -40,7 +46,7 @@
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
@@ -54,7 +60,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToDepthWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToDepthWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
index 4e428a2..ea1def6 100644
--- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
@@ -56,6 +56,12 @@
NeonSplitterWorkload::NeonSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSplitterWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
bool allOutputsAreSubtensors = true;
// Check that all outputs are sub-tensors
@@ -106,7 +112,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSplitterWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonStackWorkload.cpp b/src/backends/neon/workloads/NeonStackWorkload.cpp
index 0b327b8..ad9bea1 100644
--- a/src/backends/neon/workloads/NeonStackWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStackWorkload.cpp
@@ -49,6 +49,12 @@
NeonStackWorkload::NeonStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<StackQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStackWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
std::vector<arm_compute::ITensor*> aclInputs;
for (auto input : m_Data.m_Inputs)
{
@@ -67,7 +73,7 @@
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStackWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStackWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
index d0aee07..d9ec727 100644
--- a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
@@ -50,6 +50,12 @@
const WorkloadInfo& info)
: BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStridedSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonStridedSliceWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -87,7 +93,7 @@
void NeonStridedSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStridedSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStridedSliceWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index 64f68aa..68bf154 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -57,7 +57,7 @@
void NeonSubtractionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSubtractionWorkload_Execute", this->GetGuid());
m_SubLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
index a1e545c..f9e1b36 100644
--- a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
@@ -77,6 +77,23 @@
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeConvolution2dWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
m_Layer = std::make_unique<arm_compute::NEDeconvolutionLayer>(memoryManager);
m_Layer->configure(&input, m_KernelTensor.get(), m_BiasTensor.get(), &output, padStrideInfo);
@@ -95,7 +112,7 @@
void NeonTransposeConvolution2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonTransposeConvolution2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonTransposeConvolution2dWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonTransposeWorkload.cpp b/src/backends/neon/workloads/NeonTransposeWorkload.cpp
index c11f2df..2e4f358 100644
--- a/src/backends/neon/workloads/NeonTransposeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeWorkload.cpp
@@ -28,6 +28,12 @@
const WorkloadInfo& info)
: BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
const arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@
void NeonTransposeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid());
m_PermuteFunction.run();
}