Implement Minimum Workload Size (MWS) in all CPPKernels used by small networks
* create get_mws method in ICPPKernel class that returns a default value for all kernels
* override the default value for all the kernels used by small networks (according to the benchmark case)
Resolves COMPMID-4648
Change-Id: I46d7cae61217213279d2ee740edc73f600b6d576
Signed-off-by: Dana Zlotnik <dana.zlotnik@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6412
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
index eed4bb9..a71864c 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp
@@ -354,6 +354,13 @@
{
return "CpuDepthwiseConv2dAssemblyWrapperKernel";
}
+
+size_t CpuDepthwiseConv2dAssemblyWrapperKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
+{
+ ARM_COMPUTE_UNUSED(platform, thread_count);
+
+ return ICPPKernel::small_network_mws;
+}
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
index 8ee24a6..8980922 100644
--- a/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.h
@@ -108,6 +108,15 @@
*/
bool is_configured() const;
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size (small_network_mws) for the requested configuration.
+ */
+ size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
+
private:
std::unique_ptr<arm_conv::depthwise::IDepthwiseCommon> _kernel_asm;
std::vector<int32_t> _multipliers{};
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
index 958c04b..f9c11fd 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp
@@ -274,6 +274,13 @@
_kernel_asm = std::move(pooling_kernel_asm);
}
+
+size_t CpuPool2dAssemblyWrapperKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
+{
+ ARM_COMPUTE_UNUSED(platform, thread_count);
+
+ return ICPPKernel::small_network_mws;
+}
} // namespace kernels
} // namespace cpu
} // namespace arm_compute
diff --git a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
index ab3ed25..8625fd9 100644
--- a/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
+++ b/src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h
@@ -112,6 +112,15 @@
void create_arm_pooling_requant(const ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, const CPUInfo &cpu_info);
std::unique_ptr<arm_conv::pooling::IPoolingCommon> _kernel_asm{ nullptr };
+
+ /** Return minimum workload size of the relevant kernel
+ *
+ * @param[in] platform The CPU platform used to create the context.
+ * @param[in] thread_count Number of threads in the execution.
+ *
+ * @return Minimum workload size (small_network_mws) for the requested configuration.
+ */
+ size_t get_mws(const CPUInfo &platform, size_t thread_count) const override;
};
} // namespace kernels
} // namespace cpu