Update Neon™ depthwise kernel

- Reduce duplication and simplify overall structure.
- Improve multi-threaded performance by sharing more data in lower-level caches.

Partially Resolves: COMPMID-5054

Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Iac747f39b21c540122fa75218762631c4d787911
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7449
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Andrew Mundy
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: 8a164884dddf769643cf3b9f7f94e43cb4f3c20b [log] [tgz]
author: ramelg01 <ramy.elgammal@arm.com> Thu Apr 07 02:42:52 2022 +0100
committer: Ramy Elgammal <ramy.elgammal@arm.com> Tue Apr 26 15:51:22 2022 +0000
tree: 35958dd48b6df1a851c880dad2b2ce285671b611
parent: c827e99fc46521f43719b0c2d1b6f05d66abf68c [diff] [blame]
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp
index 4198727..78b6aec 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_implementation_constraints.hpp

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -109,6 +109,12 @@
   return args.channel_multiplier == 1;
 }
 
+bool has_channel_multiplier(const DepthwiseArgs &args, const void *) __attribute__ ((unused));
+bool has_channel_multiplier(const DepthwiseArgs &args, const void *)
+{
+  return args.channel_multiplier > 1;
+}
+
 bool qp_has_no_left_shift(const DepthwiseArgs &args, const void *_qp) __attribute__ ((unused));
 bool qp_has_no_left_shift(const DepthwiseArgs &, const void *_qp)
 {
@@ -118,6 +124,21 @@
     (qp->per_layer_left_shift == 0);
 }
 
+bool qp_zero_a_offset(const DepthwiseArgs &args, const void *_qp) __attribute__ ((unused));
+bool qp_zero_a_offset(const DepthwiseArgs &, const void *_qp)
+{
+  const auto qp = static_cast<const arm_gemm::Requantize32 *>(_qp);
+  return qp->a_offset == 0;
+}
+
+template <typename T> bool qp_skip_clamp(const DepthwiseArgs &args, const void *_qp) __attribute__ ((unused));
+template <typename T> bool qp_skip_clamp(const DepthwiseArgs &, const void *_qp)
+{
+  const auto qp = static_cast<const arm_gemm::Requantize32 *>(_qp);
+  return (qp->minval == std::numeric_limits<T>::min() &&
+          qp->maxval == std::numeric_limits<T>::max());
+}
+
 }  // namespace
 }  // namespace depthwise
 }  // namespace arm_conv
commit	8a164884dddf769643cf3b9f7f94e43cb4f3c20b	[log] [tgz]
author	ramelg01 <ramy.elgammal@arm.com>	Thu Apr 07 02:42:52 2022 +0100
committer	Ramy Elgammal <ramy.elgammal@arm.com>	Tue Apr 26 15:51:22 2022 +0000
tree	35958dd48b6df1a851c880dad2b2ce285671b611
parent	c827e99fc46521f43719b0c2d1b6f05d66abf68c [diff] [blame]