[ONCPUML-97]: Implement "int8" support for 2D decomposition at high core counts

Interleaved2d functionality was extended to uint8 and int8 kernels.

Change-Id: If78facbce56e9ec7b2f4c23436af0bd5db7f7b69
Signed-off-by: Aleksandr Nikolaev <aleksandr.nikolaev@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3467
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>

commit: a084b46835d20fdfe6e590b91b7ca64fba3542df [log] [tgz]
author: Aleksandr Nikolaev <aleksandr.nikolaev@arm.com> Thu Jun 25 12:25:52 2020 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> Thu Jul 16 16:02:50 2020 +0000
tree: e4abe49cc10f3406dcaad9d3dfbbc22cc54476a6
parent: f738fe6b6e059916294c48b942952c261569df18 [diff] [blame]
diff --git a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
index b09ea2e..8e9f393 100644
--- a/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp
+++ b/src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp

@@ -438,12 +438,18 @@
         const int granule_threshold = 200;
         scheduling_hint             = IScheduler::Hints(Window::DimX, IScheduler::StrategyHint::DYNAMIC, granule_threshold);
     }
-    else if(_kernel_info.method == arm_gemm::GemmMethod::GEMM_INTERLEAVED_2D && (_d->info()->data_type() == DataType::F32 || _d->info()->data_type() == DataType::F16))
+    else if(_kernel_info.method == arm_gemm::GemmMethod::GEMM_INTERLEAVED_2D && (_d->info()->data_type() == DataType::F32 || _d->info()->data_type() == DataType::F16 ||  _d->info()->data_type() == DataType::U8 || _d->info()->data_type() == DataType::S8) )
     {
         //GEMM_INTERLEAVED supports 2D parallelism, IScheduler::split_dimensions_all signals to parallelise over all window dimensions
         const int granule_threshold = 200;
         scheduling_hint             = IScheduler::Hints(IScheduler::split_dimensions_all, IScheduler::StrategyHint::STATIC, granule_threshold);
     }
+    else if(_kernel_info.method == arm_gemm::GemmMethod::QUANTIZE_WRAPPER_2D && (_d->info()->data_type() == DataType::QASYMM8 || _d->info()->data_type() == DataType::QASYMM8_SIGNED))
+    {
+        //special case for QASYMM8 to support 2D parallelism, scheduler here may be tweaked differently compared to FP32 case
+        const int granule_threshold = 200;
+        scheduling_hint             = IScheduler::Hints(IScheduler::split_dimensions_all, IScheduler::StrategyHint::STATIC, granule_threshold);
+    }
 
     NEScheduler::get().schedule(_optimised_kernel.get(), scheduling_hint);
 }
commit	a084b46835d20fdfe6e590b91b7ca64fba3542df	[log] [tgz]
author	Aleksandr Nikolaev <aleksandr.nikolaev@arm.com>	Thu Jun 25 12:25:52 2020 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	Thu Jul 16 16:02:50 2020 +0000
tree	e4abe49cc10f3406dcaad9d3dfbbc22cc54476a6
parent	f738fe6b6e059916294c48b942952c261569df18 [diff] [blame]