COMPMID-2305: NEDepthwiseConvolution 3x3: support for QUANT8_PER_CHANNEL_SYMM

Change-Id: I9a917cff6a089ce6ae16fb4e6066a4194e2e9487
Signed-off-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2241
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
diff --git a/src/core/NEON/kernels/convolution/depthwise/impl_base.hpp b/src/core/NEON/kernels/convolution/depthwise/impl_base.hpp
index b102a24..22231cf 100644
--- a/src/core/NEON/kernels/convolution/depthwise/impl_base.hpp
+++ b/src/core/NEON/kernels/convolution/depthwise/impl_base.hpp
@@ -292,6 +292,7 @@
   // Parallelise over blocks of channels
   const auto start_channel = CHANNEL_BLOCK * start;
   const auto stop_channel = std::min<unsigned int>(_n_channels, CHANNEL_BLOCK * stop);
+  const auto params_size_per_channel = this->get_packed_params_size()/_n_channels;
 
   // Compute top and bottom padding for input and output
   const int input_pad_top = _padding_top;
@@ -325,7 +326,7 @@
 
       // Get the offset into the packed parameters
       const auto params_ptr = static_cast<const uint8_t*>(_packed_parameters) +
-        start_channel*(sizeof(TIn)*KernelRows*KernelColumns + sizeof(TBias));
+        start_channel*params_size_per_channel;
 
       // Process the row
       process_tile_row(