COMPMID-2308: NEConvolutionLayer: support QUANT8_SYMM_PER_CHANNEL filters

Change-Id: Ic1bf5f0d21ccd525f84213a360f7e199d7f50577
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2177
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h
index 30be25f..23918a4 100644
--- a/tests/validation/reference/Convolution3d.h
+++ b/tests/validation/reference/Convolution3d.h
@@ -42,13 +42,16 @@
 }
 
 // 3D convolution for floating point type
-template < typename T, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TB>::value, int >::type = 0 >
-inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+template < typename T, typename TW, typename TB, typename std::enable_if < validation::is_floating_point<T>::value &&validation::is_floating_point<TW>::value
+                                                                           &&validation::is_floating_point<TB>::value,
+                                                                           int >::type = 0 >
+inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
                           int i_offset, int w_offset, int b_offset, int o_offset,
-                          int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1)
+                          int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1, int filter_id = 0)
 {
+    ARM_COMPUTE_UNUSED(filter_id);
     const T *in_ptr  = in.data() + i_offset;
-    const T *w_ptr   = weights.data() + w_offset;
+    const TW *w_ptr   = weights.data() + w_offset;
     const TB *b_ptr   = bias.data() + b_offset;
     T        *out_ptr = out.data() + o_offset;
 
@@ -77,8 +80,8 @@
                     const int idx = xk + half_width_weights_start;
                     const int idy = yk + half_height_weights_start;
 
-                    const T i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
-                    const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
+                    const T  i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
+                    const TW w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
 
                     acc += i_value * w_value;
                 }
@@ -91,13 +94,16 @@
 }
 
 // 3D convolution for QASYMM8 type
-template < typename T, typename TB, typename std::enable_if < std::is_same<T, uint8_t>::value &&std::is_same<TB, int32_t>::value, int >::type = 0 >
-inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
+template < typename T, typename TW, typename TB, typename std::enable_if < std::is_same<T, uint8_t>::value &&(std::is_same<TW, uint8_t>::value
+                                                                                                              || std::is_same<TW, int8_t>::value)
+                                                                           &&std::is_same<TB, int32_t>::value,
+                                                                           int >::type = 0 >
+inline void convolution3d(const SimpleTensor<T> &in, const SimpleTensor<TW> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &out,
                           int i_offset, int w_offset, int b_offset, int o_offset,
-                          int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1)
+                          int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1, int filter_id = 0)
 {
     const T *in_ptr  = in.data() + i_offset;
-    const T *w_ptr   = weights.data() + w_offset;
+    const TW *w_ptr   = weights.data() + w_offset;
     const TB *b_ptr   = bias.data() + b_offset;
     T        *out_ptr = out.data() + o_offset;
 
@@ -107,10 +113,22 @@
 
     const int   input_offset   = -iq_info.offset;
     const float input_scale    = iq_info.scale;
-    const int   weights_offset = -wq_info.offset;
-    const float weights_scale  = wq_info.scale;
-    const int   output_offset  = oq_info.offset;
-    const float output_scale   = oq_info.scale;
+    int         weights_offset = -wq_info.offset;
+    float       weights_scale  = wq_info.scale;
+    if(is_data_type_quantized_per_channel(weights.data_type()))
+    {
+        if(is_data_type_quantized_asymmetric(weights.data_type()))
+        {
+            weights_offset = weights.quantization_info().offset()[filter_id];
+        }
+        else
+        {
+            weights_offset = 0;
+        }
+        weights_scale = weights.quantization_info().scale()[filter_id];
+    }
+    const int   output_offset = oq_info.offset;
+    const float output_scale  = oq_info.scale;
 
     int         output_multiplier = 0;
     int         output_shift      = 0;
@@ -142,9 +160,8 @@
                     const int idx = xk + half_width_weights_start;
                     const int idy = yk + half_height_weights_start;
 
-                    const uint8_t i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
-                    const uint8_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
-
+                    const int32_t i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in];
+                    const int32_t w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights];
                     acc += (i_value + input_offset) * (w_value + weights_offset);
                 }
             }