IVGCVSW-5842 Remove cross-wiring in depthwise

 * Reading tensor infos won't allow a permutation vector anymore.
   The permutation only changed the quantization dimension, not the
   shape, and was therefore misleading.
 * The permutation of the full tensor info is now performed in
   armnnUtils::Permuted
 * Changed TfLite Parser depthwise parsing function
 * Added unit tests to TfLite Parser with more random data
 * Changed TfLite Delegate depthwise parsing function
 * Added unit test to the delegate with per channel quantization

!android-nn-driver:5412

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: I1f985ee69547bcaf16a72201e00a6b6fe1ef9a97
diff --git a/delegate/src/test/Convolution2dTest.cpp b/delegate/src/test/Convolution2dTest.cpp
index 2ce2944..6f498ce 100644
--- a/delegate/src/test/Convolution2dTest.cpp
+++ b/delegate/src/test/Convolution2dTest.cpp
@@ -166,8 +166,10 @@
                                             expectedOutputValues,
                                             biasShape,
                                             biasValues,
-                                            1, // filter scale
-                                            4, // filter offset
+                                            {1.0f}, // biasScale
+                                            {0},    // biasOffset
+                                            {1.0f}, // filterScale
+                                            {4},    // filterOffsets
                                             2, // output scale
                                             20); // output offset
 }
diff --git a/delegate/src/test/ConvolutionTestHelper.hpp b/delegate/src/test/ConvolutionTestHelper.hpp
index b2a3c88..1b33c1d 100644
--- a/delegate/src/test/ConvolutionTestHelper.hpp
+++ b/delegate/src/test/ConvolutionTestHelper.hpp
@@ -34,13 +34,16 @@
                                           const std::vector <int32_t>& outputTensorShape,
                                           const std::vector <T>& filterData,
                                           const std::vector <B>& biasData,
-                                          float filterScale = 1.0f,
-                                          int filterOffset = 0,
+                                          const std::vector<float> biasScales = {1.0f},
+                                          const std::vector<int64_t> biasOffsets = {0},
+                                          const std::vector<float> filterScales = {1.0f},
+                                          const std::vector<int64_t> filterOffsets = {0},
                                           float outputQuantScale = 2.0f,
                                           int outputQuantOffset = 0,
                                           float quantScale = 1.0f,
                                           int quantOffset = 0,
-                                          int32_t depth_multiplier = 1)
+                                          int32_t depth_multiplier = 1,
+                                          int32_t filterQuantizationDim = 0)
 {
     using namespace tflite;
     flatbuffers::FlatBufferBuilder flatBufferBuilder;
@@ -67,12 +70,23 @@
                                      0,
                                      flatBufferBuilder.CreateVector<float>({ outputQuantScale }),
                                      flatBufferBuilder.CreateVector<int64_t>({ outputQuantOffset }));
+
     auto filterQuantizationParameters =
-        CreateQuantizationParameters(flatBufferBuilder,
-                                     0,
-                                     0,
-                                     flatBufferBuilder.CreateVector<float>({ filterScale }),
-                                     flatBufferBuilder.CreateVector<int64_t>({ filterOffset }));
+            CreateQuantizationParameters(flatBufferBuilder,
+                                         0,
+                                         0,
+                                         flatBufferBuilder.CreateVector<float>(filterScales),
+                                         flatBufferBuilder.CreateVector<int64_t>(filterOffsets),
+                                         tflite::QuantizationDetails_NONE,
+                                         0,
+                                         filterQuantizationDim);
+
+    auto biasQuantizationParameters =
+            CreateQuantizationParameters(flatBufferBuilder,
+                                         0,
+                                         0,
+                                         flatBufferBuilder.CreateVector<float>(biasScales),
+                                         flatBufferBuilder.CreateVector<int64_t>(biasOffsets));
 
     std::array<flatbuffers::Offset<Tensor>, 4> tensors;
     tensors[0] = CreateTensor(flatBufferBuilder,
@@ -100,7 +114,7 @@
                               biasTensorType,
                               2,
                               flatBufferBuilder.CreateString("bias"),
-                              quantizationParameters);
+                              biasQuantizationParameters);
     tensors[3] = CreateTensor(flatBufferBuilder,
                               flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
                                                                       outputTensorShape.size()),
@@ -192,13 +206,16 @@
                      std::vector<T>& expectedOutputValues,
                      const std::vector<int32_t>& biasShape = {},
                      const std::vector<B>& biasValues = {},
-                     float filterScale = 1.0f,
-                     int filterOffset = 0,
+                     const std::vector<float> biasScales = {1.0f},
+                     const std::vector<int64_t> biasOffsets = {0},
+                     const std::vector<float> filterScales = {1.0f},
+                     const std::vector<int64_t> filterOffsets = {0},
                      float outputQuantScale = 2.0f,
                      int outputQuantOffset = 0,
                      float quantScale = 1.0f,
                      int quantOffset = 0,
-                     int32_t depth_multiplier = 1)
+                     int32_t depth_multiplier = 1,
+                     int32_t filterQuantizationDim = 3)
 
 {
     using namespace tflite;
@@ -218,13 +235,16 @@
                                           outputShape,
                                           filterValues,
                                           biasValues,
-                                          filterScale,
-                                          filterOffset,
+                                          biasScales,
+                                          biasOffsets,
+                                          filterScales,
+                                          filterOffsets,
                                           outputQuantScale,
                                           outputQuantOffset,
                                           quantScale,
                                           quantOffset,
-                                          depth_multiplier);
+                                          depth_multiplier,
+                                          filterQuantizationDim);
 
 
     const Model* tfLiteModel = GetModel(modelBuffer.data());
diff --git a/delegate/src/test/DepthwiseConvolution2dTest.cpp b/delegate/src/test/DepthwiseConvolution2dTest.cpp
index 6ca4569..ca10f2c 100644
--- a/delegate/src/test/DepthwiseConvolution2dTest.cpp
+++ b/delegate/src/test/DepthwiseConvolution2dTest.cpp
@@ -70,12 +70,14 @@
                            expectedOutputValues,
                            biasShape,
                            biasValues,
-                           1.0f, // filterScale
-                           0,    // filterOffset
-                           2.0f, // outputQuantScale
-                           0,    // outputQuantOffset
-                           1.0f, // quantScale
-                           0,    // quantOffset
+                           {1.0f}, // biasScale
+                           {0},    // biasOffset
+                           {1.0f}, // filterScale
+                           {0},    // filterOffsets
+                           2.0f,   // outputQuantScale
+                           0,      // outputQuantOffset
+                           1.0f,   // quantScale
+                           0,      // quantOffset
                            depth_multiplier);
 }
 
@@ -126,6 +128,100 @@
                                       biasValues);
 }
 
+void DepthwiseConv2dSameInt8PerChannelTest(std::vector<armnn::BackendId>& backends)
+{
+    // Set input data
+    std::vector<int32_t> inputShape { 1, 4, 4, 4 };
+    std::vector<int32_t> filterShape { 1, 2, 2, 16 };
+    std::vector<int32_t> biasShape {16} ;
+    std::vector<int32_t> outputShape { 1, 4, 4, 16 };
+
+    static std::vector<int8_t> inputValues =
+        {
+            3,3,3,4, 4,4,0,0, 0,3,4,3, 0,2,2,3,
+            3,0,3,0, 0,3,2,1, 4,1,2,2, 0,0,0,4,
+            3,2,2,2, 2,1,0,4, 4,3,2,4, 3,2,0,0,
+            4,1,4,4, 1,0,4,3, 3,2,0,3, 1,1,0,2
+        };
+
+    std::vector<int8_t> filterValues = { 12,20,10, 3, 2,24, 9,10, 5,16,30,12, 3,10, 4,32,
+                                           8, 0,30, 3, 0,16,12,15,20,12, 0, 3, 9,20, 8, 8,
+                                          12,15,20, 0, 0, 0, 3,15,15, 8,40,12, 9, 5, 2,24,
+                                           4, 0, 0, 6, 6, 0, 3, 5,20, 8,20, 3, 6,15, 4, 0 };
+    std::vector<float> filterScales = {         0.25,   0.2,        0.1, 0.3333333333,
+                                                 0.5, 0.125, 0.33333333,          0.2,
+                                                 0.2,  0.25,        0.1,  0.333333333,
+                                        0.3333333333,   0.2,        0.5,        0.125 };
+
+    int32_t filterQuantizationDim = 3;
+
+    int32_t depth_multiplier = 4;
+
+    std::vector<int32_t> biasValues = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+    float inputScale = 1.0f;
+    std::vector<float> biasScales {};
+    std::vector<int64_t> biasOffsets {};
+    std::vector<int64_t> filterOffsets {};
+    for (const auto& filterScale: filterScales)
+    {
+        biasScales.push_back(inputScale * filterScale);
+        // filter and bias offset always needs to be zero for per channel. We don't support anything else
+        biasOffsets.push_back(0);
+        filterOffsets.push_back(0);
+    }
+
+    std::vector<int8_t> expectedOutputValues =
+        {
+            26,21,21, 7,12,17,28,21,20,22,25,26, 6,11,10,16,
+            16,16, 4,12, 7,18,28,27,30,20,12,14,16,19,17, 6,
+            12,12, 8, 0, 3,13,18,15,18,26,20,26,26,32,28,21,
+            0, 0, 0, 0, 2, 6, 6, 4, 2, 8, 6, 8,15,10,10,24,
+            20,21, 9, 7, 3, 6,15,16,17,22,17,22,17,18,14, 7,
+            18, 6,16,12,12,11,17,15,18,18,10,12,27,26,22,18,
+            27,28,12,10, 7, 3, 8,13, 8,12,14,16,26,24,24,24,
+            9, 9, 6, 0, 0, 0, 2, 6, 0, 0, 0, 0, 4, 8, 8,16,
+            26,24,17, 7, 2, 8,11,10,30,24,30,28,32,33,30,24,
+            20,11,16,12, 7, 9,17,13,20,14,16,18,31,36,33,29,
+            28,25,19, 9, 6,13,20,19, 2, 8, 6, 8,17,17,15,25,
+            12,15, 5, 3, 2, 6, 7, 7, 0, 0, 0, 0, 6, 2, 2, 6,
+            14,16, 7, 5, 1, 3, 3, 2,20,28,12,20,13,20,20,19,
+            9, 4,10, 4, 0, 4, 8, 6, 4,16,12,16,12,18,18,15,
+            11,12, 6, 4, 2, 8,10, 7, 0, 0, 0, 0, 9,14,14,14,
+            3, 4, 1, 1, 1, 3, 3, 2, 0, 0, 0, 0, 2, 4, 4, 8
+        };
+
+    tflite::Padding padding = tflite::Padding_SAME;
+
+    ConvolutionTest<int8_t, int32_t>(tflite::BuiltinOperator_DEPTHWISE_CONV_2D,
+                                      ::tflite::TensorType_INT8,
+                                      1, // strideX
+                                      1, // strideY
+                                      1, // dilationX
+                                      1, // dilationY
+                                      padding,
+                                      tflite::ActivationFunctionType_NONE,
+                                      backends,
+                                      inputShape,
+                                      filterShape,
+                                      outputShape,
+                                      inputValues,
+                                      filterValues,
+                                      expectedOutputValues,
+                                      biasShape,
+                                      biasValues,
+                                      biasScales,
+                                      biasOffsets,
+                                      filterScales,
+                                      filterOffsets,
+                                      1.0f,
+                                      0,
+                                      inputScale,
+                                      0,
+                                      depth_multiplier,
+                                      filterQuantizationDim);
+}
+
 TEST_SUITE("DepthwiseConv2d_CpuRef_Tests")
 {
 
@@ -141,6 +237,12 @@
     DepthwiseConv2dSameUint8Test(backends);
 }
 
+TEST_CASE ("DepthwiseConv2d_Same_Int8_PerChannelQuantization_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    DepthwiseConv2dSameInt8PerChannelTest(backends);
+}
+
 }//End of TEST_SUITE("DepthwiseConv2d_CpuRef_Tests")
 
 TEST_SUITE("DepthwiseConv2d_CpuAcc_Tests")