Main Compliance: DEPTHWISE_CONV2D support

Added DEPTHWISE_CONV2D dot product data generation.
Updated test generation to support FP16 and FP32 compliance tests.
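
The new generators assign each tensor element a value from a dot product
data series: input and weight values are selected by the element's
position within the KH x KW kernel window, and bias values come from
series index 2. A minimal sketch of the input mapping (assuming the NHWC
flattening used in generate_dot_product.cc below; kernelIndex is an
illustrative helper, not part of this patch):

    // Illustrative only: map a flattened NHWC input element index t to the
    // dot product series index for its position in the KH x KW kernel window.
    #include <cstdint>

    uint32_t kernelIndex(int64_t t, uint32_t IH, uint32_t IW, uint32_t C,
                         uint32_t KH, uint32_t KW)
    {
        const uint32_t ix = (t / C) % IW;        // input column (W)
        const uint32_t iy = ((t / C) / IW) % IH; // input row (H)
        return (iy % KH) * KW + (ix % KW);       // kernel window position
    }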

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: I0471d0a1e4e279a27233f4d285082906ceea1bff
diff --git a/reference_model/src/generate/generate_dot_product.cc b/reference_model/src/generate/generate_dot_product.cc
index 67190c6..a5870c9 100644
--- a/reference_model/src/generate/generate_dot_product.cc
+++ b/reference_model/src/generate/generate_dot_product.cc
@@ -402,7 +402,7 @@
     }
 }
 //---------------------------------------------------------------------------//
-//                              Avg Pool 2D                                   //
+//                              Avg Pool 2D                                  //
 //---------------------------------------------------------------------------//
 
 template <typename DataType>
@@ -469,6 +469,139 @@
 
     return true;
 }
+//---------------------------------------------------------------------------//
+//                              Depthwise Conv2D                             //
+//---------------------------------------------------------------------------//
+
+template <typename DataType>
+bool generateDepthwiseConv2DInput(const TosaReference::GenerateConfig& cfg,
+                                  TosaReference::IDotProductGenerator& generator,
+                                  DataType* data,
+                                  size_t size)
+{
+    if (cfg.dotProductInfo.kernel.size() != 2 || cfg.dotProductInfo.kernel[0] <= 0 || cfg.dotProductInfo.kernel[1] <= 0)
+    {
+        WARNING("[Generator][DP][DWConv2D][Input] Missing or incorrect kernel size information.");
+        return false;
+    }
+    if (cfg.shape.size() != 4)
+    {
+        WARNING("[Generator][DP][DWConv2D][Input] Tensor shape expected 4 dimensions.");
+        return false;
+    }
+
+    const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
+    const uint32_t IH = cfg.shape[1];
+    const uint32_t IW = cfg.shape[2];
+    const uint32_t C  = cfg.shape[3];
+    const uint32_t KH = cfg.dotProductInfo.kernel[0];
+    const uint32_t KW = cfg.dotProductInfo.kernel[1];
+
+    for (int64_t t = 0; t < T; ++t)
+    {
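+        // Locate element t in the NHWC input (column ix, row iy), then map it
+        // to its position k within the KH x KW kernel window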
+        uint32_t ix = (t / C) % IW;
+        uint32_t iy = ((t / C) / IW) % IH;
+        uint32_t k  = ((iy % KH) * KW + (ix % KW));
+
+        data[t] = static_cast<DataType>(generator(k));
+    }
+    return true;
+}
+
+template <typename DataType>
+bool generateDepthwiseConv2DWeight(const TosaReference::GenerateConfig& cfg,
+                                   TosaReference::IDotProductGenerator& generator,
+                                   DataType* data,
+                                   size_t size)
+{
+    if (cfg.shape.size() != 4)
+    {
+        WARNING("[Generator][DP][DWConv2D][Weight] Tensor shape expected 4 dimensions.");
+        return false;
+    }
+
+    const int64_t T   = TosaReference::numElementsFromShape(cfg.shape);
+    const uint32_t KH = cfg.shape[0];
+    const uint32_t KW = cfg.shape[1];
+    const uint32_t C  = cfg.shape[2];
+    const uint32_t M  = cfg.shape[3];
+
+    for (int64_t t = 0; t < T; ++t)
+    {
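+        // Locate element t in the [KH, KW, C, M] weight tensor and map it to
+        // its position k within the KH x KW kernel window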
+        uint32_t kx = ((t / M) / C) % KW;
+        uint32_t ky = (((t / M) / C) / KW) % KH;
+        uint32_t k  = (ky * KW + kx);
+
+        data[t] = static_cast<DataType>(generator(k));
+    }
+    return true;
+}
+
+template <typename DataType>
+bool generateDepthwiseConv2DBias(const TosaReference::GenerateConfig& cfg,
+                                 TosaReference::IDotProductGenerator& generator,
+                                 DataType* data,
+                                 size_t size)
+{
+    if (cfg.shape.size() != 1)
+    {
+        WARNING("[Generator][DP][DWConv2D][Bias] Tensor shape expected 1 dimension.");
+        return false;
+    }
+
+    const uint32_t T = cfg.shape[0];
+
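+    // Every bias element is drawn from dot product data series index 2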
+    for (uint32_t t = 0; t < T; ++t)
+    {
+        data[t] = static_cast<DataType>(generator(2));
+    }
+    return true;
+}
+
+bool generateDepthwiseConv2D(const TosaReference::GenerateConfig& cfg,
+                             TosaReference::IDotProductGenerator& generator,
+                             void* data,
+                             size_t size)
+{
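+    // Dispatch on data type, then on the operator input slot
+    // (0 = input, 1 = weight, 2 = bias)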
+    switch (cfg.dataType)
+    {
+        case DType::DType_FP32: {
+            float* outData = reinterpret_cast<float*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateDepthwiseConv2DInput(cfg, generator, outData, size);
+                case 1:
+                    return generateDepthwiseConv2DWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateDepthwiseConv2DBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][DWConv2D] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
+        case DType::DType_FP16: {
+            half_float::half* outData = reinterpret_cast<half_float::half*>(data);
+            switch (cfg.inputPos)
+            {
+                case 0:
+                    return generateDepthwiseConv2DInput(cfg, generator, outData, size);
+                case 1:
+                    return generateDepthwiseConv2DWeight(cfg, generator, outData, size);
+                case 2:
+                    return generateDepthwiseConv2DBias(cfg, generator, outData, size);
+                default:
+                    WARNING("[Generator][DP][DWConv2D] Invalid input tensor slot position to operator.");
+                    return false;
+            }
+            break;
+        }
+        default:
+            WARNING("[Generator][DP][DWConv2D] Only supports FP32 or FP16.");
+            return false;
+    }
+}
 }    // namespace
 
 namespace TosaReference
@@ -501,6 +634,8 @@
             return generateFullyConnected(cfg, *generator, data, size);
         case tosa::Op_AVG_POOL2D:
             return generateAvgPool2D(cfg, *generator, data, size);
+        case tosa::Op_DEPTHWISE_CONV2D:
+            return generateDepthwiseConv2D(cfg, *generator, data, size);
         default:
             WARNING("[Generator][DP] Unsupported operator.");
             return false;
diff --git a/reference_model/src/generate/generate_utils.cc b/reference_model/src/generate/generate_utils.cc
index 917f1b1..b2208c7 100644
--- a/reference_model/src/generate/generate_utils.cc
+++ b/reference_model/src/generate/generate_utils.cc
@@ -48,6 +48,7 @@
                                  { Op::Op_CLAMP, "CLAMP" },
                                  { Op::Op_CONCAT, "CONCAT" },
                                  { Op::Op_CONV2D, "CONV2D" },
+                                 { Op::Op_DEPTHWISE_CONV2D, "DEPTHWISE_CONV2D" },
                                  { Op::Op_EQUAL, "EQUAL" },
                                  { Op::Op_ERF, "ERF" },
                                  { Op::Op_EXP, "EXP" },
diff --git a/reference_model/test/generate_tests.cpp b/reference_model/test/generate_tests.cpp
index e4a6d20..c01a223 100644
--- a/reference_model/test/generate_tests.cpp
+++ b/reference_model/test/generate_tests.cpp
@@ -1,4 +1,4 @@
-// Copyright (c) 2023, ARM Limited.
+// Copyright (c) 2023-2024, ARM Limited.
 //
 //    Licensed under the Apache License, Version 2.0 (the "License");
 //    you may not use this file except in compliance with the License.
@@ -12,6 +12,7 @@
 //    See the License for the specific language governing permissions and
 //    limitations under the License.
 #include "generate.h"
+#include "half.hpp"
 
 #include <doctest.h>
 
@@ -33,7 +34,8 @@
     }
 }
 
-void check_value(bool match, uint32_t result, uint32_t expected, uint32_t idx)
+template <typename T>
+void check_value(bool match, T result, T expected, uint32_t idx)
 {
     std::stringstream msg;
     msg << "index: " << idx << " expected: " << std::hex << expected << " got: " << result;
@@ -57,6 +59,15 @@
 }
 
 template <typename T>
+void check_output(const std::vector<T>& results, const std::vector<uint16_t>& expected)
+{
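+    // Compare each result bit-exactly against an expected raw bit pattern
+    // (e.g. FP16 values supplied as uint16_t)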
+    for (size_t idx = 0; idx < expected.size(); ++idx)
+    {
+        check_value(true, *(uint16_t*)&results[idx], expected[idx], idx);
+    }
+}
+
+template <typename T>
 void check_output(const std::vector<T>& results, const std::vector<T>& expected)
 {
     for (size_t idx = 0; idx < expected.size(); ++idx)
@@ -896,4 +907,162 @@
         }
     }
 }
+void depthwise_conv2d_test_FP16(const std::string tosaName[3],
+                                const size_t tosaElements[3],
+                                const std::string templateJsonCfg,
+                                const std::string setStr,
+                                int32_t param,
+                                const std::vector<uint16_t> expected)
+{
+    std::string jsonCfg = templateJsonCfg;
+    update_json_template(jsonCfg, "_SET_", setStr);
+
+    std::vector<half_float::half> buffer(tosaElements[param]);
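+    // The size argument is in bytes - two bytes per FP16 element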
+    REQUIRE(tgd_generate_data(jsonCfg.c_str(), tosaName[param].c_str(), (void*)buffer.data(), tosaElements[param] * 2));
+    check_output<half_float::half>(buffer, expected);
+}
+
+TEST_CASE("positive - FP16 depthwise_conv2d dot product (first 3 values)")
+{
+    std::string templateJsonCfg = R"({
+        "tensors" : {
+            "input" : {
+                "generator": "DOT_PRODUCT",
+                "data_type": "FP16",
+                "input_type": "VARIABLE",
+                "shape" : [1, 6, 3, 4],
+                "input_pos": 0,
+                "op" : "DEPTHWISE_CONV2D",
+                "dot_product_info": {
+                    "s": _SET_,
+                    "ks": 3,
+                    "acc_type": "FP16",
+                    "kernel": [1, 3]
+                }
+            },
+            "weight" : {
+                "generator": "DOT_PRODUCT",
+                "data_type": "FP16",
+                "input_type": "CONSTANT",
+                "shape" : [1, 3, 4, 2],
+                "input_pos": 1,
+                "op" : "DEPTHWISE_CONV2D",
+                "dot_product_info": {
+                    "s": _SET_,
+                    "ks": 3,
+                    "acc_type": "FP16"
+                }
+            },
+            "bias" : {
+                "generator": "DOT_PRODUCT",
+                "data_type": "FP16",
+                "input_type": "CONSTANT",
+                "shape" : [ 2 ],
+                "input_pos": 2,
+                "op" : "DEPTHWISE_CONV2D",
+                "dot_product_info": {
+                    "s": _SET_,
+                    "ks": 3,
+                    "acc_type": "FP16"
+                }
+            }
+
+        }
+    })";
+
+    const std::string tosaName[3] = { "input", "weight", "bias" };
+    const size_t tosaElements[3]  = { (1 * 6 * 3 * 4), (1 * 3 * 4 * 2), 2 };
+
+    SUBCASE("depthwise_conv2d, set 0, param 0")
+    {
+        std::vector<uint16_t> expected = { 0xbb33, 0xbb9b, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "0", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 0, param 1")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0, 0x39a8 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "0", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 0, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "0", 2, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 1, param 0")
+    {
+        std::vector<uint16_t> expected = { 0x541c, 0x5482, 0x54fb };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "1", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 1, param 1")
+    {
+        std::vector<uint16_t> expected = { 0x57ee, 0x56a2, 0x5520 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "1", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 1, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x7005, 0x7204 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "1", 2, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 2, param 0")
+    {
+        std::vector<uint16_t> expected = { 0x3c00, 0x3c00, 0x3c00 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "2", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 2, param 1")
+    {
+        std::vector<uint16_t> expected = { 0x3c00, 0x3c00, 0x3c00 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "2", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 2, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "2", 2, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 3, param 0")
+    {
+        std::vector<uint16_t> expected = { 0x4c00, 0x4c00, 0x4c00 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "3", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 3, param 1")
+    {
+        std::vector<uint16_t> expected = { 0x4c00, 0x4c00, 0x4c00 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "3", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 3, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "3", 2, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 4, param 0")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0, 0x5798 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "4", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 4, param 1")
+    {
+        std::vector<uint16_t> expected = { 0x49a3, 0xd866, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "4", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 4, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "4", 2, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 5, param 0")
+    {
+        std::vector<uint16_t> expected = { 0x4ead, 0x525d, 0x55a7 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "5", 0, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 5, param 1")
+    {
+        std::vector<uint16_t> expected = { 0xcf61, 0x5224, 0x550b };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "5", 1, expected);
+    }
+    SUBCASE("depthwise_conv2d, set 5, param 2")
+    {
+        std::vector<uint16_t> expected = { 0x0, 0x0 };
+        depthwise_conv2d_test_FP16(tosaName, tosaElements, templateJsonCfg, "5", 2, expected);
+    }
+}
+
 TEST_SUITE_END();    // generate
diff --git a/verif/conformance/tosa_main_profile_ops_info.json b/verif/conformance/tosa_main_profile_ops_info.json
index ced1d9e..c77f0be 100644
--- a/verif/conformance/tosa_main_profile_ops_info.json
+++ b/verif/conformance/tosa_main_profile_ops_info.json
@@ -747,6 +747,7 @@
         "profile": [
             "tosa-mi"
         ],
+        "support_for": [ "lazy_data_gen" ],
         "generation": {
             "standard": {
                 "negative_dim_range": "1,10",
@@ -759,20 +760,20 @@
                         "--target-dtype",
                         "bf16",
                         "--fp-values-range",
-                        "-2.0,2.0",
+                        "-max,max",
                         "--target-shape",
                         "1,17,31,4",
                         "--target-shape",
                         "1,37,11,5",
                         "--tensor-dim-range",
-                        "1,16",
+                        "1,32",
                         "--allow-pooling-and-conv-oversizes"
                     ],
                     [
                         "--target-dtype",
                         "fp32",
                         "--fp-values-range",
-                        "-2.0,2.0",
+                        "-max,max",
                         "--target-shape",
                         "1,1,65531,2",
                         "--target-shape",
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index 4863956..8501caa 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -2038,9 +2038,12 @@
 
                             # Compliance - number of dot product calculations
                             if depthwise:
-                                # TODO - add support
-                                dots = 0
+                                # N*OH*OW*C*M
+                                dots = gtu.product(
+                                    (ifm_shape[0], *outputs, *filter_shape[2:])
+                                )
                             else:
+                                # N*OH*OW*OC or N*OD*OH*OW*OC
                                 dots = gtu.product(
                                     (ifm_shape[0], *outputs, filter_shape[0])
                                 )
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index 49d9f1b..6867979 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -318,8 +318,13 @@
     def tensorComplianceMetaData(
         self, op, inputType, argsDict, outputTensor, errorName
     ):
-        # TODO - Dot product Ops with FP16 or BF16 inputs that produce FP32 outputs are not supported yet
-        UNSUPPORTED_NON_FP32_INPUT_OPS = (Op.MATMUL, Op.CONV2D, Op.FULLY_CONNECTED)
+        # TODO - Dot product Ops with BF16 inputs that produce FP32 outputs are not supported yet
+        UNSUPPORTED_NON_FP32_INPUT_OPS = (
+            Op.MATMUL,
+            Op.CONV2D,
+            Op.FULLY_CONNECTED,
+            Op.DEPTHWISE_CONV2D,
+        )
         if (
             errorName
             or not gtu.dtypeIsSupportedByCompliance(outputTensor.dtype)
@@ -1063,7 +1068,7 @@
         padding = args_dict["pad"]
         dilations = args_dict["dilation"]
 
-        result_tens = OutputShaper.depthwiseConv2dOp(
+        result_tensor = OutputShaper.depthwiseConv2dOp(
             self.ser,
             self.rng,
             ifm,
@@ -1082,12 +1087,12 @@
         ):
             qinfo = [
                 TosaQuantGen.getZeroPoint(self, ifm.dtype),
-                TosaQuantGen.getZeroPoint(self, result_tens.dtype),
+                TosaQuantGen.getZeroPoint(self, result_tensor.dtype),
             ]
 
         # Invalidate Input/Output list for error_if checks.
         input_list = [ifm.name, filter.name, bias.name]
-        output_list = [result_tens.name]
+        output_list = [result_tensor.name]
         num_operands = sum(op["operands"])
         input_list, output_list = TosaErrorIfArgGen.eiInvalidateInputOutputList(
             self, error_name, input_list, output_list
@@ -1100,7 +1105,7 @@
             op=op,
             input_dtype=ifm.dtype,
             weight_dtype=filter.dtype,
-            output_dtype=result_tens.dtype,
+            output_dtype=result_tensor.dtype,
             qinfo=qinfo,
             input_list=input_list,
             num_operands=num_operands,
@@ -1110,7 +1115,7 @@
             dilation=dilations,
             input_shape=ifm.shape,
             weight_shape=filter.shape,
-            output_shape=result_tens.shape,
+            output_shape=result_tensor.shape,
         ):
             return None
 
@@ -1121,7 +1126,12 @@
         attr.ConvAttribute(padding, strides, dilations, qinfo[0], qinfo[1], local_bound)
 
         self.ser.addOperator(op["op"], input_list, output_list, attr)
-        return result_tens
+
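+        # Attach compliance (dot product) meta data to the generated test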
+        compliance = self.tensorComplianceMetaData(
+            op, ifm.dtype, args_dict, result_tensor, error_name
+        )
+
+        return TosaTestGen.BuildInfo(result_tensor, compliance)
 
     def build_fully_connected(
         self,
@@ -3206,6 +3216,9 @@
                 TosaErrorValidator.evConvOutputShapeMismatch,
                 TosaErrorValidator.evConvOutputShapeNonInteger,
             ),
+            "data_gen": {
+                "fp": (gtu.DataGenType.DOT_PRODUCT,),
+            },
             "template": True,
         },
         "fully_connected": {