8K levels: Tensor op tests kernel/stride at 8192 maximums

Operators updated: AVG_POOL2D, MAX_POOL2D, CONV2D, CONV3D,
 DEPTHWISE_CONV2D & TRANSPOSE_CONV2D
tosa_verif_build_tests argument --level-8k-sizes used to
allow kernel/stride maximum boundary testing

Fixed bugs in the height/width validator function, meaning some
existing avg_pool2d float tests need regenerating.

Signed-off-by: Jeremy Johnson <jeremy.johnson@arm.com>
Change-Id: I7aeab82d3bd3c49d02d54708f2c9d995cd3cf2df
diff --git a/verif/conformance/tosa_base_profile_ops_info.json b/verif/conformance/tosa_base_profile_ops_info.json
index e40ddfc..b8b80ab 100644
--- a/verif/conformance/tosa_base_profile_ops_info.json
+++ b/verif/conformance/tosa_base_profile_ops_info.json
@@ -297,6 +297,21 @@
                         "--allow-pooling-and-conv-oversizes"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "3,10",
+                        "--level-8k-sizes"
+                    ]
+                ]
+                ]
             }
         },
         "selection": {
@@ -316,6 +330,9 @@
                     "type",
                     "pad"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
@@ -1079,6 +1096,21 @@
                         "--allow-pooling-and-conv-oversizes"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "2,5",
+                        "--level-8k-sizes"
+                    ]
+                ]
             }
         },
         "selection": {
@@ -1101,6 +1133,9 @@
                     "type",
                     "pad"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
@@ -1140,6 +1175,21 @@
                         "--allow-pooling-and-conv-oversizes"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "2,5",
+                        "--level-8k-sizes"
+                    ]
+                ]
             }
         },
         "selection": {
@@ -1163,6 +1213,9 @@
                     "pad",
                     "stride"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
@@ -1202,6 +1255,21 @@
                         "--allow-pooling-and-conv-oversizes"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "2,5",
+                        "--level-8k-sizes"
+                    ]
+                ]
             }
         },
         "selection": {
@@ -1224,6 +1292,9 @@
                     "type",
                     "pad"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
@@ -2207,6 +2278,21 @@
                         "--allow-pooling-and-conv-oversizes"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "3,10",
+                        "--level-8k-sizes"
+                    ]
+                ]
             }
         },
         "selection": {
@@ -2226,6 +2312,9 @@
                     "type",
                     "pad"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
@@ -3588,6 +3677,21 @@
                         "1"
                     ]
                 ]
+            },
+            "8k_level": {
+                "no_negative_tests": "true",
+                "selector": "8k_level",
+                "generator_args": [
+                    [
+                        "--target-dtype",
+                        "int8",
+                        "--target-dtype",
+                        "int16",
+                        "--tensor-dim-range",
+                        "2,5",
+                        "--level-8k-sizes"
+                    ]
+                ]
             }
         },
         "selection": {
@@ -3610,6 +3714,9 @@
                     "stride",
                     "pad"
                 ]
+            },
+            "8k_level": {
+                "all": "true"
             }
         }
     },
diff --git a/verif/conformance/tosa_verif_conformance_generator.py b/verif/conformance/tosa_verif_conformance_generator.py
index 2d9dad3..ef6bfb9 100644
--- a/verif/conformance/tosa_verif_conformance_generator.py
+++ b/verif/conformance/tosa_verif_conformance_generator.py
@@ -755,7 +755,29 @@
                             gen_neg_dim_range,
                         )
 
-                        if args.convert_all_tests:
+                        # Work out which selection criteria we are using
+                        if "selector" in gen_dict:
+                            selector_name = gen_dict["selector"]
+                            if selector_name not in test_params[op]["selection"]:
+                                logger.warn(
+                                    f"Could not find {selector_name} in selection dict for {op} - using default"
+                                )
+                                selector_name = "default"
+                        else:
+                            selector_name = "default"
+                        if selector_name not in test_params[op]["selection"]:
+                            logger.error(
+                                f"Could not find {selector_name} in selection dict for {op}"
+                            )
+                            raise (GenConformanceError())
+
+                        # Selection criteria
+                        selection_config = test_params[op]["selection"][selector_name]
+
+                        if args.convert_all_tests or (
+                            "all" in selection_config
+                            and selection_config["all"] == "true"
+                        ):
                             logger.debug(f"Running and converting all {op} tests")
                             generate_results(args, profile, op, op_build_dir)
                             operator_test_list = None
@@ -763,26 +785,6 @@
                             logger.debug(
                                 f"Running and converting selection of {op} tests"
                             )
-                            # Work out which selection criteria we are using
-                            if "selector" in gen_dict:
-                                selector_name = gen_dict["selector"]
-                                if selector_name not in test_params[op]["selection"]:
-                                    logger.warn(
-                                        f"Could not find {selector_name} in selection dict for {op} - using default"
-                                    )
-                                    selector_name = "default"
-                            else:
-                                selector_name = "default"
-                            if selector_name not in test_params[op]["selection"]:
-                                logger.error(
-                                    f"Could not find {selector_name} in selection dict for {op}"
-                                )
-                                raise (GenConformanceError())
-
-                            # Selection criteria
-                            selection_config = test_params[op]["selection"][
-                                selector_name
-                            ]
                             if test_type in ["positive", "both"]:
                                 tests_gen, tests_gen2 = tee(
                                     get_op_tests_selection(
diff --git a/verif/generator/tosa_arg_gen.py b/verif/generator/tosa_arg_gen.py
index b027d34..2bbc349 100644
--- a/verif/generator/tosa_arg_gen.py
+++ b/verif/generator/tosa_arg_gen.py
@@ -1056,66 +1056,129 @@
 
     @staticmethod
     def agConv(testGen, opName, shapeList, dtypes, error_name=None):
+        # Used by CONV2D, CONV3D and DEPTHWISE_CONV2D
         arg_list = []
 
+        if testGen.args.level8k and error_name is not None:
+            # Don't produce negative large tests
+            return arg_list
+
+        # Shape: Batches, (Depth), Height, Width, Channels
         ifm_shape = shapeList[0]
+        # Shape: (OFM channels), (KD), KH, KW, IFM channels
         filter_shape = shapeList[1]
-        # determine the kernel shape from operator name (e.g. "conv2d_3x3" => [3,3])
-        k = [int(x) for x in opName.split("_")[-1].split("x")]
 
         accum_dtype = get_accum_dtype_from_tgTypes(dtypes)
 
         # Check the rank
-        rank = 5 if opName.startswith("conv3d") else 4
+        conv3d = opName.startswith("conv3d")
+        rank = 5 if conv3d else 4
         if error_name != ErrorIf.WrongRank:
             assert len(ifm_shape) == rank
             assert len(filter_shape) == rank
 
-        # kernel rank omits batch and channels
+        # kernel rank omits channels
         k_rank = rank - 2
-        assert len(k) == k_rank
+        k_pos = 0 if opName.startswith("depthwise") else 1
+        k_shape = tuple(filter_shape[k_pos : (k_pos + k_rank)])
 
-        # Generate comprehensive argument lists
-        # - except for named errors, which use specific invalid value(s)
-        if error_name == ErrorIf.PadSmallerZero:
-            p_vals = [testGen.rng.choice(range(-5, 0))]
-        else:
-            p_vals = [x for x in range(0, testGen.args.max_conv_padding + 1)]
-        paddings = {x for x in itertools.product(*([p_vals] * k_rank * 2))}
-        if error_name == ErrorIf.StrideSmallerOne:
-            # Can't use stride=0, as it is used to derive output shape, as a divisor
-            s_vals = [testGen.rng.choice(range(-5, 0))]
-        else:
-            # Stride must be greater than 1 to force non-integer error
-            startStride = 1 if error_name != ErrorIf.ConvOutputShapeNonInteger else 2
-            s_vals = [x for x in range(startStride, testGen.args.max_conv_stride + 1)]
-        strides = {x for x in itertools.product(*([s_vals] * k_rank))}
-        if error_name == ErrorIf.DilationSmallerOne:
-            d_vals = [testGen.rng.choice(range(-5, 1))]
-        else:
-            d_vals = [x for x in range(1, testGen.args.max_conv_dilation + 1)]
-        dilations = {x for x in itertools.product(*([d_vals] * k_rank))}
-
-        if not error_name and testGen.args.oversize:
-            # add some oversize argument values
-            if max(ifm_shape) < 64:
-                bigPadding = 9
-                paddings.update(
-                    {x for x in itertools.product(*([[0, bigPadding]] * (k_rank * 2)))}
+        if not testGen.args.level8k:
+            # Generate comprehensive argument lists
+            # - except for named errors, which use specific invalid value(s)
+            if error_name == ErrorIf.PadSmallerZero:
+                p_vals = [testGen.rng.choice(range(-5, 0))]
+            else:
+                p_vals = [x for x in range(0, testGen.args.max_conv_padding + 1)]
+            paddings = {x for x in itertools.product(*([p_vals] * k_rank * 2))}
+            if error_name == ErrorIf.StrideSmallerOne:
+                # Can't use stride=0, as it is used to derive output shape, as a divisor
+                s_vals = [testGen.rng.choice(range(-5, 0))]
+            else:
+                # Stride must be greater than 1 to force non-integer error
+                startStride = (
+                    1 if error_name != ErrorIf.ConvOutputShapeNonInteger else 2
                 )
-            bigStride = 8
-            strides.update({x for x in itertools.product(*([[1, bigStride]] * k_rank))})
-            bigDilation = 7
-            dilations.update(
-                {x for x in itertools.product(*([[1, bigDilation]] * k_rank))}
-            )
+                s_vals = [
+                    x for x in range(startStride, testGen.args.max_conv_stride + 1)
+                ]
+            strides = {x for x in itertools.product(*([s_vals] * k_rank))}
+            if error_name == ErrorIf.DilationSmallerOne:
+                d_vals = [testGen.rng.choice(range(-5, 1))]
+            else:
+                d_vals = [x for x in range(1, testGen.args.max_conv_dilation + 1)]
+            dilations = {x for x in itertools.product(*([d_vals] * k_rank))}
 
-        # There are too many parameter combinations, so generate them sparsely,
-        # very sparse for negative tests
-        sparsity_factor = 2 if error_name else 120
-        sparsity = TosaArgGen._calculate_sparsity(
-            len(paddings) * len(strides) * len(dilations), sparsity_factor
-        )
+            if not error_name and testGen.args.oversize:
+                # add some oversize argument values
+                if max(ifm_shape) < 64:
+                    bigPadding = 9
+                    paddings.update(
+                        {
+                            x
+                            for x in itertools.product(
+                                *([[0, bigPadding]] * (k_rank * 2))
+                            )
+                        }
+                    )
+                bigStride = 8
+                strides.update(
+                    {x for x in itertools.product(*([[1, bigStride]] * k_rank))}
+                )
+                bigDilation = 7
+                dilations.update(
+                    {x for x in itertools.product(*([[1, bigDilation]] * k_rank))}
+                )
+            max_dim_size = None
+
+            # There are too many parameter combinations, so generate them sparsely,
+            # very sparse for negative tests
+            sparsity_factor = 2 if error_name else 120
+            sparsity = TosaArgGen._calculate_sparsity(
+                len(paddings) * len(strides) * len(dilations), sparsity_factor
+            )
+        else:
+            # Only test 8k levels boundaries
+            bigStride = testGen.TOSA_8K_LEVEL_MAX_STRIDE
+            bigKernel = testGen.TOSA_8K_LEVEL_MAX_KERNEL
+            bigPadding = bigKernel
+
+            dilation_shape = [1] * k_rank
+            pad_shape = [0] * k_rank * 2
+            if conv3d:
+                # Small stride apart from for big kernel (see below) to keep
+                # tensor size/calculation small
+                stride_shape = [1] * k_rank
+                for idx in range(k_rank):
+                    pad_offset = idx * 2
+                    if k_shape[idx] == bigKernel:
+                        # Padding shape needs to account for tensor shape
+                        pad_shape[pad_offset] = bigPadding - ifm_shape[idx + 1]
+                        pad_shape[pad_offset + 1] = bigPadding - dilation_shape[idx] + 1
+                        # Big stride to reduce output size
+                        stride_shape[idx] = bigKernel
+                    else:
+                        # Account for kernel size
+                        pad_shape[pad_offset] = k_shape[idx] - 1
+            else:
+                # Always have a large stride with extra padding and dilation to keep
+                # tensor calculation reasonable
+                stride_shape = [bigKernel] * k_rank
+                for idx in range(k_rank):
+                    # Dilation shape must account for kernel size
+                    dilation_shape[idx] = bigKernel // k_shape[idx]
+                    # Padding shape needs to accommodate tensor/kernel & dilation
+                    pad_offset = idx * 2
+                    pad_shape[pad_offset] = bigPadding - ifm_shape[idx + 1]
+                    pad_shape[pad_offset + 1] = bigPadding - dilation_shape[idx] + 1
+
+            strides = {tuple(stride_shape)}
+            dilations = {tuple(dilation_shape)}
+            paddings = {tuple(pad_shape)}
+            # Create a limit for the output dimensions size
+            max_dim_size = testGen.TOSA_8K_LEVEL_MAX_KERNEL
+
+            # Currently allow all combinations that are reasonable size
+            sparsity = 1
 
         n = 0
         for s in sorted(list(strides)):
@@ -1125,26 +1188,30 @@
                         n % sparsity == 0
                         # the padded shape must exceed the dilation * kernel to get a positive
                         # sized output shape
-                        and (ifm_shape[1] - 1 + p[0] + p[1]) > d[0] * (k[0] - 1)
-                        and (ifm_shape[2] - 1 + p[2] + p[3]) > d[1] * (k[1] - 1)
+                        and (ifm_shape[1] - 1 + p[0] + p[1]) > d[0] * (k_shape[0] - 1)
+                        and (ifm_shape[2] - 1 + p[2] + p[3]) > d[1] * (k_shape[1] - 1)
                         and (
                             k_rank < 3
-                            or ((ifm_shape[3] - 1 + p[4] + p[5]) > d[2] * (k[2] - 1))
+                            or (
+                                (ifm_shape[3] - 1 + p[4] + p[5])
+                                > d[2] * (k_shape[2] - 1)
+                            )
                         )
                     ):
                         remainders = []
+                        outputs = []
                         for index in range(k_rank):
                             pad_offset = index * 2
-                            remainders.append(
-                                (
-                                    ifm_shape[index + 1]
-                                    - 1
-                                    + p[pad_offset]
-                                    + p[pad_offset + 1]
-                                    - (k[index] - 1) * d[index]
-                                )
-                                % s[index]
+                            partial = (
+                                ifm_shape[index + 1]
+                                - 1
+                                + p[pad_offset]
+                                + p[pad_offset + 1]
+                                - (k_shape[index] - 1) * d[index]
                             )
+                            remainders.append(partial % s[index])
+                            outputs.append((partial // s[index]) + 1)
+
                         if (
                             # the parameters must produce integer exact output
                             error_name != ErrorIf.ConvOutputShapeNonInteger
@@ -1153,13 +1220,22 @@
                             error_name == ErrorIf.ConvOutputShapeNonInteger
                             and max(remainders) > 0
                         ):
+                            if (
+                                max_dim_size is not None
+                                and max(outputs) >= max_dim_size
+                            ):
+                                # Test will consume too much memory - skip it
+                                continue
+
+                            # Support for larger values than 9 needs different delimiter
+                            delim = "" if max(s + p + d) <= 9 else "x"
                             arg_list.append(
                                 (
                                     "acc{}_st{}_pad{}_dilat{}".format(
                                         testGen.typeStr(accum_dtype),
-                                        "".join([str(x) for x in s]),
-                                        "".join([str(x) for x in p]),
-                                        "".join([str(x) for x in d]),
+                                        delim.join([str(x) for x in s]),
+                                        delim.join([str(x) for x in p]),
+                                        delim.join([str(x) for x in d]),
                                     ),
                                     [accum_dtype, s, p, d],
                                 )
@@ -1215,6 +1291,10 @@
     def agTransposeConv2D(testGen, opName, shapeList, dtypes, error_name=None):
         arg_list = []
 
+        if testGen.args.level8k and error_name is not None:
+            # Don't produce negative large tests
+            return arg_list
+
         ifm_shape = shapeList[0]
         filter_shape = shapeList[1]
 
@@ -1225,66 +1305,112 @@
             assert len(ifm_shape) == 4
             assert len(filter_shape) == 4
 
-        # Generate comprehensive argument lists
-        # - except for named errors, which use specific invalid value(s)
-        smallest_padding_size = -min(filter_shape[1], filter_shape[2]) + 1
-        if error_name == ErrorIf.PadLargerEqualKernel:
-            max_filter_size = -max(filter_shape[1], filter_shape[2])
-            p_vals = [testGen.rng.choice(range(max_filter_size - 10, max_filter_size))]
-        else:
-            p_vals = [
-                x
-                for x in range(smallest_padding_size, testGen.args.max_conv_padding + 1)
-            ]
-        paddings = {x for x in itertools.product(*([p_vals] * 4))}
-        if error_name == ErrorIf.StrideSmallerOne:
-            # Can't use stride=0, as it is used to derive output shape, as a divisor
-            s_vals = [testGen.rng.choice(range(-5, 0))]
-        else:
-            s_vals = [x for x in range(1, testGen.args.max_conv_stride + 1)]
-        strides = {x for x in itertools.product(*([s_vals] * 2))}
+        k_shape = tuple(filter_shape[1:3])
 
-        if not error_name and testGen.args.oversize:
-            # add some oversize argument values
-            if max(ifm_shape) < 64:
-                bigPadding = 9
-                paddings.update(
-                    {
-                        x
-                        for x in itertools.product(
-                            *([[smallest_padding_size, bigPadding]] * 4)
-                        )
-                    }
-                )
-            bigStride = 8
-            strides.update({x for x in itertools.product(*([[1, bigStride]] * 2))})
+        if not testGen.args.level8k:
+            # Generate comprehensive argument lists
+            # - except for named errors, which use specific invalid value(s)
+            smallest_padding_size = -min(k_shape[0], k_shape[1]) + 1
+            if error_name == ErrorIf.PadLargerEqualKernel:
+                max_filter_size = -max(k_shape[0], k_shape[1])
+                p_vals = [
+                    testGen.rng.choice(range(max_filter_size - 10, max_filter_size))
+                ]
+            else:
+                p_vals = [
+                    x
+                    for x in range(
+                        smallest_padding_size, testGen.args.max_conv_padding + 1
+                    )
+                ]
+            paddings = {x for x in itertools.product(*([p_vals] * 4))}
+            if error_name == ErrorIf.StrideSmallerOne:
+                # Can't use stride=0, as it is used to derive output shape, as a divisor
+                s_vals = [testGen.rng.choice(range(-5, 0))]
+            else:
+                s_vals = [x for x in range(1, testGen.args.max_conv_stride + 1)]
+            strides = {x for x in itertools.product(*([s_vals] * 2))}
 
-        # There are too many parameter combinations, so generate them sparsely,
-        # very sparse for negative tests
-        sparsity_factor = 2 if error_name else 10
-        sparsity = len(paddings) * len(strides) // sparsity_factor + 1
-        # If there are only a small number of tests, just select them all
-        if sparsity < 13:
+            if not error_name and testGen.args.oversize:
+                # add some oversize argument values
+                if max(ifm_shape) < 64:
+                    bigPadding = 9
+                    paddings.update(
+                        {
+                            x
+                            for x in itertools.product(
+                                *([[smallest_padding_size, bigPadding]] * 4)
+                            )
+                        }
+                    )
+                bigStride = 8
+                strides.update({x for x in itertools.product(*([[1, bigStride]] * 2))})
+
+            # There are too many parameter combinations, so generate them sparsely,
+            # very sparse for negative tests
+            sparsity_factor = 2 if error_name else 10
+            sparsity = len(paddings) * len(strides) // sparsity_factor + 1
+            # If there are only a small number of tests, just select them all
+            if sparsity < 13:
+                sparsity = 1
+            # To get a variety of parameter combinations sparsity should not be a
+            # multiple of 2, 3 or 5
+            while sparsity % 2 == 0 or sparsity % 3 == 0 or sparsity % 5 == 0:
+                sparsity += 1
+        else:
+            # Only test 8k levels boundaries
+            bigStride = testGen.TOSA_8K_LEVEL_MAX_STRIDE
+            bigKernel = testGen.TOSA_8K_LEVEL_MAX_KERNEL
+            bigPadding = bigKernel
+
+            pad_shape = [0] * (len(k_shape) * 2)
+            stride_shape = [1] * len(k_shape)
+            # The point at which input dimension combined with the stride will
+            # create large output sizes!
+            LARGE_SIZE = 2
+            for idx in range(len(k_shape)):
+                pad_offset = idx * 2
+                if k_shape[idx] == bigKernel:
+                    # Set large stride
+                    stride_shape[idx] = bigKernel
+                    # Use negative output padding to reduce shape size
+                    pad_shape[pad_offset] = -(bigPadding - 1)
+                    if ifm_shape[idx + 1] > LARGE_SIZE:
+                        pad_shape[pad_offset + 1] = -(bigPadding - 1)
+                else:
+                    # The other dimension should be the bigKernel
+                    alt_idx = 1 - idx
+                    if (
+                        k_shape[alt_idx] == bigKernel
+                        and ifm_shape[alt_idx + 1] < LARGE_SIZE
+                    ):
+                        # As the input is small, the large stride won't
+                        # affect the output so we can add some padding
+                        pad_shape[pad_offset + 1] = bigPadding
+
+            strides = {tuple(stride_shape)}
+            paddings = {tuple(pad_shape)}
+
+            # Currently allow all combinations that are reasonable size
             sparsity = 1
-        # To get a variety of parameter combinations sparsity should not be a
-        # multiple of 2, 3 or 5
-        while sparsity % 2 == 0 or sparsity % 3 == 0 or sparsity % 5 == 0:
-            sparsity += 1
 
         n = 0
         for s in sorted(list(strides)):
             for p in sorted(list(paddings)):
                 if n % sparsity == 0:
                     # Determine the output shape
-                    oh = (ifm_shape[1] - 1) * s[0] + p[0] + p[1] + filter_shape[1]
-                    ow = (ifm_shape[2] - 1) * s[1] + p[2] + p[3] + filter_shape[2]
+                    oh = (ifm_shape[1] - 1) * s[0] + p[0] + p[1] + k_shape[0]
+                    ow = (ifm_shape[2] - 1) * s[1] + p[2] + p[3] + k_shape[1]
                     os = [ifm_shape[0], oh, ow, filter_shape[0]]
+
+                    # Support for larger values than 9 needs different delimiter
+                    delim = "" if max(s + p) <= 9 else "x"
                     arg_list.append(
                         (
                             "acc{}_st{}_pad{}_os{}".format(
                                 testGen.typeStr(accum_dtype),
-                                "".join([str(x) for x in s]),
-                                "".join([str(x) for x in p]),
+                                delim.join([str(x) for x in s]),
+                                delim.join([str(x) for x in p]),
                                 "x".join([str(x) for x in os]),
                             ),
                             [accum_dtype, s, p, os],
@@ -1364,15 +1490,49 @@
         if error_name != ErrorIf.WrongRank:
             assert len(shape) == 4
 
-        # Generate comprehensive argument lists
-        p_vals = [x for x in range(0, testGen.args.max_pooling_padding + 1)]
-        paddings = {x for x in itertools.product(*([p_vals] * 4))}
-        # Stride must be greater than 1 to force non-integer error
+        test_level8k = testGen.args.level8k and error_name is None
+
         startStride = 1 if error_name != ErrorIf.PoolingOutputShapeNonInteger else 2
-        s_vals = [x for x in range(startStride, testGen.args.max_pooling_stride + 1)]
-        strides = {x for x in itertools.product(*([s_vals] * 2))}
-        k_vals = [x for x in range(2, testGen.args.max_pooling_kernel + 1)]
-        kernels = {x for x in itertools.product(*([k_vals] * 2))}
+        startKernel = 2
+        startPad = 0
+        if not test_level8k:
+            # Generate comprehensive argument lists
+            p_vals = [x for x in range(startPad, testGen.args.max_pooling_padding + 1)]
+            paddings = {x for x in itertools.product(*([p_vals] * 4))}
+            # Stride must be greater than 1 to force non-integer error
+            s_vals = [
+                x for x in range(startStride, testGen.args.max_pooling_stride + 1)
+            ]
+            strides = {x for x in itertools.product(*([s_vals] * 2))}
+            k_vals = [
+                x for x in range(startKernel, testGen.args.max_pooling_kernel + 1)
+            ]
+            kernels = {x for x in itertools.product(*([k_vals] * 2))}
+            max_dim_size = None
+        else:
+            # Only test 8k levels
+            bigStride = testGen.TOSA_8K_LEVEL_MAX_STRIDE
+            bigKernel = testGen.TOSA_8K_LEVEL_MAX_KERNEL
+            strides = {(1, bigStride), (bigStride, 4)}
+            kernels = {(1, bigKernel), (bigKernel, 3)}
+            paddings = set()
+            for s in sorted(list(strides)):
+                for k in sorted(list(kernels)):
+                    padding = []
+                    for idx in range(len(k)):
+                        total_padding = s[idx] - shape[idx + 1] + k[idx]
+                        while total_padding < 0:
+                            # Must meet: shape + padding > kernel
+                            total_padding += s[idx]
+                        if total_padding < k[idx]:
+                            padding.extend([0, total_padding])
+                        else:
+                            # Note this may produce padding >= k[idx] which is not
+                            # allowed - but will be ignored in the creation loop below
+                            padding.extend([k[idx] - 1, total_padding - (k[idx] - 1)])
+                    paddings.add(tuple(padding))
+            # Create a limit for the output dimensions size
+            max_dim_size = testGen.TOSA_8K_LEVEL_MAX_KERNEL
 
         if opName == "max_pool2d":
             accum_dtypes = [None]  # max_pool has no accumulate dtype
@@ -1389,25 +1549,33 @@
             # incorrect input data-type
             accum_dtypes = [DType.INT32]
 
-        if testGen.args.oversize:
-            # add some oversize argument values
-            bigStride = 7
-            strides.update(
-                {x for x in itertools.product(*([[startStride, bigStride]] * 2))}
-            )
-            bigKernel = 9
-            kernels.update({x for x in itertools.product(*([[2, bigKernel]] * 2))})
-            if max(shape) < 64:
-                # padding must be less than the kernel size
-                bigPadding = bigKernel - 1
-                paddings.update(
-                    {x for x in itertools.product(*([[0, bigPadding]] * 4))}
+        if not test_level8k:
+            if testGen.args.oversize:
+                # add some oversize argument values
+                bigStride = 7
+                bigKernel = 9
+                strides.update(
+                    {x for x in itertools.product(*([[startStride, bigStride]] * 2))}
                 )
+                kernels.update(
+                    {x for x in itertools.product(*([[startKernel, bigKernel]] * 2))}
+                )
+                if max(shape) < 64:
+                    # padding must be less than the kernel size
+                    bigPadding = bigKernel - 1
+                    paddings.update(
+                        {x for x in itertools.product(*([[startPad, bigPadding]] * 4))}
+                    )
 
-        # There are too many parameter combinations, so generate them sparsely,
-        # very sparse for negative tests
-        sparsity_factor = 2 if error_name else 500
-        sparsity = len(paddings) * len(strides) * len(kernels) // sparsity_factor + 1
+            # There are too many parameter combinations, so generate them sparsely,
+            # very sparse for negative tests
+            sparsity_factor = 2 if error_name else 500
+            sparsity = (
+                len(paddings) * len(strides) * len(kernels) // sparsity_factor + 1
+            )
+        else:
+            # We have already limited test output combinations for 8k tests
+            sparsity = 1
 
         arg_str = (
             "acc{}_st{}_kern{}_pad{}"
@@ -1418,10 +1586,13 @@
         def get_arg_list_element(accum, stride, pad, kern):
             # Return tuple containing the formatted argument string and
             # the corresponding argument values
+
+            # Values larger than 9 need a delimiter, otherwise the joined digits are ambiguous
+            delim = "" if max(stride + kern + pad) <= 9 else "x"
             arg_str_elems = [
-                "".join([str(x) for x in stride]),
-                "".join([str(x) for x in kern]),
-                "".join([str(x) for x in pad]),
+                delim.join([str(x) for x in stride]),
+                delim.join([str(x) for x in kern]),
+                delim.join([str(x) for x in pad]),
             ]
             # Note: different order to string
             arg_val_elems = [stride, pad, kern]
@@ -1459,8 +1630,13 @@
                             and (shape[1] + p[0] + p[1]) > k[0]
                             and (shape[2] + p[2] + p[3]) > k[1]
                         ):
-                            remainder_h = (shape[1] + p[0] + p[1] - k[0]) % s[0]
-                            remainder_w = (shape[2] + p[2] + p[3] - k[1]) % s[1]
+                            partial_h = shape[1] + p[0] + p[1] - k[0]
+                            partial_w = shape[2] + p[2] + p[3] - k[1]
+                            remainder_h = partial_h % s[0]
+                            remainder_w = partial_w % s[1]
+                            output_h = partial_h // s[0] + 1
+                            output_w = partial_w // s[1] + 1
+                            # debug print(shape, remainder_h, remainder_w, "/", output_h, output_w)
                             if (
                                 # the parameters must produce integer exact output
                                 error_name != ErrorIf.PoolingOutputShapeNonInteger
@@ -1470,6 +1646,12 @@
                                 error_name == ErrorIf.PoolingOutputShapeNonInteger
                                 and (remainder_h != 0 or remainder_w != 0)
                             ):
+                                if (
+                                    max_dim_size is not None
+                                    and max(output_h, output_w) > max_dim_size
+                                ):
+                                    # Test will consume too much memory - skip it
+                                    continue
                                 arg_vals = [a, s, p, k]
                                 arg_list.append(get_arg_list_element(*arg_vals))
                         n += 1
diff --git a/verif/generator/tosa_error_if.py b/verif/generator/tosa_error_if.py
index b19d5e9..8c40371 100644
--- a/verif/generator/tosa_error_if.py
+++ b/verif/generator/tosa_error_if.py
@@ -2547,14 +2547,16 @@
 
         args = kwargs["args"]
 
-        # MaxPool2D has no accum_dtype arg
-        stride_idx, pad_idx = (0, 1) if opName == "max_pool2d" else (1, 2)
+        # Skip accum_dtype arg (apart from MaxPool2D, which doesn't have one)
+        stride_idx, pad_idx = (1, 2) if opName != "max_pool2d" else (0, 1)
+
+        # Common info for all ops
         strides = args[stride_idx]
         padding = args[pad_idx]
 
         if opName.endswith("pool2d"):
             # avg_pool2d, max_pool2d
-            kernel_shape = args[2]
+            kernel_shape = args[pad_idx + 1]
             h = (
                 input_shape[1] + padding[0] + padding[1] + strides[0] - kernel_shape[0]
             ) // strides[0]
@@ -2566,53 +2568,36 @@
 
         if opName.startswith("transpose_conv2d"):
             # transpose_conv2d
-            output_shape = args[2]
+            output_shape = args[pad_idx + 1]
             filter_shape = inputShapes[1]
             kernel_shape = filter_shape[1:-1]
 
             def get_out_size(in_size, stride, kernel_size, out_pad, in_pad):
-                """Calculate the transpose_conv2d output size for a dimension.
+                """Calculate the transpose_conv2d output size for a dimension."""
+                return (in_size - 1) * stride + kernel_size + in_pad + out_pad
 
-                Args:
-                    in_size: the input size - int
-                    stride: the stride - int
-                    kernel_size: the kernel size - int
-                    out_pad: the output padding - int
-                    in_pad: the input padding - int
-
-                Returns:
-                    the output size
-                """
-                return (in_size - 1) * stride + kernel_size - in_pad - out_pad
-
-            for pad_h, pad_w in (
-                (kernel_shape[0] - 1, kernel_shape[1] - 1),  # FULL padding
-                (kernel_shape[0] // 2, kernel_shape[1] // 2),  # SAME padding
-                (0, 0),  # VALID padding
-            ):
-                h = get_out_size(
-                    input_shape[1],
-                    strides[0],
-                    kernel_shape[0],
-                    padding[0],
-                    pad_h,
-                )
-                w = get_out_size(
-                    input_shape[2],
-                    strides[1],
-                    kernel_shape[1],
-                    padding[1],
-                    pad_w,
-                )
-                if output_shape[1] == h and output_shape[2] == w:
-                    return False
-
-            # output shape does not match the expected shape for any padding option
+            h = get_out_size(
+                input_shape[1],
+                strides[0],
+                kernel_shape[0],
+                padding[0],
+                padding[1],
+            )
+            w = get_out_size(
+                input_shape[2],
+                strides[1],
+                kernel_shape[1],
+                padding[2],
+                padding[3],
+            )
+            if output_shape[1] == h and output_shape[2] == w:
+                return False
+            # output shape does not match the expected shape
             return True
 
         if "conv2d" in opName or "conv3d" in opName:
             # conv2d, conv3d, depthwise_conv2d
-            dilations = args[2]
+            dilations = args[pad_idx + 1]
             filter_shape = inputShapes[1]
             kernel_shape = (
                 filter_shape[0:2]
@@ -2621,12 +2606,13 @@
             )
 
             for i in range(len(kernel_shape)):
+                pad_offset = i * 2
                 dim = (
                     input_shape[i + 1]
-                    - kernel_shape[i]
-                    - (kernel_shape[i] - 1) * (dilations[i] - 1)
-                    + padding[i * 2 + 0]
-                    + padding[i * 2 + 1]
+                    - 1
+                    + padding[pad_offset]
+                    + padding[pad_offset + 1]
+                    - (kernel_shape[i] - 1) * dilations[i]
                 ) // strides[i] + 1
                 # return True if any dimension is < 1
                 if dim < 1:
diff --git a/verif/generator/tosa_test_gen.py b/verif/generator/tosa_test_gen.py
index 5d3984c..65bdeb7 100644
--- a/verif/generator/tosa_test_gen.py
+++ b/verif/generator/tosa_test_gen.py
@@ -28,6 +28,8 @@
     # This currently matches the 8K level defined in the specification.
     TOSA_TENSOR_MAX_RANK = 6
     TOSA_8K_LEVEL_MAX_SCALE = 64
+    TOSA_8K_LEVEL_MAX_KERNEL = 8192
+    TOSA_8K_LEVEL_MAX_STRIDE = 8192
 
     def __init__(self, args):
         self.args = args
@@ -2348,8 +2350,8 @@
                 invalid_test_validators = op["invalid_test_validators"]
                 clean_testList = []
                 for test in testList:
+                    remove_test = False
                     for validator_fcn in invalid_test_validators:
-                        remove_test = False
                         if validator_fcn(
                             opName=test[0],
                             input_dtype=test[2],
@@ -2371,6 +2373,9 @@
         except KeyError:
             raise Exception("Cannot find op with name {}".format(opName))
 
+        if self.args.verbose:
+            print(f"Creating {testStr}")
+
         # Create a serializer
         self.createSerializer(opName, testStr)
 
@@ -2461,7 +2466,13 @@
             return
 
         # Dynamically create op lists for convolutions with a list of kernel sizes
-        KERNELS_2D = [[1, 1], [2, 2], [3, 3], [5, 5], [3, 1], [1, 3]]
+        if not self.args.level8k:
+            KERNELS_2D = [[1, 1], [2, 2], [3, 3], [5, 5], [3, 1], [1, 3]]
+            KERNELS_3D = [[1, 1, 1], [2, 1, 1], [1, 2, 1], [1, 1, 2]]
+        else:
+            bigK = self.TOSA_8K_LEVEL_MAX_KERNEL
+            KERNELS_2D = [[1, bigK], [bigK, 2]]
+            KERNELS_3D = [[1, bigK, 1], [2, 2, bigK]]
 
         for k in KERNELS_2D:
             testName = "conv2d_{}x{}".format(k[0], k[1])
@@ -2483,7 +2494,6 @@
             self.TOSA_OP_LIST[testName]["filter"] = k
             self.TOSA_OP_LIST[testName]["template"] = False
 
-        KERNELS_3D = [[1, 1, 1], [2, 1, 1], [1, 2, 1], [1, 1, 2]]
         for k in KERNELS_3D:
             testName = "conv3d_{}x{}x{}".format(k[0], k[1], k[2])
             self.TOSA_OP_LIST[testName] = self.TOSA_OP_LIST["conv3d_TEMPLATE"].copy()