Generate an operator configuration file from a list of tflite models

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I1b13da6558bd11d49747162d66c81255ccec1498
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6166
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/python/scripts/report-model-ops/README.md b/python/scripts/report-model-ops/README.md
new file mode 100644
index 0000000..aa8efa8
--- /dev/null
+++ b/python/scripts/report-model-ops/README.md
@@ -0,0 +1,37 @@
+# Extract list of operators from a list of TfLite models
+
+## Introduction
+
+The purpose of this script is to inspect a list of user-provided TfLite models and report
+the list of operators that are used as well as the data-types that the models operate on.
+The script can subsequently generate a configuration file that can be provided to the
+Compute Library build system and generate a library that contains only the operators required
+by the given model(s) to run.
+
+Utilizing this script, use-case tailored Compute Library dynamic libraries can be created,
+helping reduce the overall binary size requirements.
+
+## Usage example
+
+Assuming that the virtual environment is activated and the requirements are present,
+we can run the following command:
+
+```bash
+./report_model_ops.py -m modelA.tflite modelB.tflite -c build_config.json
+```
+
+## Input arguments
+
+***models (required)*** :
+A space-separated list of model files.
+
+Supported model formats are:
+
+* TfLite
+
+***config (optional)*** :
+The configuration file to be created in JSON format that can be provided to ComputeLibrary's
+build system to generate a library with the given list of operators and data-types.
+
+***debug (optional)*** :
+Flag that enables debug information
diff --git a/python/scripts/report-model-ops/report_model_ops.py b/python/scripts/report-model-ops/report_model_ops.py
new file mode 100644
index 0000000..3888b80
--- /dev/null
+++ b/python/scripts/report-model-ops/report_model_ops.py
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+# Copyright (c) 2021 Arm Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import json
+import logging
+import os
+import sys
+from argparse import ArgumentParser
+
+import tflite
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+
+from utils.model_identification import identify_model_type
+from utils.tflite_helpers import tflite_op2acl, tflite_typecode2name
+
+SUPPORTED_MODEL_TYPES = ["tflite"]  # Model types listed in the --models help text
+logger = logging.getLogger("report_model_ops")  # Logger shared by the functions in this script
+
+
+def get_ops_from_tflite_graph(model):
+    """
+    Helper function that extracts operator related meta-data from a TfLite model
+
+    Parameters
+    ----------
+    model: str
+        Path to the TfLite model file to analyse
+
+    Returns
+    ----------
+    supported_ops, unsupported_ops, data_types: tuple
+        Sets of mapped ComputeLibrary operators, unmapped TfLite operators and tensor data-types
+    """
+
+    logger.debug(f"Analysing TfLite mode '{model}'!")
+
+    # Keep the parsed graph under its own name instead of rebinding the `model` path argument
+    with open(model, "rb") as f:
+        graph = tflite.Model.GetRootAsModel(f.read(), 0)
+
+    # Extract unique operators
+    nr_unique_ops = graph.OperatorCodesLength()
+    unique_ops = {tflite.opcode2name(graph.OperatorCodes(op_id).BuiltinCode()) for op_id in range(nr_unique_ops)}
+
+    # Extract data-types of every tensor in every subgraph
+    data_types = set()
+    for subgraph_id in range(graph.SubgraphsLength()):
+        subgraph = graph.Subgraphs(subgraph_id)
+        for tensor_id in range(subgraph.TensorsLength()):
+            data_types.add(tflite_typecode2name(subgraph.Tensors(tensor_id).Type()))
+
+    # Perform mapping between TfLite ops to ComputeLibrary ones
+    supported_ops = set()
+    unsupported_ops = set()
+    for top in unique_ops:
+        try:
+            supported_ops.add(tflite_op2acl(top))
+        except Exception:  # not bare: let KeyboardInterrupt/SystemExit propagate
+            unsupported_ops.add(top)
+            logger.warning(f"Operator {top} has not ComputeLibrary mapping")
+
+    return (supported_ops, unsupported_ops, data_types)
+
+
+def extract_model_meta(model, model_type):
+    """
+    Function that calls the appropriate model parser to extract model related meta-data
+    Supported parsers: TfLite
+
+    Parameters
+    ----------
+    model: str
+        Path to model that we want to analyze
+    model_type: str
+        Type of the model; anything other than "tflite" yields three empty sets
+
+    Returns
+    ----------
+    supported_ops, unsupported_ops, data_types: tuple
+        Sets of supported operators, unsupported operators and data-types present in the model
+    """
+
+    if model_type == "tflite":
+        return get_ops_from_tflite_graph(model)
+
+    logger.warning(f"Model type '{model_type}' is unsupported!")
+    return (set(), set(), set())  # keep the 3-tuple shape so callers can always unpack
+
+
+def generate_build_config(ops, data_types):
+    """
+    Function that generates a compatible ComputeLibrary operator-based build configuration
+
+    Parameters
+    ----------
+    ops: set
+        Set with the operators to add in the build configuration
+    data_types: set
+        Set with the data types to add in the build configuration
+
+    Returns
+    ----------
+    config_data: dict
+        Dictionary with "operators" and "data_types" lists, each sorted for reproducible output
+    """
+    # Set iteration order varies between runs; sort so the generated file is stable
+    return {
+        "operators": sorted(ops),
+        "data_types": sorted(data_types),
+    }
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser(
+        description="""Report map of operations in a list of models.
+            The script consumes deep learning models and reports the type of operations and data-types used
+            Supported model types: TfLite """
+    )
+
+    parser.add_argument(
+        "-m",
+        "--models",
+        nargs="+",
+        required=True,
+        type=str,
+        help=f"List of models; supported model types: {SUPPORTED_MODEL_TYPES}",
+    )
+    parser.add_argument("-D", "--debug", action="store_true", help="Enable script debugging output")
+    parser.add_argument(
+        "-c",
+        "--config",
+        type=str,
+        help="JSON configuration file used that can be used for custom ComputeLibrary builds",
+    )
+    args = parser.parse_args()
+
+    # Setup Logger: DEBUG when --debug is given, INFO otherwise
+    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
+
+    # Extract operator mapping, accumulating the results of all models
+    final_supported_ops = set()
+    final_unsupported_ops = set()
+    final_dts = set()
+    for model in args.models:
+        logger.debug(f"Starting analyzing {model} model")
+        model_type = identify_model_type(model)
+        model_meta = extract_model_meta(model, model_type)
+        if not model_meta:
+            # Missing or unsupported model: nothing to unpack, move on to the next one
+            continue
+        supported_model_ops, unsupported_model_ops, model_dts = model_meta
+        final_supported_ops.update(supported_model_ops)
+        final_unsupported_ops.update(unsupported_model_ops)
+        final_dts.update(model_dts)
+
+    logger.info("=== Supported Operators")
+    logger.info(final_supported_ops)
+    logger.info("=== Unsupported Operators")
+    logger.info(final_unsupported_ops)
+    logger.info("=== Data Types")
+    logger.info(final_dts)
+
+    # Generate json file (only the supported operators go into the build configuration)
+    if args.config:
+        logger.debug("Generating JSON build configuration file")
+        config_data = generate_build_config(final_supported_ops, final_dts)
+        with open(args.config, "w") as f:
+            json.dump(config_data, f)
diff --git a/python/scripts/utils/model_identification.py b/python/scripts/utils/model_identification.py
new file mode 100644
index 0000000..43e7d20
--- /dev/null
+++ b/python/scripts/utils/model_identification.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2021 Arm Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import logging
+import os
+
+
+def is_tflite_model(model_path):
+    """Check if a model is of TfLite type
+
+    Parameters:
+    ----------
+    model_path: str
+        Path to model
+
+    Returns
+    ----------
+    bool:
+        True if bytes 4-7 of the file hold the "TFL3" identifier, False otherwise
+        (including when the file cannot be opened or read)
+    """
+
+    try:
+        with open(model_path, "rb") as f:
+            hdr_bytes = f.read(8)
+    except (OSError, ValueError):
+        # Unreadable file or invalid path is reported as "not TfLite" rather than raised
+        return False
+
+    # Compare raw bytes: no decoding step that could fail on arbitrary binary content
+    return hdr_bytes[4:] == b"TFL3"
+
+
+def identify_model_type(model_path):
+    """Identify the type of a given deep learning model
+
+    Parameters:
+    ----------
+    model_path: str
+        Path to model
+
+    Returns
+    ----------
+    model_type: str
+        "tflite" for a TfLite model, or None (after logging a warning) when the path
+        does not exist or the file is not of a supported type.
+    """
+
+    if not os.path.exists(model_path):
+        logging.warning(f"Provided model {model_path} does not exist!")
+        return None
+
+    if is_tflite_model(model_path):
+        return "tflite"
+
+    # Emit the warning exactly once, via logging.warning rather than the deprecated warn alias
+    logging.warning(f"Provided model {model_path} is not of supported type!")
+    return None
diff --git a/python/scripts/utils/tflite_helpers.py b/python/scripts/utils/tflite_helpers.py
new file mode 100644
index 0000000..8f8d422
--- /dev/null
+++ b/python/scripts/utils/tflite_helpers.py
@@ -0,0 +1,230 @@
+# Copyright (c) 2021 Arm Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+_TFLITE_TYPECODE2NAME = {  # TfLite type code -> printable data-type name
+    0: "Float32",
+    1: "Float16",
+    2: "Int32",
+    3: "Uint8",
+    4: "Int64",
+    5: "String",
+    6: "Bool",
+    7: "Int16",
+    8: "Complex64",
+    9: "Int8",
+}
+
+_TFLITE_TO_ACL = {  # TfLite operator name -> ComputeLibrary operator name; unsupported ops stay commented out
+    "ADD": "Add",  # 0
+    "AVERAGE_POOL_2D": "Pool2d",  # 1
+    "CONCATENATION": "Concatenate",  # 2
+    "CONV_2D": "Conv2d",  # 3
+    "DEPTHWISE_CONV_2D": "DepthwiseConv2d",  # 4
+    "DEPTH_TO_SPACE": "DepthToSpace",  # 5
+    "DEQUANTIZE": "Dequantize",  # 6
+    # "EMBEDDING_LOOKUP" : "Unsupported",                  #7
+    "FLOOR": "Floor",  # 8
+    "FULLY_CONNECTED": "FullyConnected",  # 9
+    # "HASHTABLE_LOOKUP" : "Unsupported",                  #10
+    "L2_NORMALIZATION": "L2Normalize",  # 11
+    "L2_POOL_2D": "Pool2d",  # 12
+    "LOCAL_RESPONSE_NORMALIZATION": "Normalize",  # 13
+    "LOGISTIC": "Activation",  # 14
+    # "LSH_PROJECTION" : "Unsupported",                    #15
+    "LSTM": "LSTM",  # 16
+    "MAX_POOL_2D": "Pool2d",  # 17
+    "MUL": "Mul",  # 18
+    "RELU": "Activation",  # 19
+    "RELU_N1_TO_1": "Activation",  # 20
+    "RELU6": "Activation",  # 21
+    "RESHAPE": "Reshape",  # 22
+    "RESIZE_BILINEAR": "Scale",  # 23
+    "RNN": "RNN",  # 24
+    "SOFTMAX": "Softmax",  # 25
+    "SPACE_TO_DEPTH": "SpaceToDepth",  # 26
+    # "SVDF" : "Unsupported",                              #27
+    "TANH": "Activation",  # 28
+    # "CONCAT_EMBEDDINGS" : "Unsupported",                 #29
+    # "SKIP_GRAM" : "Unsupported",                         #30
+    # "CALL" : "Unsupported",                              #31
+    # "CUSTOM" : "Unsupported",                            #32
+    # "EMBEDDING_LOOKUP_SPARSE" : "Unsupported",           #33
+    "PAD": "Pad",  # 34
+    # "UNIDIRECTIONAL_SEQUENCE_RNN" : "Unsupported",       #35
+    "GATHER": "Gather",  # 36
+    "BATCH_TO_SPACE_ND": "BatchToSpace",  # 37
+    "SPACE_TO_BATCH_ND": "SpaceToBatch",  # 38
+    "TRANSPOSE": "Permute",  # 39
+    "MEAN": "Reduction",  # 40
+    "SUB": "Sub",  # 41
+    "DIV": "Div",  # 42
+    "SQUEEZE": "Reshape",  # 43
+    # "UNIDIRECTIONAL_SEQUENCE_LSTM" : "Unsupported",      #44
+    "STRIDED_SLICE": "StridedSlice",  # 45
+    # "BIDIRECTIONAL_SEQUENCE_RNN" : "Unsupported",        #46
+    "EXP": "ElementwiseUnary",  # 47
+    # "TOPK_V2" : "Unsupported",                           #48
+    "SPLIT": "Split",  # 49
+    "LOG_SOFTMAX": "Softmax",  # 50
+    # "DELEGATE" : "Unsupported",                          #51
+    # "BIDIRECTIONAL_SEQUENCE_LSTM" : "Unsupported",       #52
+    "CAST": "Cast",  # 53
+    "PRELU": "PRelu",  # 54
+    "MAXIMUM": "ElementwiseBinary",  # 55
+    "ARG_MAX": "Reduction",  # 56
+    "MINIMUM": "ElementwiseBinary",  # 57
+    "LESS": "ElementwiseBinary",  # 58
+    "NEG": "ElementwiseUnary",  # 59
+    "PADV2": "Pad",  # 60
+    "GREATER": "ElementwiseBinary",  # 61
+    "GREATER_EQUAL": "ElementwiseBinary",  # 62
+    "LESS_EQUAL": "ElementwiseBinary",  # 63
+    "SELECT": "Select",  # 64
+    "SLICE": "Slice",  # 65
+    "SIN": "ElementwiseUnary",  # 66
+    "TRANSPOSE_CONV": "TransposeConv2d",  # 67
+    # "SPARSE_TO_DENSE" : "Unsupported",                   #68
+    "TILE": "Tile",  # 69
+    "EXPAND_DIMS": "Reshape",  # 70
+    "EQUAL": "ElementwiseBinary",  # 71
+    "NOT_EQUAL": "ElementwiseBinary",  # 72
+    "LOG": "ElementwiseUnary",  # 73
+    "SUM": "Reduction",  # 74
+    "SQRT": "Activation",  # 75
+    "RSQRT": "ElementwiseUnary",  # 76
+    "SHAPE": "",  # 77  NOTE(review): empty names are returned as-is by tflite_op2acl; confirm intended
+    "POW": "ElementwiseBinary",  # 78
+    "ARG_MIN": "Reduction",  # 79
+    # "FAKE_QUANT" : "Unsupported",                        #80
+    "REDUCE_PROD": "Reduction",  # 81
+    "REDUCE_MAX": "Reduction",  # 82
+    "PACK": "Stack",  # 83
+    "LOGICAL_OR": "ElementwiseBinary",  # 84
+    # "ONE_HOT" : "Unsupported",                           #85
+    "LOGICAL_AND": "ElementwiseBinary",  # 86
+    "LOGICAL_NOT": "ElementwiseUnary",  # 87
+    "UNPACK": "Unstack",  # 88
+    "REDUCE_MIN": "Reduction",  # 89
+    # "FLOOR_DIV" :  "Unsupported",                        #90
+    # "REDUCE_ANY" :  "Unsupported",                       #91
+    "SQUARE": "Activation",  # 92
+    "ZEROS_LIKE": "",  # 93
+    "FILL": "Fill",  # 94
+    # "FLOOR_MOD" :  "Unsupported",                        #95
+    "RANGE": "",  # 96
+    "RESIZE_NEAREST_NEIGHBOR": "Scale",  # 97
+    "LEAKY_RELU": "Activation",  # 98
+    "SQUARED_DIFFERENCE": "ElementwiseBinary",  # 99
+    "MIRROR_PAD": "Pad",  # 100
+    "ABS": "ElementwiseUnary",  # 101
+    "SPLIT_V": "Split",  # 102
+    # "UNIQUE" :  "Unsupported",                           #103
+    # "CEIL" :  "Unsupported",                             #104
+    "REVERSE_V2": "Reverse",  # 105
+    "ADD_N": "Add",  # 106
+    "GATHER_ND": "Gather",  # 107
+    # "COS" :  "Unsupported",                              #108
+    # "WHERE" :  "Unsupported",                            #109
+    "RANK": "",  # 110
+    "ELU": "Activation",  # 111
+    # "REVERSE_SEQUENCE" : "Unsupported",                  #112
+    # "MATRIX_DIAG" : "Unsupported",                       #113
+    "QUANTIZE": "Quantize",  # 114
+    # "MATRIX_SET_DIAG" :  "Unsupported",                  #115
+    "ROUND": "ElementwiseUnary",  # 116
+    "HARD_SWISH": "Activation",  # 117
+    # "IF" :  "Unsupported",                               #118
+    # "WHILE" :  "Unsupported",                            #119
+    # "NON_MAX_SUPPRESSION_V4" :  "Unsupported",           #120
+    # "NON_MAX_SUPPRESSION_V5" :  "Unsupported",           #121
+    # "SCATTER_ND" :  "Unsupported",                       #122
+    "SELECT_V2": "Select",  # 123
+    "DENSIFY": "Cast",  # 124
+    # "SEGMENT_SUM" : "Unsupported",                       #125
+    "BATCH_MATMUL": "GEMM",  # 126
+    # "PLACEHOLDER_FOR_GREATER_OP_CODES" :  "Unsupported", #127
+    # "CUMSUM" :  "Unsupported",                           #128
+    # "CALL_ONCE" : "Unsupported",                         #129
+    # "BROADCAST_TO" : "Unsupported",                      #130
+    # "RFFT2D" :  "Unsupported",                           #131
+    # "CONV_3D" :  "Unsupported",                          #132
+    # "IMAG" : "Unsupported",                              #133
+    # "REAL" : "Unsupported",                              #134
+    # "COMPLEX_ABS" : "Unsupported",                       #135
+    # "HASHTABLE" :  "Unsupported",                        #136
+    # "HASHTABLE_FIND" :  "Unsupported",                   #137
+    # "HASHTABLE_IMPORT" :  "Unsupported",                 #138
+    # "HASHTABLE_SIZE" :  "Unsupported",                   #139
+    # "REDUCE_ALL" :  "Unsupported",                       #140
+    # "CONV_3D_TRANSPOSE" : "Unsupported",                 #141
+    # "VAR_HANDLE" :  "Unsupported",                       #142
+    # "READ_VARIABLE" :  "Unsupported",                    #143
+    # "ASSIGN_VARIABLE" :  "Unsupported",                  #144
+}
+
+
+def tflite_typecode2name(toc):
+    """Translate a TfLite data-type code into its printable name
+
+    Parameters:
+    ----------
+    toc: int
+        Numeric TfLite type code
+
+    Returns
+    ----------
+    str
+        Name registered for the given code
+
+    Raises
+    ------
+    ValueError
+        When the code has no entry in the lookup table
+    """
+    # Guard clause: reject unknown codes first, then do the plain lookup
+    if toc not in _TFLITE_TYPECODE2NAME:
+        raise ValueError("Unknown typecode %d" % toc)
+    return _TFLITE_TYPECODE2NAME[toc]
+
+
+def tflite_op2acl(top):
+    """Map TfLite operators to ComputeLibrary ones
+
+    Parameters:
+    ----------
+    top: str
+        TfLite operator name
+
+    Returns
+    ----------
+    str
+        Relevant ComputeLibrary operator name
+
+    Raises
+    ------
+    ValueError
+        If operator cannot be mapped
+    """
+    if top not in _TFLITE_TO_ACL:
+        # str.format fills the "{}" placeholder; the "%" operator on this text raises TypeError
+        raise ValueError("Operator {} does not exist in ComputeLibrary".format(top))
+    return _TFLITE_TO_ACL[top]