TOSA raw data output

 - Write the compiled output of .tosa input networks as raw data in NumPy .npz format

Change-Id: I97822e3a93a8fef1a95a990f23ef2c4ca5a8f73a
Signed-off-by: Diqing Zhong <diqing.zhong@arm.com>
diff --git a/ethosu/vela/rawdata_writer.py b/ethosu/vela/rawdata_writer.py
new file mode 100644
index 0000000..76765e6
--- /dev/null
+++ b/ethosu/vela/rawdata_writer.py
@@ -0,0 +1,74 @@
+# Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Description:
+# Functions used to write to a raw format (.npz) file.
+import numpy as np
+
+from .high_level_command_to_npu_op import get_region
+from .nn_graph import PassPlacement
+from .operation import Op
+
+
+def write_rawdata_output(nng, arch, filename):
+    """Dump every CPU-placed subgraph that holds a custom NPU op to an .npz file.
+
+    One archive named "<filename>_sg<index>_vela.npz" is written per such subgraph,
+    where <index> is the subgraph's position among the CPU-placed subgraphs of nng.
+    It stores the command stream and weight values, the scratch and fast scratch
+    shapes, the weight and scratch regions, and the shape, region and address of
+    every further op input and every op output (regions come from get_region()).
+
+    Parameters:
+        nng: object whose subgraphs attribute is scanned
+        arch: passed through unchanged to get_region()
+        filename: prefix used to build the name of every written file
+    """
+
+    def region_of(tensor):
+        return get_region(tensor.mem_type, arch)
+
+    cpu_subgraphs = [sg for sg in nng.subgraphs if sg.placement == PassPlacement.Cpu]
+
+    for index, subgraph in enumerate(cpu_subgraphs):
+        # Only the first custom NPU op found in the subgraph is exported
+        npu_op = next((op for ps in subgraph.passes for op in ps.ops if op.type == Op.CustomNpuOp), None)
+        if not npu_op:
+            continue
+
+        # Inputs 0-3 are unpacked by position; later inputs go to the input_* arrays
+        command_stream, weights, scratch, scratch_fast = npu_op.inputs[:4]
+        ifms = npu_op.inputs[4:]
+        ofms = npu_op.outputs
+
+        arrays = {
+            "cmd_data": command_stream.values,
+            "weight_data": weights.values,
+            "weight_region": region_of(weights),
+            "scratch_shape": scratch.shape,
+            "scratch_region": region_of(scratch),
+            "scratch_fast_shape": scratch_fast.shape,
+            "scratch_fast_region": region_of(scratch_fast),
+            "input_shape": [ifm.shape for ifm in ifms],
+            "input_region": [region_of(ifm) for ifm in ifms],
+            "input_offset": [ifm.address for ifm in ifms],
+            "output_shape": [ofm.shape for ofm in ofms],
+            "output_region": [region_of(ofm) for ofm in ofms],
+            "output_offset": [ofm.address for ofm in ofms],
+        }
+
+        # The dict keys are handed to np.savez as the names of the stored arrays
+        npz_name = f"{filename}_sg{index}_vela.npz"
+        np.savez(npz_name, **arrays)
diff --git a/ethosu/vela/vela.py b/ethosu/vela/vela.py
index 9e237f8..7400b8e 100644
--- a/ethosu/vela/vela.py
+++ b/ethosu/vela/vela.py
@@ -27,6 +27,7 @@
 from . import architecture_features
 from . import compiler_driver
 from . import model_reader
+from . import rawdata_writer
 from . import scheduler
 from . import stats_writer
 from . import tflite_writer
@@ -83,18 +84,20 @@
         arch=arch,
     )
 
-    output_filename = output_basename + "_vela.tflite"
+    output_tfl_filename = output_basename + "_vela.tflite"
     if input_name.endswith(".tflite"):
-        tflite_writer.write_tflite(nng, output_filename)
+        tflite_writer.write_tflite(nng, output_tfl_filename)
+    elif input_name.endswith(".tosa"):
+        rawdata_writer.write_rawdata_output(nng, arch, output_basename)
 
     if enable_debug_db:
-        file_offsets = calculate_operator_file_offsets(output_filename)
+        file_offsets = calculate_operator_file_offsets(output_tfl_filename)
         for idx, offset in enumerate(sorted(file_offsets)):
             sg = find_subgraph_with_command_stream_order(nng, idx)
             if sg is not None:
                 DebugDatabase.set_stream_offset(sg, offset)
         debug_filename = output_basename + "_debug.xml"
-        DebugDatabase.write(debug_filename, input_name, output_filename)
+        DebugDatabase.write(debug_filename, input_name, output_tfl_filename)
 
     if compiler_options.timing:
         stop = time.time()