MLECO-2354 MLECO-2355 MLECO-2356: Moving noise reduction to public repository

* Use RNNoise model from PMZ
* Add noise reduction use case

Signed-off-by: Richard Burton <richard.burton@arm.com>
Change-Id: Ia8cc7ef102e22a5ff8bfbd3833594a4905a66057
diff --git a/scripts/py/gen_test_data_cpp.py b/scripts/py/gen_test_data_cpp.py
index a58f415..ba8f725 100644
--- a/scripts/py/gen_test_data_cpp.py
+++ b/scripts/py/gen_test_data_cpp.py
@@ -22,6 +22,7 @@
 import math
 import os
 import numpy as np
+from pathlib import Path
 
 from argparse import ArgumentParser
 from jinja2 import Environment, FileSystemLoader
@@ -43,8 +44,8 @@
                   lstrip_blocks=True)
 
 
-def write_hpp_file(header_filename, cc_file_path, header_template_file, num_iofms,
-                   ifm_array_names, ifm_size, ofm_array_names, ofm_size, iofm_data_type):
+def write_hpp_file(header_filename, cc_file_path, header_template_file, num_ifms, num_ofms,
+                   ifm_array_names, ifm_sizes, ofm_array_names, ofm_sizes, iofm_data_type):
     header_file_path = os.path.join(args.header_folder_path, header_filename)
 
     print(f"++ Generating {header_file_path}")
@@ -53,11 +54,12 @@
                                  gen_time=datetime.datetime.now(),
                                  year=datetime.datetime.now().year)
     env.get_template('TestData.hpp.template').stream(common_template_header=hdr,
-                                                   fm_count=num_iofms,
+                                                   ifm_count=num_ifms,
+                                                   ofm_count=num_ofms,
                                                    ifm_var_names=ifm_array_names,
-                                                   ifm_var_size=ifm_size,
+                                                   ifm_var_sizes=ifm_sizes,
                                                    ofm_var_names=ofm_array_names,
-                                                   ofm_var_size=ofm_size,
+                                                   ofm_var_sizes=ofm_sizes,
                                                    data_type=iofm_data_type,
                                                    namespaces=args.namespaces) \
         .dump(str(header_file_path))
@@ -116,17 +118,20 @@
     common_cc_filename = "TestData" + add_usecase_fname + ".cc"
 
     # In the data_folder_path there should be pairs of ifm-ofm
-    # It's assumed the ifm-ofm nameing convention: ifm0.npy-ofm0.npy, ifm1.npy-ofm1.npy
-    i_ofms_count = int(len([name for name in os.listdir(os.path.join(args.data_folder_path)) if name.lower().endswith('.npy')]) / 2)
+    # The assumed ifm-ofm naming convention is: ifm0.npy-ofm0.npy, ifm1.npy-ofm1.npy
+    ifms_count = int(len(list(Path(args.data_folder_path).glob('ifm*.npy'))))
+    ofms_count = int(len(list(Path(args.data_folder_path).glob('ofm*.npy'))))
+
+    #i_ofms_count = int(len([name for name in os.listdir(os.path.join(args.data_folder_path)) if name.lower().endswith('.npy')]) / 2)
 
     iofm_data_type = "int8_t"
-    if (i_ofms_count > 0):
+    if ifms_count > 0:
         iofm_data_type = "int8_t" if (np.load(os.path.join(args.data_folder_path, "ifm0.npy")).dtype == np.int8) else "uint8_t"
 
-    ifm_size = -1
-    ofm_size = -1
+    ifm_sizes = []
+    ofm_sizes = []
 
-    for idx in range(i_ofms_count):
+    for idx in range(ifms_count):
         # Save the fm cc file
         base_name = "ifm" + str(idx)
         filename = base_name+".npy"
@@ -134,11 +139,9 @@
         cc_filename = os.path.join(args.source_folder_path, array_name + ".cc")
         ifm_array_names.append(array_name)
         write_individual_cc_file(filename, cc_filename, header_filename, args.license_template, array_name, iofm_data_type)
-        if ifm_size == -1:
-            ifm_size = get_npy_vec_size(filename)
-        elif ifm_size != get_npy_vec_size(filename):
-            raise Exception(f"ifm size changed for index {idx}")
+        ifm_sizes.append(get_npy_vec_size(filename))
 
+    for idx in range(ofms_count):
         # Save the fm cc file
         base_name = "ofm" + str(idx)
         filename = base_name+".npy"
@@ -146,14 +149,11 @@
         cc_filename = os.path.join(args.source_folder_path, array_name + ".cc")
         ofm_array_names.append(array_name)
         write_individual_cc_file(filename, cc_filename, header_filename, args.license_template, array_name, iofm_data_type)
-        if ofm_size == -1:
-            ofm_size = get_npy_vec_size(filename)
-        elif ofm_size != get_npy_vec_size(filename):
-            raise Exception(f"ofm size changed for index {idx}")
+        ofm_sizes.append(get_npy_vec_size(filename))
 
     common_cc_filepath = os.path.join(args.source_folder_path, common_cc_filename)
     write_hpp_file(header_filename, common_cc_filepath, args.license_template,
-                   i_ofms_count, ifm_array_names, ifm_size, ofm_array_names, ofm_size, iofm_data_type)
+                   ifms_count, ofms_count, ifm_array_names, ifm_sizes, ofm_array_names, ofm_sizes, iofm_data_type)
 
 
 if __name__ == '__main__':
diff --git a/scripts/py/rnnoise_dump_extractor.py b/scripts/py/rnnoise_dump_extractor.py
new file mode 100644
index 0000000..947a75a
--- /dev/null
+++ b/scripts/py/rnnoise_dump_extractor.py
@@ -0,0 +1,65 @@
+#  Copyright (c) 2021 Arm Limited. All rights reserved.
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+This script can be used with the noise reduction use case to extract the dumped
+noise-reduced audio clips, saving each to a WAV file (and optionally a .npy file).
+
+Example use:
+python rnnoise_dump_extractor.py --dump_file output.bin --output_dir ./denoised_wavs/
+"""
+import soundfile as sf
+import numpy as np
+
+import argparse
+from os import path
+
+import struct
+
+def extract(fp, output_dir, export_npy):
+    while True:
+        filename_length = struct.unpack("i", fp.read(4))[0]
+
+        if filename_length == -1:
+            return
+
+        filename = struct.unpack("{}s".format(filename_length), fp.read(filename_length))[0].decode('ascii')
+        audio_clip_length = struct.unpack("I", fp.read(4))[0]
+        output_file_name = path.join(output_dir, "denoised_{}".format(filename))
+        audio_clip = fp.read(audio_clip_length)
+        
+        with sf.SoundFile(output_file_name, 'w', channels=1, samplerate=48000, subtype="PCM_16", endian="LITTLE") as wav_file:
+            wav_file.buffer_write(audio_clip, dtype='int16')
+            print("{} written to disk".format(output_file_name))
+
+        if export_npy:
+            output_file_name += ".npy"
+            pack_format = "{}h".format(int(audio_clip_length/2))
+            npdata = np.array(struct.unpack(pack_format,audio_clip)).astype(np.int16)
+            np.save(output_file_name, npdata)
+            print("{} written to disk".format(output_file_name))
+
+def main(args):
+    extract(args.dump_file, args.output_dir, args.export_npy)
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--dump_file", type=argparse.FileType('rb'), help="Dump file with audio files to extract.", required=True)
+parser.add_argument("--output_dir", help="Output directory, Warning: Duplicated file names will be overwritten.", required=True)
+parser.add_argument("--export_npy", help="Export the audio buffer in NumPy format", action="store_true")
+args = parser.parse_args()
+
+if __name__=="__main__":
+    main(args)
+
diff --git a/scripts/py/templates/TestData.cc.template b/scripts/py/templates/TestData.cc.template
index 1acd14d..d0f2698 100644
--- a/scripts/py/templates/TestData.cc.template
+++ b/scripts/py/templates/TestData.cc.template
@@ -32,7 +32,7 @@
 
 const {{data_type}}* get_ifm_data_array(const uint32_t idx)
 {
-    if (idx < NUMBER_OF_FM_FILES) {
+    if (idx < NUMBER_OF_IFM_FILES) {
         return ifm_arrays[idx];
     }
     return nullptr;
@@ -40,7 +40,7 @@
 
 const {{data_type}}* get_ofm_data_array(const uint32_t idx)
 {
-    if (idx < NUMBER_OF_FM_FILES) {
+    if (idx < NUMBER_OF_OFM_FILES) {
         return ofm_arrays[idx];
     }
     return nullptr;
diff --git a/scripts/py/templates/TestData.hpp.template b/scripts/py/templates/TestData.hpp.template
index cdedd48..413c062 100644
--- a/scripts/py/templates/TestData.hpp.template
+++ b/scripts/py/templates/TestData.hpp.template
@@ -25,16 +25,21 @@
 namespace {{namespace}} {
 {% endfor %}
 
-#define NUMBER_OF_FM_FILES  ({{fm_count}}U)
-#define IFM_DATA_SIZE  ({{ifm_var_size}}U)
-#define OFM_DATA_SIZE  ({{ofm_var_size}}U)
+#define NUMBER_OF_IFM_FILES  ({{ifm_count}}U)
+#define NUMBER_OF_OFM_FILES  ({{ofm_count}}U)
+{% for ifm_size in ifm_var_sizes %}
+#define IFM_{{loop.index0}}_DATA_SIZE  ({{ifm_size}}U)
+{% endfor %}
+{% for ofm_size in ofm_var_sizes %}
+#define OFM_{{loop.index0}}_DATA_SIZE  ({{ofm_size}}U)
+{% endfor %}
 
 {% for ifm_var_name in ifm_var_names %}
-extern const {{data_type}} {{ifm_var_name}}[IFM_DATA_SIZE];
+extern const {{data_type}} {{ifm_var_name}}[IFM_{{loop.index0}}_DATA_SIZE];
 {% endfor %}
 
 {% for ofm_var_name in ofm_var_names %}
-extern const {{data_type}} {{ofm_var_name}}[OFM_DATA_SIZE];
+extern const {{data_type}} {{ofm_var_name}}[OFM_{{loop.index0}}_DATA_SIZE];
 {% endfor %}
 
 const {{data_type}}* get_ifm_data_array(const uint32_t idx);