MLECO-1253: update ASR example Python app
* add link to new wav2letter model from ModelZoo in README
* update model input size
* update to match new model's output
Signed-off-by: Nina Drozd <nina.drozd@arm.com>
Change-Id: I8e85d025610a458e9ae7be93fd7179c71bac5b18
diff --git a/python/pyarmnn/examples/speech_recognition/audio_utils.py b/python/pyarmnn/examples/speech_recognition/audio_utils.py
index a522a0e..f03d2e1 100644
--- a/python/pyarmnn/examples/speech_recognition/audio_utils.py
+++ b/python/pyarmnn/examples/speech_recognition/audio_utils.py
@@ -17,7 +17,7 @@
Returns:
Decoded string.
"""
- top1_results = [labels[np.argmax(row[0])] for row in model_output]
+ top1_results = [labels[np.argmax(row)] for row in model_output]
return filter_characters(top1_results)
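The hunk above switches the greedy decode from `row[0]` to the whole row, taking one argmax per output time step. A minimal sketch of that per-timestep argmax decode, using a made-up four-symbol label set (the real wav2letter model emits the full alphabet plus apostrophe, space and a blank symbol):

```python
import numpy as np

# Hypothetical reduced label set for illustration; "$" stands in for the
# blank symbol that filter_characters() would strip afterwards.
labels = {0: "a", 1: "b", 2: "c", 3: "$"}

def greedy_decode(model_output, labels):
    # One argmax per time step: each row of class scores becomes one character.
    return [labels[int(np.argmax(row))] for row in model_output]

# Three time steps of per-class scores (shape: time_steps x num_classes).
scores = np.array([
    [0.9, 0.05, 0.03, 0.02],  # argmax 0 -> "a"
    [0.1, 0.7, 0.1, 0.1],     # argmax 1 -> "b"
    [0.2, 0.1, 0.1, 0.6],     # argmax 3 -> "$" (blank)
])
print(greedy_decode(scores, labels))  # ['a', 'b', '$']
```

The original bug decoded `row[0]` for every row, so the argmax always ran over the first element's scores rather than the row for that time step.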
@@ -82,7 +82,7 @@
Slices the text appropriately depending on the window, and decodes for wav2letter output.
* First run, take the left context, and inner context.
* Every other run, take the inner context.
- Stores the current right context, and updates it for each inference. Will get used after last inference
+ Stores the current right context, and updates it for each inference. Will get used after last inference.
Args:
is_first_window: Boolean to show if it is the first window we are running inference on
@@ -93,16 +93,21 @@
current_r_context: the current right context
text: the current text string, with the latest output decoded and appended
"""
+ # For wav2letter with 148 output steps:
+ # Left context is index 0-48, inner context 49-99, right context 100-147
+ inner_context_start = 49
+ inner_context_end = 100  # exclusive slice end, so the inner context covers 49-99
+ right_context_start = 100
if is_first_window:
# Since it's the first inference, keep the left context, and inner context, and decode
- text = decode(output_result[0][0:472], labels)
+ text = decode(output_result[0][0][0][0:inner_context_end], labels)
else:
# Only decode the inner context
- text = decode(output_result[0][49:472], labels)
+ text = decode(output_result[0][0][0][inner_context_start:inner_context_end], labels)
# Store the right context, we will need it after the last inference
- current_r_context = decode(output_result[0][473:521], labels)
+ current_r_context = decode(output_result[0][0][0][right_context_start:], labels)
return current_r_context, text
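The context arithmetic in this hunk can be sketched with toy sizes. The numbers below (left 2, inner 3, right 2 over a 7-step window) and the `slice_window` helper are invented for illustration, not the model's real 49/51/48 split over 148 steps; exclusive slice ends are used consistently so adjacent windows stitch without dropping a step:

```python
# Toy context layout for one window: left = indices 0-1, inner = 2-4,
# right = 5-6. Slice ends are exclusive, so INNER_END == RIGHT_START.
INNER_START = 2
INNER_END = 5
RIGHT_START = 5

def slice_window(window, is_first_window):
    """Return (right_context, kept_slice) for one window of decoded steps."""
    if is_first_window:
        kept = window[0:INNER_END]            # keep left + inner context
    else:
        kept = window[INNER_START:INNER_END]  # keep inner context only
    right = window[RIGHT_START:]              # held back until the last window
    return right, kept

# Two overlapping windows over "abcdefghij" with stride 3 (the inner-context
# size), so every step of the sequence is decoded exactly once.
windows = [list("abcdefg"), list("defghij")]
text = []
right = []
for i, window in enumerate(windows):
    right, kept = slice_window(window, i == 0)
    text += kept
text += right  # append the final right context after the last inference
print("".join(text))  # abcdefghij
```

Because the stride equals the inner-context size, the left and right contexts of each window overlap the neighbouring windows and are discarded (except at the sequence edges), which is exactly the role of the left/inner/right split in the patched function.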