MLECO-1253 Adding ASR sample application using the PyArmNN API

Change-Id: I450b23800ca316a5bfd4608c8559cf4f11271c21
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
diff --git a/python/pyarmnn/examples/common/cv_utils.py b/python/pyarmnn/examples/common/cv_utils.py
new file mode 100644
index 0000000..61aa46c
--- /dev/null
+++ b/python/pyarmnn/examples/common/cv_utils.py
@@ -0,0 +1,184 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""
+This file contains helper functions for reading video/image data and for
+pre- and post-processing it using OpenCV.
+"""
+
+import os
+
+import cv2
+import numpy as np
+
+import pyarmnn as ann
+
+
+def preprocess(frame: np.ndarray, input_binding_info: tuple):
+    """
+    Takes a frame, resizes it, swaps channels and converts the data type to match
+    the model input layer. The converted frame is wrapped in a const tensor
+    and bound to the model input using the input binding info.
+
+    Args:
+        frame: Captured frame from video.
+        input_binding_info:  Contains shape and data type of model input layer.
+
+    Returns:
+        Input tensor.
+    """
+    # Swap channels and resize frame to model resolution
+    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)
+
+    # Expand dimensions and convert data type to match model input
+    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8
+    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)
+    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())
+
+    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])
+    return input_tensors
+
+
+def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):
+    """
+    Resizes frame while maintaining aspect ratio, padding any empty space.
+
+    Args:
+        frame: Captured frame.
+        input_binding_info: Contains shape of model input layer.
+
+    Returns:
+        Frame resized to the size of model input layer.
+    """
+    aspect_ratio = frame.shape[1] / frame.shape[0]
+    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]
+
+    if aspect_ratio >= 1.0:
+        new_height, new_width = int(model_width / aspect_ratio), model_width
+        b_padding, r_padding = model_height - new_height, 0
+    else:
+        new_height, new_width = model_height, int(model_height * aspect_ratio)
+        b_padding, r_padding = 0, model_width - new_width
+
+    # Resize and pad any empty space
+    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
+    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,
+                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
+    return frame
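+
+
+# Worked example (hypothetical numbers, for illustration only): a 1280x720
+# frame fed to a 300x300 input layer has aspect_ratio = 1280/720 >= 1.0, so it
+# is resized to 300x168 and padded with 132 black rows at the bottom.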
+
+
+def create_video_writer(video: cv2.VideoCapture, video_path: str, output_path: str):
+    """
+    Creates a video writer object to write processed frames to file.
+
+    Args:
+        video: Video capture object, contains information about data source.
+        video_path: User-specified video file path.
+        output_path: Optional path to save the processed video.
+
+    Returns:
+        Video writer object.
+    """
+    _, ext = os.path.splitext(video_path)
+
+    if output_path is not None:
+        assert os.path.isdir(output_path)
+
+    out_dir = output_path if output_path is not None else ''
+    i, filename = 0, os.path.join(out_dir, f'object_detection_demo{ext}')
+    while os.path.exists(filename):
+        i += 1
+        filename = os.path.join(out_dir, f'object_detection_demo({i}){ext}')
+
+    video_writer = cv2.VideoWriter(filename=filename,
+                                   fourcc=get_source_encoding_int(video),
+                                   fps=int(video.get(cv2.CAP_PROP_FPS)),
+                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
+                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
+    return video_writer
+
+
+def init_video_file_capture(video_path: str, output_path: str):
+    """
+    Creates a video capture object from a video file.
+
+    Args:
+        video_path: User-specified video file path.
+        output_path: Optional path to save the processed video.
+
+    Returns:
+        Video capture object to capture frames, video writer object to write processed
+        frames to file, plus a range over the total frame count of the video source to iterate through.
+    """
+    if not os.path.exists(video_path):
+        raise FileNotFoundError(f'Video file not found: {video_path}')
+    video = cv2.VideoCapture(video_path)
+    if not video.isOpened():
+        raise RuntimeError(f'Failed to open video capture from file: {video_path}')
+
+    video_writer = create_video_writer(video, video_path, output_path)
+    iter_frame_count = range(int(video.get(cv2.CAP_PROP_FRAME_COUNT)))
+    return video, video_writer, iter_frame_count
+
+
+def init_video_stream_capture(video_source: int):
+    """
+    Creates a video capture object from a device.
+
+    Args:
+        video_source: Device index used to read video stream.
+
+    Returns:
+        Video capture object used to capture frames from a video stream.
+    """
+    video = cv2.VideoCapture(video_source)
+    if not video.isOpened():
+        raise RuntimeError(f'Failed to open video capture for device with index: {video_source}')
+    print('Processing video stream. Press \'Esc\' key to exit the demo.')
+    return video
+
+
+def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict):
+    """
+    Draws bounding boxes around detected objects and adds a label and confidence score.
+
+    Args:
+        frame: The original captured frame from video source.
+        detections: A list of detected objects in the form [class, [box positions], confidence].
+        resize_factor: Resizing factor to scale box coordinates to output frame size.
+        labels: Dictionary of labels and colors keyed on the classification index.
+    """
+    for detection in detections:
+        class_idx, box, confidence = detection
+        label, color = labels[class_idx][0].capitalize(), labels[class_idx][1]
+
+        # Obtain frame size and resized bounding box positions
+        frame_height, frame_width = frame.shape[:2]
+        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]
+
+        # Ensure box stays within the frame
+        x_min, y_min = max(0, x_min), max(0, y_min)
+        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
+
+        # Draw bounding box around detected object
+        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)
+
+        # Create label for detected object class
+        label = f'{label} {confidence * 100:.1f}%'
+        label_color = (0, 0, 0) if sum(color) > 200 else (255, 255, 255)
+
+        # Make sure label always stays on-screen
+        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]
+
+        lbl_box_xy_min = (x_min, y_min if y_min < 25 else y_min - y_text)
+        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min < 25 else y_min)
+        lbl_text_pos = (x_min + 5, y_min + 16 if y_min < 25 else y_min - 5)
+
+        # Add label and confidence value
+        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
+        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
+                    label_color, 1, cv2.LINE_AA)
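+
+# For illustration, a hypothetical call with a single detection (class index,
+# [x_min, y_min, x_max, y_max] box, confidence); the values below are made up:
+#   draw_bounding_boxes(frame, [[0, [12, 30, 240, 320], 0.87]], 1.0, labels)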
+
+
+def get_source_encoding_int(video_capture):
+    """Returns the FourCC source encoding of a video capture object as an integer."""
+    return int(video_capture.get(cv2.CAP_PROP_FOURCC))
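+
+
+# Minimal end-to-end sketch of how these helpers fit together (illustrative
+# only; 'sample.mp4' and the detect.tflite model path are assumptions, and
+# ArmnnNetworkExecutor comes from the sibling network_executor module):
+#
+#   from network_executor import ArmnnNetworkExecutor
+#   executor = ArmnnNetworkExecutor('detect.tflite', ['CpuAcc', 'CpuRef'])
+#   video, video_writer, frame_count = init_video_file_capture('sample.mp4', '/tmp')
+#   for _ in frame_count:
+#       frame_present, frame = video.read()
+#       if not frame_present:
+#           break
+#       input_tensors = preprocess(frame, executor.input_binding_info)
+#       results = executor.run(input_tensors)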
diff --git a/python/pyarmnn/examples/common/network_executor.py b/python/pyarmnn/examples/common/network_executor.py
new file mode 100644
index 0000000..6e2c53c
--- /dev/null
+++ b/python/pyarmnn/examples/common/network_executor.py
@@ -0,0 +1,108 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+import os
+from typing import List, Tuple
+
+import pyarmnn as ann
+import numpy as np
+
+
+def create_network(model_file: str, backends: list, input_names: Tuple[str] = (), output_names: Tuple[str] = ()):
+    """
+    Creates a network based on the model file and a list of backends.
+
+    Args:
+        model_file: User-specified model file.
+        backends: List of backends to optimize network.
+        input_names: Optional tuple of input tensor names; currently unused, as
+            the input names are read back from the parsed model.
+        output_names: Optional tuple of output tensor names; currently unused, as
+            the output names are read back from the parsed model.
+
+    Returns:
+        net_id: Unique ID of the network to run.
+        runtime: Runtime context for executing inference.
+        input_binding_info: Contains essential information about the model input.
+        output_binding_info: Used to map output tensor and its memory.
+    """
+    if not os.path.exists(model_file):
+        raise FileNotFoundError(f'Model file not found: {model_file}')
+
+    _, ext = os.path.splitext(model_file)
+    if ext == '.tflite':
+        parser = ann.ITfLiteParser()
+    else:
+        raise ValueError("Supplied model file type is not supported. Supported types are [ tflite ]")
+
+    network = parser.CreateNetworkFromBinaryFile(model_file)
+
+    # Specify backends to optimize network
+    preferred_backends = [ann.BackendId(backend) for backend in backends]
+
+    # Select appropriate device context and optimize the network for that device
+    options = ann.CreationOptions()
+    runtime = ann.IRuntime(options)
+    opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),
+                                         ann.OptimizerOptions())
+    print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n'
+          f'Optimization warnings: {messages}')
+
+    # Load the optimized network onto the Runtime device
+    net_id, _ = runtime.LoadNetwork(opt_network)
+
+    # Get input and output binding information
+    graph_id = parser.GetSubgraphCount() - 1
+    input_names = parser.GetSubgraphInputTensorNames(graph_id)
+    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
+    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
+    output_binding_info = []
+    for output_name in output_names:
+        out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name)
+        output_binding_info.append(out_bind_info)
+    return net_id, runtime, input_binding_info, output_binding_info
+
+
+def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]:
+    """
+    Executes inference for the loaded network.
+
+    Args:
+        input_tensors: The input tensors to run inference on.
+        output_tensors: The output tensors that inference results are written to.
+        runtime: Runtime context for executing inference.
+        net_id: Unique ID of the network to run.
+
+    Returns:
+        list: Inference results as a list of ndarrays.
+    """
+    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
+    output = ann.workload_tensors_to_ndarray(output_tensors)
+    return output
+
+
+class ArmnnNetworkExecutor:
+
+    def __init__(self, model_file: str, backends: list):
+        """
+        Creates an inference executor for a given network and a list of backends.
+
+        Args:
+            model_file: User-specified model file.
+            backends: List of backends to optimize network.
+        """
+        self.network_id, self.runtime, self.input_binding_info, self.output_binding_info = create_network(model_file,
+                                                                                                          backends)
+        self.output_tensors = ann.make_output_tensors(self.output_binding_info)
+
+    def run(self, input_tensors: list) -> List[np.ndarray]:
+        """
+        Executes inference for the loaded network.
+
+        Args:
+            input_tensors: The input tensors to run inference on.
+
+        Returns:
+            list: Inference results as a list of ndarrays.
+        """
+        return execute_network(input_tensors, self.output_tensors, self.runtime, self.network_id)
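+
+
+# Minimal usage sketch (illustrative only; the model path, backend list and
+# 'input_data' array are assumptions, not fixed by this module):
+#
+#   executor = ArmnnNetworkExecutor('detect.tflite', ['CpuAcc', 'CpuRef'])
+#   input_tensors = ann.make_input_tensors([executor.input_binding_info], [input_data])
+#   output = executor.run(input_tensors)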
diff --git a/python/pyarmnn/examples/common/tests/conftest.py b/python/pyarmnn/examples/common/tests/conftest.py
new file mode 100644
index 0000000..5e027a0
--- /dev/null
+++ b/python/pyarmnn/examples/common/tests/conftest.py
@@ -0,0 +1,40 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+import os
+import ntpath
+
+import urllib.request
+import zipfile
+
+import pytest
+
+script_dir = os.path.dirname(__file__)
+
+
+@pytest.fixture(scope="session")
+def test_data_folder(request):
+    """
+    This fixture returns the path to a folder of test resources shared by all tests.
+    """
+
+    data_dir = os.path.join(script_dir, "testdata")
+    if not os.path.exists(data_dir):
+        os.mkdir(data_dir)
+
+    files_to_download = ["https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg",
+                         "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png",
+                         "https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi",
+                         "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"
+                         ]
+
+    for file in files_to_download:
+        path, filename = ntpath.split(file)
+        file_path = os.path.join(data_dir, filename)
+        if not os.path.exists(file_path):
+            print("\nDownloading test file: " + file_path + "\n")
+            urllib.request.urlretrieve(file, file_path)
+
+    # Unzip the downloaded archive so the .tflite model and labelmap are available to the tests
+    with zipfile.ZipFile(os.path.join(data_dir, "coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"), 'r') as zip_ref:
+        zip_ref.extractall(data_dir)
+
+    return data_dir
diff --git a/python/pyarmnn/examples/common/tests/context.py b/python/pyarmnn/examples/common/tests/context.py
new file mode 100644
index 0000000..72246c0
--- /dev/null
+++ b/python/pyarmnn/examples/common/tests/context.py
@@ -0,0 +1,7 @@
+import os
+import sys
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+import cv_utils
+import network_executor
+import utils
diff --git a/python/pyarmnn/examples/common/tests/test_network_executor.py b/python/pyarmnn/examples/common/tests/test_network_executor.py
new file mode 100644
index 0000000..e27b382
--- /dev/null
+++ b/python/pyarmnn/examples/common/tests/test_network_executor.py
@@ -0,0 +1,24 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+import os
+
+import cv2
+
+from context import network_executor
+from context import cv_utils
+
+
+def test_execute_network(test_data_folder):
+    model_path = os.path.join(test_data_folder, "detect.tflite")
+    backends = ["CpuAcc", "CpuRef"]
+
+    executor = network_executor.ArmnnNetworkExecutor(model_path, backends)
+    img = cv2.imread(os.path.join(test_data_folder, "messi5.jpg"))
+    input_tensors = cv_utils.preprocess(img, executor.input_binding_info)
+
+    output_result = executor.run(input_tensors)
+
+    # Ensure it detects a person
+    classes = output_result[1]
+    assert classes[0][0] == 0
diff --git a/python/pyarmnn/examples/common/tests/test_utils.py b/python/pyarmnn/examples/common/tests/test_utils.py
new file mode 100644
index 0000000..28d68ea
--- /dev/null
+++ b/python/pyarmnn/examples/common/tests/test_utils.py
@@ -0,0 +1,19 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+import os
+
+from context import cv_utils
+from context import utils
+
+
+def test_get_source_encoding(test_data_folder):
+    video_file = os.path.join(test_data_folder, "Megamind.avi")
+    video, video_writer, frame_count = cv_utils.init_video_file_capture(video_file, "/tmp")
+    assert cv_utils.get_source_encoding_int(video) == 1145656920  # FourCC code for 'XVID'
+
+
+def test_read_existing_labels_file(test_data_folder):
+    label_file = os.path.join(test_data_folder, "labelmap.txt")
+    labels_map = utils.dict_labels(label_file)
+    assert labels_map is not None
diff --git a/python/pyarmnn/examples/common/utils.py b/python/pyarmnn/examples/common/utils.py
new file mode 100644
index 0000000..cf09fde
--- /dev/null
+++ b/python/pyarmnn/examples/common/utils.py
@@ -0,0 +1,41 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+"""Contains helper functions that can be used across the example apps."""
+
+import os
+import errno
+from pathlib import Path
+
+import numpy as np
+
+
+def dict_labels(labels_file_path: str, include_rgb=False) -> dict:
+    """Creates a dictionary of labels from the input labels file.
+
+    Args:
+        labels_file_path: Path to the file containing labels used to map model outputs.
+        include_rgb: Adds randomly generated RGB values to the values of the
+            dictionary. Used for plotting bounding boxes of different colours.
+
+    Returns:
+        Dictionary with classification indices for keys and labels for values.
+
+    Raises:
+        FileNotFoundError:
+            Provided `labels_file_path` does not exist.
+    """
+    labels_file = Path(labels_file_path)
+    if not labels_file.is_file():
+        raise FileNotFoundError(
+            errno.ENOENT, os.strerror(errno.ENOENT), labels_file_path
+        )
+
+    labels = {}
+    with open(labels_file, "r") as f:
+        for idx, line in enumerate(f):
+            if include_rgb:
+                labels[idx] = line.strip("\n"), tuple(np.random.random(size=3) * 255)
+            else:
+                labels[idx] = line.strip("\n")
+        return labels
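+
+
+# Minimal usage sketch (illustrative only; 'labelmap.txt' is an assumed label
+# file and is not shipped with this module):
+#
+#   labels = dict_labels('labelmap.txt', include_rgb=True)
+#   class_name, color = labels[0]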