# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT

"""
This file contains shared functions used in the object detection scripts for
preprocessing data, preparing the network and postprocessing.
"""

import os
import cv2
import numpy as np
import pyarmnn as ann


def create_video_writer(video: cv2.VideoCapture, video_path: str, output_path: str):
    """
    Creates a video writer object to write processed frames to file.

    Args:
        video: Video capture object, contains information about the data source.
        video_path: User-specified video file path.
        output_path: Optional path to save the processed video.

    Returns:
        Video writer object.
    """
    _, ext = os.path.splitext(video_path)

    if output_path is not None:
        assert os.path.isdir(output_path)

    i, filename = 0, os.path.join(output_path if output_path is not None else str(), f'object_detection_demo{ext}')
    while os.path.exists(filename):
        i += 1
        filename = os.path.join(output_path if output_path is not None else str(), f'object_detection_demo({i}){ext}')

    video_writer = cv2.VideoWriter(filename=filename,
                                   fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
                                   fps=int(video.get(cv2.CAP_PROP_FPS)),
                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    return video_writer


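# Example (illustrative sketch, not part of the original module): writing processed
# frames with create_video_writer above. The file and directory names are assumptions.
#
#   video = cv2.VideoCapture('input.mp4')
#   video_writer = create_video_writer(video, 'input.mp4', './processed')
#   ...
#   video_writer.write(frame)   # one call per processed frame
#   ...
#   video_writer.release()
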
def create_network(model_file: str, backends: list):
    """
    Creates a network based on the model file and a list of backends.

    Args:
        model_file: User-specified model file.
        backends: List of backends to optimize the network for.

    Returns:
        net_id: Unique ID of the network to run.
        runtime: Runtime context for executing inference.
        input_binding_info: Contains essential information about the model input.
        output_binding_info: Used to map output tensor and its memory.
    """
    if not os.path.exists(model_file):
        raise FileNotFoundError(f'Model file not found: {model_file}')

    # Determine which parser to create based on the model file extension
    parser = None
    _, ext = os.path.splitext(model_file)
    if ext == '.tflite':
        parser = ann.ITfLiteParser()
    elif ext == '.pb':
        parser = ann.ITfParser()
    elif ext == '.onnx':
        parser = ann.IOnnxParser()
    assert (parser is not None)
    network = parser.CreateNetworkFromBinaryFile(model_file)

    # Specify backends to optimize the network for
    preferred_backends = []
    for b in backends:
        preferred_backends.append(ann.BackendId(b))

    # Select the appropriate device context and optimize the network for that device
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n'
          f'Optimization warnings: {messages}')

    # Load the optimized network onto the Runtime device
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Get input and output binding information
    graph_id = parser.GetSubgraphCount() - 1
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    output_binding_info = []
    for output_name in output_names:
        out_bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name)
        output_binding_info.append(out_bind_info)
    return net_id, runtime, input_binding_info, output_binding_info


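# Example (illustrative sketch, not part of the original module): preparing a network.
# The model path and backend list below are assumptions; available backends depend on
# how Arm NN was built (e.g. 'CpuAcc', 'CpuRef', 'GpuAcc').
#
#   net_id, runtime, input_binding_info, output_binding_info = create_network(
#       'ssd_mobilenet_v1.tflite', ['CpuAcc', 'CpuRef'])
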
def dict_labels(labels_file: str):
    """
    Creates a labels dictionary from the input labels file.

    Args:
        labels_file: Default or user-specified file containing the model output labels.

    Returns:
        A dictionary keyed on the classification index, with values corresponding to
        labels and randomly generated RGB colors.
    """
    labels_dict = {}
    with open(labels_file, 'r') as labels:
        for index, line in enumerate(labels):
            labels_dict[index] = line.strip('\n'), tuple(np.random.random(size=3) * 255)
    return labels_dict


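# Example (illustrative sketch, not part of the original module): assuming a plain-text
# labels file with one class name per line, e.g. "person", "bicycle", "car":
#
#   labels = dict_labels('coco_labels.txt')
#   # labels[0] -> ('person', (r, g, b)) with a randomly generated color per class
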
def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):
    """
    Resizes frame while maintaining aspect ratio, padding any empty space.

    Args:
        frame: Captured frame.
        input_binding_info: Contains shape of model input layer.

    Returns:
        Frame resized to the size of model input layer.
    """
    aspect_ratio = frame.shape[1] / frame.shape[0]
    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]

    if aspect_ratio >= 1.0:
        new_height, new_width = int(model_width / aspect_ratio), model_width
        b_padding, r_padding = model_height - new_height, 0
    else:
        new_height, new_width = model_height, int(model_height * aspect_ratio)
        b_padding, r_padding = 0, model_width - new_width

    # Resize and pad any empty space
    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,
                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return frame


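# Worked example (illustrative, not part of the original module; assumes a square
# 300x300 model input): for a 1280x720 frame, aspect_ratio = 1280/720 ≈ 1.78, so the
# frame is resized to 300x168 and padded with 132 rows of black pixels at the bottom
# to reach 300x300.
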
def preprocess(frame: np.ndarray, input_binding_info: tuple):
    """
    Takes a frame, resizes, swaps channels and converts data type to match
    model input layer. The converted frame is wrapped in a const tensor
    and bound to the input tensor.

    Args:
        frame: Captured frame from video.
        input_binding_info: Contains shape and data type of model input layer.

    Returns:
        Input tensor.
    """
    # Swap channels and resize frame to model resolution
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)

    # Expand dimensions and convert data type to match model input
    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8
    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)
    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())

    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])
    return input_tensors


def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> list:
    """
    Executes inference for the loaded network.

    Args:
        input_tensors: The input frame tensor.
        output_tensors: The output tensor from the output node.
        runtime: Runtime context for executing inference.
        net_id: Unique ID of the network to run.

    Returns:
        Inference results as a list of ndarrays.
    """
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output = ann.workload_tensors_to_ndarray(output_tensors)
    return output


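# Example (illustrative sketch, not part of the original module): running one frame
# through the loaded network. ann.make_output_tensors allocates output tensors from
# the binding information returned by create_network.
#
#   output_tensors = ann.make_output_tensors(output_binding_info)
#   input_tensors = preprocess(frame, input_binding_info)
#   inference_output = execute_network(input_tensors, output_tensors, runtime, net_id)
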
def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict):
    """
    Draws bounding boxes around detected objects and adds a label and confidence score.

    Args:
        frame: The original captured frame from video source.
        detections: A list of detected objects in the form [class, [box positions], confidence].
        resize_factor: Resizing factor to scale box coordinates to output frame size.
        labels: Dictionary of labels and colors keyed on the classification index.
    """
    for detection in detections:
        class_idx, box, confidence = detection
        label, color = labels[class_idx][0].capitalize(), labels[class_idx][1]

        # Obtain frame size and resized bounding box positions
        frame_height, frame_width = frame.shape[:2]
        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]

        # Ensure box stays within the frame
        x_min, y_min = max(0, x_min), max(0, y_min)
        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)

        # Draw bounding box around detected object
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)

        # Create label for detected object class
        label = f'{label} {confidence * 100:.1f}%'
        label_color = (0, 0, 0) if sum(color) > 200 else (255, 255, 255)

        # Make sure label always stays on-screen
        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]

        lbl_box_xy_min = (x_min, y_min if y_min < 25 else y_min - y_text)
        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min < 25 else y_min)
        lbl_text_pos = (x_min + 5, y_min + 16 if y_min < 25 else y_min - 5)

        # Add label and confidence value
        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
                    label_color, 1, cv2.LINE_AA)
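

# End-to-end sketch (illustrative, not part of the original module): how the helpers
# above fit together in a demo script. The paths, backend names and the step that
# decodes raw model output into [class, box, confidence] detections are assumptions;
# detection decoding is model-specific (e.g. an SSD or YOLO post-processing step).
#
#   video = cv2.VideoCapture('input.mp4')
#   video_writer = create_video_writer(video, 'input.mp4', './processed')
#   net_id, runtime, input_binding_info, output_binding_info = create_network(
#       'ssd_mobilenet_v1.tflite', ['CpuAcc', 'CpuRef'])
#   output_tensors = ann.make_output_tensors(output_binding_info)
#   labels = dict_labels('coco_labels.txt')
#
#   frame_present, frame = video.read()
#   while frame_present:
#       input_tensors = preprocess(frame, input_binding_info)
#       inference_output = execute_network(input_tensors, output_tensors, runtime, net_id)
#       # detections = <model-specific decoding of inference_output>
#       # resize_factor = <ratio of original frame size to model input size>
#       draw_bounding_boxes(frame, detections, resize_factor, labels)
#       video_writer.write(frame)
#       frame_present, frame = video.read()
#
#   video.release()
#   video_writer.release()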