# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT

"""
This file contains shared functions used in the object detection scripts for
preprocessing data, preparing the network and postprocessing.
"""

import os
import cv2
import numpy as np
import pyarmnn as ann


def create_video_writer(video: cv2.VideoCapture, video_path: str, output_path: str):
    """
    Creates a video writer object to write processed frames to file.

    Args:
        video: Video capture object, contains information about data source.
        video_path: User-specified video file path.
        output_path: Optional path to save the processed video.

    Returns:
        Video writer object.
    """
    _, ext = os.path.splitext(video_path)

    if output_path is not None:
        assert os.path.isdir(output_path), f'Output directory not found: {output_path}'

    # Write to the output directory if provided, otherwise to the working directory
    output_dir = output_path if output_path is not None else str()

    # Pick a filename that does not clobber an existing output file
    i, filename = 0, os.path.join(output_dir, f'object_detection_demo{ext}')
    while os.path.exists(filename):
        i += 1
        filename = os.path.join(output_dir, f'object_detection_demo({i}){ext}')

    video_writer = cv2.VideoWriter(filename=filename,
                                   fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
                                   fps=int(video.get(cv2.CAP_PROP_FPS)),
                                   frameSize=(int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
                                              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    return video_writer
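

# Example usage (illustrative sketch; 'input.mp4' and 'output_dir' are
# hypothetical paths that must exist on disk):
#
#   video = cv2.VideoCapture('input.mp4')
#   video_writer = create_video_writer(video, 'input.mp4', 'output_dir')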


def create_network(model_file: str, backends: list):
    """
    Creates a network based on the model file and a list of backends.

    Args:
        model_file: User-specified model file.
        backends: List of backends to optimize the network for.

    Returns:
        net_id: Unique ID of the network to run.
        runtime: Runtime context for executing inference.
        input_binding_info: Contains essential information about the model input.
        output_binding_info: Used to map output tensors and their memory.
    """
    if not os.path.exists(model_file):
        raise FileNotFoundError(f'Model file not found: {model_file}')

    # Determine which parser to create based on the model file extension
    parser = None
    _, ext = os.path.splitext(model_file)
    if ext == '.tflite':
        parser = ann.ITfLiteParser()
    elif ext == '.pb':
        parser = ann.ITfParser()
    elif ext == '.onnx':
        parser = ann.IOnnxParser()
    assert parser is not None, f'Unsupported model file format: {ext}'

    network = parser.CreateNetworkFromBinaryFile(model_file)

    # Specify backends to optimize the network for
    preferred_backends = [ann.BackendId(backend) for backend in backends]

    # Select the appropriate device context and optimize the network for that device
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, messages = ann.Optimize(network, preferred_backends, runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    print(f'Preferred backends: {backends}\n{runtime.GetDeviceSpec()}\n'
          f'Optimization warnings: {messages}')

    # Load the optimized network onto the runtime device
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Get input and output binding information
    graph_id = parser.GetSubgraphCount() - 1
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    output_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, output_name)
                           for output_name in output_names]
    return net_id, runtime, input_binding_info, output_binding_info
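

# Example usage (illustrative sketch; 'ssd_mobilenet_v1.tflite' is a
# hypothetical model path, and the backend list tries the accelerated CPU
# backend before the CPU reference backend):
#
#   net_id, runtime, input_binding_info, output_binding_info = \
#       create_network('ssd_mobilenet_v1.tflite', ['CpuAcc', 'CpuRef'])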


def dict_labels(labels_file: str):
    """
    Creates a labels dictionary from the input labels file.

    Args:
        labels_file: Default or user-specified file containing the model output labels.

    Returns:
        A dictionary keyed on the classification index, with values corresponding to
        labels and randomly generated RGB colors.
    """
    labels_dict = {}
    with open(labels_file, 'r') as labels:
        for index, line in enumerate(labels):
            # Map each class index to a (label, random color) pair
            labels_dict[index] = line.strip('\n'), tuple(np.random.random(size=3) * 255)
    return labels_dict
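

# For a labels file whose first lines are, for example:
#
#   person
#   bicycle
#
# dict_labels() returns {0: ('person', (R, G, B)), 1: ('bicycle', (R, G, B)), ...},
# where each color is a tuple of random floats in [0, 255).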


def resize_with_aspect_ratio(frame: np.ndarray, input_binding_info: tuple):
    """
    Resizes the frame while maintaining aspect ratio, padding any empty space.

    Args:
        frame: Captured frame.
        input_binding_info: Contains the shape of the model input layer.

    Returns:
        Frame resized to the size of the model input layer.
    """
    aspect_ratio = frame.shape[1] / frame.shape[0]
    model_height, model_width = list(input_binding_info[1].GetShape())[1:3]

    if aspect_ratio >= 1.0:
        new_height, new_width = int(model_width / aspect_ratio), model_width
        b_padding, r_padding = model_height - new_height, 0
    else:
        new_height, new_width = model_height, int(model_height * aspect_ratio)
        b_padding, r_padding = 0, model_width - new_width

    # Resize and pad any empty space
    frame = cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    frame = cv2.copyMakeBorder(frame, top=0, bottom=b_padding, left=0, right=r_padding,
                               borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
    return frame
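

# Worked example: for a 1280x720 frame and a 300x300 model input, the aspect
# ratio is ~1.78, so the frame is resized to 300x168 and padded with 132 black
# rows at the bottom to reach 300x300.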


def preprocess(frame: np.ndarray, input_binding_info: tuple):
    """
    Takes a frame, resizes, swaps channels and converts the data type to match
    the model input layer. The converted frame is wrapped in a const tensor
    and bound to the input tensor.

    Args:
        frame: Captured frame from video.
        input_binding_info: Contains the shape and data type of the model input layer.

    Returns:
        Input tensor.
    """
    # Swap channels and resize frame to model resolution
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized_frame = resize_with_aspect_ratio(frame, input_binding_info)

    # Expand dimensions and convert data type to match model input
    data_type = np.float32 if input_binding_info[1].GetDataType() == ann.DataType_Float32 else np.uint8
    resized_frame = np.expand_dims(np.asarray(resized_frame, dtype=data_type), axis=0)
    assert resized_frame.shape == tuple(input_binding_info[1].GetShape())

    input_tensors = ann.make_input_tensors([input_binding_info], [resized_frame])
    return input_tensors
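

# For example, with a 300x300 uint8 model input (an assumption typical of
# quantized SSD MobileNet models), the wrapped array has shape (1, 300, 300, 3),
# matching input_binding_info[1].GetShape().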


def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> list:
    """
    Executes inference for the loaded network.

    Args:
        input_tensors: The input tensors bound to the captured frame.
        output_tensors: The output tensors to be filled with the inference results.
        runtime: Runtime context for executing inference.
        net_id: Unique ID of the network to run.

    Returns:
        Inference results as a list of ndarrays.
    """
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output = ann.workload_tensors_to_ndarray(output_tensors)
    return output
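

# Example inference step (illustrative sketch; assumes net_id, runtime,
# input_binding_info and output_binding_info came from create_network(), and
# uses pyarmnn's make_output_tensors() to allocate the outputs):
#
#   input_tensors = preprocess(frame, input_binding_info)
#   output_tensors = ann.make_output_tensors(output_binding_info)
#   results = execute_network(input_tensors, output_tensors, runtime, net_id)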


def draw_bounding_boxes(frame: np.ndarray, detections: list, resize_factor, labels: dict):
    """
    Draws bounding boxes around detected objects and adds a label and confidence score.

    Args:
        frame: The original captured frame from the video source.
        detections: A list of detected objects in the form [class, [box positions], confidence].
        resize_factor: Resizing factor to scale box coordinates to output frame size.
        labels: Dictionary of labels and colors keyed on the classification index.
    """
    # Obtain the frame size once; it does not change between detections
    frame_height, frame_width = frame.shape[:2]

    for detection in detections:
        class_idx, box, confidence = detection
        label, color = labels[class_idx][0].capitalize(), labels[class_idx][1]

        # Scale the bounding box positions to the output frame size
        x_min, y_min, x_max, y_max = [int(position * resize_factor) for position in box]

        # Ensure the box stays within the frame
        x_min, y_min = max(0, x_min), max(0, y_min)
        x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)

        # Draw bounding box around detected object
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, 2)

        # Create label for detected object class
        label = f'{label} {confidence * 100:.1f}%'
        label_color = (0, 0, 0) if sum(color) > 200 else (255, 255, 255)

        # Make sure label always stays on-screen
        x_text, y_text = cv2.getTextSize(label, cv2.FONT_HERSHEY_DUPLEX, 1, 1)[0][:2]

        lbl_box_xy_min = (x_min, y_min if y_min < 25 else y_min - y_text)
        lbl_box_xy_max = (x_min + int(0.55 * x_text), y_min + y_text if y_min < 25 else y_min)
        lbl_text_pos = (x_min + 5, y_min + 16 if y_min < 25 else y_min - 5)

        # Add label and confidence value
        cv2.rectangle(frame, lbl_box_xy_min, lbl_box_xy_max, color, -1)
        cv2.putText(frame, label, lbl_text_pos, cv2.FONT_HERSHEY_DUPLEX, 0.50,
                    label_color, 1, cv2.LINE_AA)
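

# Putting it together (illustrative frame loop; decode_detections() is a
# hypothetical post-processing helper, since box decoding is model-specific
# and handled elsewhere in the demo scripts):
#
#   while True:
#       frame_present, frame = video.read()
#       if not frame_present:
#           break
#       input_tensors = preprocess(frame, input_binding_info)
#       output_tensors = ann.make_output_tensors(output_binding_info)
#       output = execute_network(input_tensors, output_tensors, runtime, net_id)
#       detections = decode_detections(output)
#       draw_bounding_boxes(frame, detections, resize_factor, labels)
#       video_writer.write(frame)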
|