Blame - ethosu/vela/greedy_allocation.py - ml/ethos-u/ethos-u-vela

blob: 6b3d2c1eccd3e20fffc8b701806e62088bf39ef7 [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame^]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
				17
				18	# Description:
				19	# Allocate tensor addresses using a greedy algorithm.
				20
				21	from . import numeric_util
				22
				23
				24	class GreedyAllocator:
				25	def __init__(self, nng, arch, live_ranges, mem_area):
				26	self.nng = nng
				27	self.arch = arch
				28	self.mem_area = mem_area
				29
				30	self.live_ranges = live_ranges
				31	self.memory_required = 0
				32
				33	self.current_allocs = []
				34
				35	def alloc(self, new_lr):
				36	size = new_lr.size
				37	current_top = 0
				38	if self.current_allocs:
				39	current_top = max(start_addr + lr.size for start_addr, lr in self.current_allocs)
				40	best_offset = numeric_util.round_up(current_top, new_lr.get_alignment())
				41	best_offset_fit = (1 << 64) - 1
				42
				43	current_offset = 0
				44	for start_addr, lr in self.current_allocs:
				45	aligned_current_offset = numeric_util.round_up(current_offset, new_lr.get_alignment())
				46	if aligned_current_offset + size <= start_addr and start_addr - current_offset < best_offset_fit:
				47	best_offset = current_offset
				48	best_offset_fit = start_addr - current_offset
				49
				50	current_offset = start_addr + lr.size
				51
				52	self.memory_required = max(self.memory_required, best_offset + size)
				53	new_lr.set_address(best_offset)
				54	self.current_allocs.append((best_offset, new_lr))
				55	self.current_allocs = list(sorted(self.current_allocs))
				56
				57	def dealloc(self, lr_to_dealloc):
				58	self.current_allocs = [(start_addr, lr) for start_addr, lr in self.current_allocs if lr != lr_to_dealloc]
				59
				60	def allocate_live_ranges(self, verbose_allocation):
				61	lrs = set()
				62	for lr in self.live_ranges.ranges.values():
				63	lrs.add((lr.start_time, lr.end_time, lr))
				64
				65	lrs = sorted(lrs)
				66
				67	for curr_time, _, new_lr in lrs:
				68	for _, lr in list(self.current_allocs):
				69	if lr.end_time < curr_time:
				70	self.dealloc(lr)
				71
				72	self.alloc(new_lr)
				73
				74	assert self.verify_allocation()
				75	return self.memory_required
				76
				77	def verify_allocation(self):
				78	lrs = list(self.live_ranges.ranges.values())
				79	for n in lrs:
				80	for m in lrs:
				81	if n != m and n.overlaps_ranges(m):
				82	overlap, tens_n, tens_m = n.overlaps_address(m)
				83	if overlap:
				84	print("Solution failed, overlapping buffer!")
				85	print(tens_n.address, tens_n.address + n.size, n.name)
				86	print(tens_m.address, tens_m.address + m.size, m.name)
				87	print()
				88	return False
				89
				90	return True
				91
				92
				93	def allocate_live_ranges(nng, arch, live_ranges, mem_area, verbose_allocation=False):
				94	g = GreedyAllocator(nng, arch, live_ranges, mem_area)
				95	return g.allocate_live_ranges(verbose_allocation)