Blame - ethosu/vela/insert_dma.py - ml/ethos-u/ethos-u-vela

blob: b63c1ea1261172d6b146050891650108d70d32c1 [file] [log] [blame]

Tim Hall	79d07d2	2020-04-27 18:20:16 +0100	[diff] [blame^]	1	# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
				2	#
				3	# SPDX-License-Identifier: Apache-2.0
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the License); you may
				6	# not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an AS IS BASIS, WITHOUT
				13	# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
				17
				18	# Description:
				19	# Insert DMA operations into the graph for transferring weights.
				20
				21	from .nn_graph import Operation, MemArea, TensorPurpose, NpuBlockType
				22	from . import rewrite_graph
				23
				24
				25	def insert_dma_cmd(op, arch):
				26	if op.type == "DMA":
				27	return op # Already rewritten
				28	for idx, tens in enumerate(op.inputs):
				29
				30	if tens.mem_area in (MemArea.Dram, MemArea.OffChipFlash) and tens.mem_area != arch.fast_storage_mem_area:
				31	if tens.purpose == TensorPurpose.Weights:
				32	only_vector_product_consumers = True
				33	for oper in tens.consumers():
				34	if oper is None or oper.attrs.get("npu_block_type") != NpuBlockType.VectorProduct:
				35	only_vector_product_consumers = False
				36	break
				37
				38	# Tensor products has no need for DMA, tensors are only read once and can be in flash.
				39	# Other operations re-reads tensors, this is better done from SRAM.
				40	if not only_vector_product_consumers:
				41	# Insert a DMA command here, as well as a new tensor situated in SRAM of the same size.
				42	new_tens = tens.clone_into_fast_storage(arch)
				43	dma_cmd = Operation("DMA", tens.ops[0].name + "_dma")
				44	dma_cmd.inputs = [tens]
				45	dma_cmd.outputs = [new_tens]
				46	dma_cmd.attrs["source"] = tens.mem_area
				47	dma_cmd.attrs["destination"] = new_tens.mem_area
				48	dma_cmd.run_on_npu = True
				49	new_tens.ops = [dma_cmd]
				50	op.inputs[idx] = new_tens
				51	return op
				52
				53
				54	def insert_dma_commands(nng, arch, verbose_graph=False):
				55
				56	for idx, sg in enumerate(nng.subgraphs):
				57	nng.subgraphs[idx] = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [insert_dma_cmd])
				58	if verbose_graph:
				59	nng.print_graph()
				60	return nng