MLBEDSW-2688: Improved LUT support
- Support for more than one 256-byte LUT in SHRAM
- No DMA is performed for a LUT that is already located in SHRAM
- Added MemArea.Shram, used for LUT, to avoid false address collision
asserts during SRAM tensor allocation
- Added read access to LUT in memory access calculation
Change-Id: If4d1eded5ed029d253f4f5efb2d80495fc3eac99
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/pass_packing.py b/ethosu/vela/pass_packing.py
index 8e108db..7b69e35 100644
--- a/ethosu/vela/pass_packing.py
+++ b/ethosu/vela/pass_packing.py
@@ -381,12 +381,18 @@
input_set.add(input_tens)
ordered_input_list = []
+ # Keep LUTs in a separate list and add them as inputs at the end
+ # so that they are not accidentally assigned as ifm or ifm2
+ lut_list = []
input_refcounts = collections.defaultdict(int)
for op in ops_list:
for inp in op.inputs:
if inp in input_set:
if input_refcounts[inp] == 0:
- ordered_input_list.append(inp)
+ if inp.purpose == TensorPurpose.LUT:
+ lut_list.append(inp)
+ else:
+ ordered_input_list.append(inp)
input_refcounts[inp] += 1
name = ops_list[0].name
@@ -416,6 +422,7 @@
ps.weight_tensor = ps.get_primary_op_ifm_weights()[1]
ps.scale_tensor = ps.get_primary_op_ifm_weights_biases_ofm()[2]
ps.lut_tensor = ps.get_primary_op_lut()
+ ps.inputs.extend(lut_list)
for op in ps.ops:
op.scheduled_pass = ps