MLBEDSW-2688: Improved LUT support

- Support for more than one 256-byte LUT in SHRAM
- No DMA is performed for a LUT that is already located in SHRAM
- Added MemArea.Shram, used for LUT, to avoid spurious address-collision
  assertions during SRAM tensor allocation
- Added read access to LUT in memory access calculation

Change-Id: If4d1eded5ed029d253f4f5efb2d80495fc3eac99
Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
diff --git a/ethosu/vela/compiler_driver.py b/ethosu/vela/compiler_driver.py
index f407fdc..5e9e38f 100644
--- a/ethosu/vela/compiler_driver.py
+++ b/ethosu/vela/compiler_driver.py
@@ -22,6 +22,7 @@
 from . import high_level_command_stream_generator
 from . import insert_dma
 from . import live_range
+from . import lut
 from . import mark_tensors
 from . import npu_performance
 from . import npu_serialisation
@@ -198,6 +199,7 @@
         high_level_command_stream_generator.generate_high_level_command_stream(
             nng, sg, arch, options.verbose_high_level_command_stream
         )
+        lut.optimize_high_level_cmd_stream(sg, arch)
         register_command_stream_generator.generate_register_command_stream(
             nng, sg, arch, options.verbose_register_command_stream
         )