MLBEDSW-2688: LeakyRelu rewrite to LUT or MUL/MAX

Replaces LeakyRelu operations with a LUT-based activation function when possible,
otherwise with a combination of multiplication and maximization operations.
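
For reference, a minimal NumPy sketch of the two lowerings (illustrative only, not
part of this change; the int8 quantization below assumes equal input/output scales
and a zero point of 0):

    import numpy as np

    def leaky_relu_mul_max(x, alpha):
        # MUL/MAX path: for 0 <= alpha <= 1, LeakyRelu(x) == max(x, alpha * x).
        return np.maximum(x, alpha * x)

    def leaky_relu_lut_int8(alpha):
        # LUT path: precompute LeakyRelu for every possible int8 input value,
        # giving a 256-entry table (simplified; real conversion must account
        # for the input/output quantization parameters).
        table = [round(i * alpha) if i < 0 else i for i in range(-128, 128)]
        return [int(np.clip(v, -128, 127)) for v in table]

A table built this way would then be wrapped in a constant LUT tensor via the new
create_lut_tensor helper added to lut.py below.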

Signed-off-by: Louis Verhaard <louis.verhaard@arm.com>
Change-Id: I3d2eb2dba7145997c3cc711d0ef18ab355fbb416
diff --git a/ethosu/vela/lut.py b/ethosu/vela/lut.py
index 39101fa..0e8dcc9 100644
--- a/ethosu/vela/lut.py
+++ b/ethosu/vela/lut.py
@@ -18,8 +18,11 @@
 import uuid
 from functools import lru_cache
 
+import numpy as np
+
 from . import numeric_util
 from .high_level_command_stream import CommandType
+from .tensor import create_const_tensor
 from .tensor import TensorPurpose
 
 
@@ -85,6 +88,19 @@
     return slot
 
 
+def create_lut_tensor(name, values, dtype):
+    # Creates a constant LUT tensor with the given values as the lookup table.
+    # The tensor's equivalence_id is based on these values, so if multiple
+    # LUT tensors are created with identical values, they will get the same
+    # address in constant memory, and unnecessary DMA operations can be avoided.
+    sz = len(values)
+    assert sz in (256, 512)
+    ntype = np.uint8 if dtype.size_in_bytes() == 1 else np.uint32
+    tens = create_const_tensor(name, [1, 1, 1, sz], dtype, values, ntype, TensorPurpose.LUT)
+    tens.equivalence_id = create_equivalence_id(tuple(values))
+    return tens
+
+
 def optimize_high_level_cmd_stream(sg, arch):
     # - Allocates SHRAM address/lut index to LUT tensors
     # - Removes unnecessary DMA operations of LUT-s that are already present in SHRAM from sg's command stream