MLBEDSW-1971: Verify IFM block size calculation against specification
This commit ensures the IFM block size calculation
in architecture_features.py matches the specification
by deriving the IFM upscaling factor from the tensor's
resampling mode (1 when the mode is NONE, 2 otherwise)
instead of always using 1.
This requires adding a resampling_mode attribute to the
Tensor object, which stores the upscaling mode for that
specific tensor, and making sure that shared_buffer_allocation.py
reads it from the IFM tensor and passes it on to
get_ifm_block_size().
Signed-off-by: Dwight Lidman <dwight.lidman@arm.com>
Change-Id: I4ab56086f4c694d3bf759bbad30cdb969b4a26db
diff --git a/ethosu/vela/architecture_features.py b/ethosu/vela/architecture_features.py
index 1bf9d95..b59122e 100644
--- a/ethosu/vela/architecture_features.py
+++ b/ethosu/vela/architecture_features.py
@@ -22,6 +22,7 @@
import numpy as np
from .errors import OptionError
+from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .numeric_util import round_up
from .numeric_util import round_up_divide
from .operation import NpuBlockType
@@ -347,10 +348,10 @@
return min(max_block_depth, ifm_depth)
# Calculate the size of the IFM block given a depth, target OFM block and a kernel
- def get_ifm_block_size(
- self, ifm_block_depth, ofm_block: Block, kernel: Kernel, subkernel: Block = Block(8, 8, 65536)
- ):
- upscaling = 1
+ def get_ifm_block_size(self, ifm_block_depth, ofm_block: Block,
+ kernel: Kernel, subkernel: Block = Block(8, 8, 65536),
+ ifm_resampling_mode=resampling_mode.NONE):
+ upscaling = 1 if ifm_resampling_mode == resampling_mode.NONE else 2
# Height
ifm_odd_2x_height_enable = 0
dilated_kernel_height = ((kernel.height - 1) * kernel.dilation.y) + 1
diff --git a/ethosu/vela/shared_buffer_allocation.py b/ethosu/vela/shared_buffer_allocation.py
index 2bfe594..72caa1b 100644
--- a/ethosu/vela/shared_buffer_allocation.py
+++ b/ethosu/vela/shared_buffer_allocation.py
@@ -85,6 +85,7 @@
else:
assert self.ifm_bits == 8, "Unexpected IFM bitdepth"
+ self.ifm_resampling_mode = ifm_tensor.resampling_mode
self.ifm_block_depth = arch.calc_ifm_block_depth(self.ifm_depth, self.ifm_bits)
self.ofm_tensor = ofm_tensor
@@ -105,7 +106,7 @@
def try_block(self, ofm_block: Block):
# Get IFM block configuration
ifm_block_depth = ofm_block.depth if self.is_equal_depth_op else self.ifm_block_depth
- ifm_block = self.arch.get_ifm_block_size(ifm_block_depth, ofm_block, self.kernel)
+ ifm_block = self.arch.get_ifm_block_size(ifm_block_depth, ofm_block, self.kernel, ifm_resampling_mode=self.ifm_resampling_mode)
ifm_config = self.arch.get_block_config(ifm_block.width, ifm_block.height, ifm_block.depth)
if ifm_config is None:
return None
diff --git a/ethosu/vela/tensor.py b/ethosu/vela/tensor.py
index 19258b5..160cf63 100644
--- a/ethosu/vela/tensor.py
+++ b/ethosu/vela/tensor.py
@@ -21,6 +21,7 @@
import numpy as np
from . import numeric_util
+from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .numeric_util import round_up_divide
from .range_set import MemoryRangeSet
@@ -230,6 +231,7 @@
"cpu_tensor",
"npu_tensor",
"equivalence_id",
+ "resampling_mode",
)
AllocationQuantum = 16
@@ -274,6 +276,7 @@
self.reshaped = False
self.block_traversal = TensorBlockTraversal.Default
+ self.resampling_mode = resampling_mode.NONE
def element_size(self):
if self.element_size_bytes == 0:
@@ -312,6 +315,8 @@
else:
res.quantization = None
+ res.resampling_mode = self.resampling_mode
+
return res
def clone_into_fast_storage(self, arch):
diff --git a/ethosu/vela/tflite_reader.py b/ethosu/vela/tflite_reader.py
index 850690f..4ee3963 100644
--- a/ethosu/vela/tflite_reader.py
+++ b/ethosu/vela/tflite_reader.py
@@ -20,6 +20,7 @@
import numpy as np
from .errors import UnsupportedFeatureError
+from .ethos_u55_regs.ethos_u55_regs import resampling_mode
from .nn_graph import Graph
from .nn_graph import Subgraph
from .operation import Operation
@@ -146,7 +147,8 @@
op.attrs = opt_serializer.deserialize(op_data.BuiltinOptions(), op_data.CustomOptionsAsNumpy())
if op_type.startswith("ResizeBilinear"):
- upscaled_shape = [op.inputs[0].shape[1] * 2, op.inputs[0].shape[2] * 2]
+ input_tensor = op.inputs[0]
+ upscaled_shape = [input_tensor.shape[1] * 2, input_tensor.shape[2] * 2]
out_shape = op.outputs[0].shape[1:3]
if not op.attrs["align_corners"] and out_shape == upscaled_shape:
# this means the output is supposed to be a x2 upscale,
@@ -160,6 +162,8 @@
raise UnsupportedFeatureError("ResizeBilinear: Only 2x upscaling is supported")
op.attrs.update({"filter_width": 2, "filter_height": 2, "stride_w": 1, "stride_h": 1})
+ input_tensor.resampling_mode = resampling_mode.NEAREST
+
if "stride_w" in op.attrs:
op.attrs["strides"] = (1, op.attrs["stride_h"], op.attrs["stride_w"], 1)
if "filter_width" in op.attrs: