# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Description:
# Conversion from high level command to NpuOperation
from enum import IntEnum
from typing import List
from typing import Optional

from .api import NpuActivation
from .api import NpuActivationOp
from .api import NpuAddressRange
from .api import NpuBlockOperation
from .api import NpuBlockTraversal
from .api import NpuConv2DOperation
from .api import NpuConvDepthWiseOperation
from .api import NpuDataType
from .api import NpuDmaOperation
from .api import NpuElementWiseOp
from .api import NpuElementWiseOperation
from .api import NpuFeatureMap
from .api import NpuKernel
from .api import NpuLayout
from .api import NpuOperation
from .api import NpuPadding
from .api import NpuPoolingOp
from .api import NpuPoolingOperation
from .api import NpuQuantization
from .api import NpuResamplingMode
from .api import NpuRoundingMode
from .api import NpuShape3D
from .api import NpuTileBox
from .architecture_features import ArchitectureFeatures
from .architecture_features import Block
from .data_type import DataType
from .high_level_command_stream import Box
from .high_level_command_stream import Command
from .high_level_command_stream import CommandType
from .high_level_command_stream import DMA
from .high_level_command_stream import NpuStripe
from .operation import Kernel
from .operation import NpuBlockType
from .operation import Op
from .operation import Operation
from .tensor import MemType
from .tensor import Tensor
from .tensor import TensorBlockTraversal
from .tensor import TensorFormat
from .tensor import TensorPurpose


unary_elementwise_ops = set((NpuElementWiseOp.ABS, NpuElementWiseOp.LRELU, NpuElementWiseOp.CLZ,))


class BasePointerIndex(IntEnum):
    WeightTensor = 0  # base address index for the Weight tensor
    ScratchTensor = 1  # base address index for the Scratch_tensor in the TensorArena
    ScratchFastTensor = 2  # base address for the Scratch_fast_tensor
    Mem2Mem = (1 << 8) | (3 << 0)  # base address slot for memory 2 memory transfer
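    # Note (illustrative only): Mem2Mem packs a flag in bit 8 on top of base pointer index 3;
    # within this module it is only used as the destination region for LUT DMA transfers
    # (see create_dma_op below)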


dtype_map = {
    DataType.uint8: NpuDataType.UINT8,
    DataType.int8: NpuDataType.INT8,
    DataType.uint16: NpuDataType.UINT16,
    DataType.int16: NpuDataType.INT16,
    DataType.int32: NpuDataType.INT32,
}


block_traversal_map = {
    TensorBlockTraversal.DepthFirst: NpuBlockTraversal.DEPTH_FIRST,
    TensorBlockTraversal.PartKernelFirst: NpuBlockTraversal.PART_KERNEL_FIRST,
}


# Maps an elementwise op type to an elementwise_mode enum value used by NPU_OP_ELEMENTWISE
elementwise_op_map = {
    Op.Mul: NpuElementWiseOp.MUL,
    Op.Add: NpuElementWiseOp.ADD,
    Op.Sub: NpuElementWiseOp.SUB,
    Op.Minimum: NpuElementWiseOp.MIN,
    Op.Maximum: NpuElementWiseOp.MAX,
    Op.LeakyRelu: NpuElementWiseOp.LRELU,
    Op.Abs: NpuElementWiseOp.ABS,
    Op.CLZ: NpuElementWiseOp.CLZ,
    Op.SHR: NpuElementWiseOp.SHR,
    Op.SHL: NpuElementWiseOp.SHL,
}


def to_npu_kernel(kernel: Kernel) -> NpuKernel:
    """Converts the given internally used kernel object to NpuKernel (of public API)"""
    return NpuKernel(
        kernel.width, kernel.height, kernel.stride.x, kernel.stride.y, kernel.dilation.x, kernel.dilation.y
    )


def to_kernel(kernel: Optional[NpuKernel]) -> Kernel:
    """Converts the given public API object to Kernel (used internally)"""
    if kernel is None:
        return Kernel(1, 1)
    return Kernel(kernel.width, kernel.height, kernel.stride_x, kernel.stride_y, kernel.dilation_x, kernel.dilation_y)
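
# Example (illustrative only): to_kernel(None) falls back to a default 1x1 kernel, while
# to_kernel(to_npu_kernel(k)) reproduces k's width/height/stride/dilation, so the two helpers
# act as inverses for fully specified kernels.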


def ifm_ifm2_correct_order(ifm_shape: List[int], ifm2_shape: List[int]) -> bool:
    if ifm_shape == []:
        # Scalar needs to be in IFM2
        return False
    if ifm2_shape == []:
        return True

    for ifm, ifm2 in zip(ifm_shape, ifm2_shape):
        if ifm != ifm2 and ifm == 1:
            # Broadcasted FM needs to be in IFM2
            return False
    return True
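
# Example for ifm_ifm2_correct_order (illustrative only): with ifm_shape=[1, 8, 8, 1] and
# ifm2_shape=[1, 8, 8, 16] the first feature map is broadcast in depth, so it has to be moved
# to IFM2 and the function returns False; two identical shapes return True.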


def get_rounding_mode(op: Operation, fused_quantize: bool) -> NpuRoundingMode:
    """Specifies type of rounding to be used"""
    rounding_mode = NpuRoundingMode.TFL
    if op.type == Op.ResizeBilinear:
        rounding_mode = NpuRoundingMode.TRUNCATE
    elif (
        op.type.npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.ConvolutionDepthWise)
        and op.ifm.dtype == DataType.int16
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    elif (
        not fused_quantize
        and op.type.is_avgpool_op()
        and op.memory_function == Op.ConcatSliceWrite
        and op.kernel.elements_wh() == 1
    ):
        rounding_mode = NpuRoundingMode.NATURAL
    rounding_mode = op.attrs.get("rounding_mode", rounding_mode)
    return rounding_mode


def create_padding(cmd: NpuStripe, primary_op: Operation) -> NpuPadding:
    if primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        return NpuPadding(top=0, left=0, bottom=0, right=0)
    top, left, bottom, right = primary_op.attrs["explicit_padding"]

    # Check if this is for horizontal ifm streaming
    if not (cmd.is_first_h_stripe and cmd.is_last_h_stripe):
        top = cmd.pad_top
        bottom = cmd.pad_bottom

    # Index from the end, since a 1x1 AvgPool with non 4-dimensional input/output may have been
    # added because an activation function had to be fused
    if len(cmd.ifm_box.start_coord) >= 2 and cmd.ifm_box.start_coord[-2] > 0:
        left = 0
    if len(cmd.ifm_box.end_coord) >= 2 and cmd.ifm_box.end_coord[-2] < Block.from_shape(cmd.ifm_tensor.shape).width:
        right = 0
    return NpuPadding(top=top, left=left, bottom=bottom, right=right)
Louis Verhaarde8a5a782020-11-02 18:04:27 +0100171
172
173def get_region(tens: Tensor, arch: ArchitectureFeatures) -> int:
174 if arch.feature_map_storage_mem_area == arch.fast_storage_mem_area:
175 base_ptr_idx_map = {
176 MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
177 MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
178 MemType.Scratch: BasePointerIndex.ScratchTensor,
179 MemType.Scratch_fast: BasePointerIndex.ScratchTensor,
180 }
181 else:
182 base_ptr_idx_map = {
183 MemType.Permanent_NPU: BasePointerIndex.WeightTensor,
184 MemType.Permanent_CPU: BasePointerIndex.WeightTensor,
185 MemType.Scratch: BasePointerIndex.ScratchTensor,
186 MemType.Scratch_fast: BasePointerIndex.ScratchFastTensor,
187 }
188 return int(base_ptr_idx_map[tens.mem_type])
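
# Note on get_region (illustrative only): when the feature map storage area and the fast storage
# area are the same memory (shared SRAM), Scratch and Scratch_fast tensors are addressed through
# the same ScratchTensor base pointer; with a dedicated fast storage area they use separate
# base pointers.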


def get_upscale(op: Operation) -> NpuResamplingMode:
    upscale = NpuResamplingMode.NONE
    if op.type == Op.ResizeBilinear:
        # perform nearest neighbor upscale
        upscale = NpuResamplingMode.NEAREST
    elif op.type == Op.Conv2DBackpropInputSwitchedBias:
        # perform insert zero upscale
        upscale = NpuResamplingMode.TRANSPOSE
    return upscale


def get_ifm_depth(npu_block_type: NpuBlockType, ifm_box: Box, ofm_box: Box) -> int:
    if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct, NpuBlockType.ReduceSum):
        block = ifm_box.get_block()
    else:
        block = ofm_box.get_block()
    return block.depth


def use_zero_point_0(ps, tens: Tensor, is_ifm_tensor: bool) -> bool:
    """Checks if quantization should use 0 as zero point"""
    if tens.dtype == DataType.int32 and is_ifm_tensor:
        return True
    if ps.primary_op.type not in (Op.AvgPool, Op.ResizeBilinear, Op.CLZ, Op.SHL):
        return False
    fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    forced_ofm_quantization = ps.primary_op.forced_output_quantization
    use_0 = (
        (ps.primary_op.activation is None or forced_ofm_quantization is not None)
        and (ps.primary_op.memory_function != Op.ConcatSliceWrite)
        and not fused_quantize
    )
    return use_0


def get_ifm_or_ifm2_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for IFM/IFM2"""
    if tens.quantization is None:
        return None
    if use_zero_point_0(ps, tens, True):
        zero_point = 0
    else:
        zero_point = int(tens.quantization.zero_point)
    return NpuQuantization(scale_f32=tens.quantization.scale_f32, zero_point=zero_point)


def get_ofm_quantization(ps, tens: Tensor) -> Optional[NpuQuantization]:
    """Gets quantization for OFM"""
    op = ps.primary_op
    # Check if the operation's output quantization should be used instead of the output tensor's
    # quantization (used in LUTs)
    ofm_quant = op.forced_output_quantization if op.forced_output_quantization is not None else tens.quantization
    if ofm_quant is None:
        return None
    if use_zero_point_0(ps, tens, False):
        zero_point = 0
    else:
        zero_point = int(ofm_quant.zero_point)
    return NpuQuantization(scale_f32=ofm_quant.scale_f32, zero_point=zero_point)


def create_feature_map(tens: Tensor, box: Box, arch: ArchitectureFeatures) -> NpuFeatureMap:
    """Creates feature map with common fields populated"""
    fm = NpuFeatureMap()
    fm.region = get_region(tens, arch)
    fm.data_type = dtype_map[tens.dtype]
    if tens.format == TensorFormat.NHWC:
        fm.layout = NpuLayout.NHWC
    elif tens.format == TensorFormat.NHCWB16:
        fm.layout = NpuLayout.NHCWB16
    else:
        assert 0, "Incorrect tensor format"
    height_0, height_1, width_0, addresses = tens.addresses_for_rolling_buffer(box.start_coord, box.end_coord)
    for idx, addr in enumerate(addresses):
        if addr is None:
            addresses[idx] = 0
    fm.tiles = NpuTileBox(
        height_0=height_0, height_1=height_1, width_0=width_0, addresses=[int(addr) for addr in addresses]
    )
    strides = tens.get_strides()
    fm.strides = NpuShape3D(height=int(strides[2]), width=int(strides[3]), depth=int(strides[1]))
    return fm
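
# Note on create_feature_map (illustrative only): the NpuTileBox describes a feature map that may
# be split over multiple memory tiles (one address per tile, up to four on Ethos-U);
# addresses_for_rolling_buffer returns None for unused tiles, which are replaced by address 0
# before the tile box is built.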


def create_weights(weight_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures) -> List[NpuAddressRange]:
    """Returns address ranges for weights"""
    weights = []
    stream_index = weight_tensor.compressed_stream_index_from_coord(weight_box.start_coord)
    weight_substream_offsets = weight_tensor.compressed_values_substream_offsets[stream_index]
    substreams = len(weight_substream_offsets) - 1  # Offset list must terminate with full stream length

    # Extract weight substream offsets and calculate their lengths
    assert len(weight_substream_offsets) > 1 and (weight_substream_offsets[0] == 0)
    weight_addr = weight_tensor.address_for_coordinate(weight_box.start_coord)
    region = get_region(weight_tensor, arch)
    for core in range(substreams):
        address = weight_addr + weight_substream_offsets[core]
        length = weight_substream_offsets[core + 1] - weight_substream_offsets[core]
        addr_range = NpuAddressRange(region, int(address), int(length))
        weights.append(addr_range)
    return weights
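
# Note (illustrative only): create_weights and create_biases both emit one NpuAddressRange per
# compressed substream (the loop variable suggests one per NPU core); the offsets are cumulative,
# so offsets[core + 1] - offsets[core] gives the length of each range.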


def create_biases(
    weight_tensor: Tensor, scale_tensor: Tensor, weight_box: Box, arch: ArchitectureFeatures
) -> List[NpuAddressRange]:
    """Returns address ranges for biases"""
    biases = []
    stream_index = weight_tensor.compressed_stream_index_from_coord(weight_box.start_coord)
    scale_substream_offsets = scale_tensor.compressed_values_substream_offsets[stream_index]
    substreams = len(scale_substream_offsets) - 1  # Offset list must terminate with full stream length

    # Extract scale substream offsets and calculate their lengths
    assert len(scale_substream_offsets) > 1 and (scale_substream_offsets[0] == 0)
    scale_addr = scale_tensor.address_for_coordinate(weight_box.start_coord[-1:])

    region = get_region(scale_tensor, arch)
    for core in range(substreams):
        address = scale_addr + scale_substream_offsets[core]
        length = scale_substream_offsets[core + 1] - scale_substream_offsets[core]
        addr_range = NpuAddressRange(region, int(address), int(length))
        biases.append(addr_range)
    return biases


def create_npu_activation(op: Operation) -> NpuActivation:
    """Creates fused activation function"""
    if op.activation is None:
        return NpuActivation(NpuActivationOp.NONE_OR_RELU)
    faf = op.activation.op_type
    act_op = NpuActivationOp.NONE_OR_RELU
    if faf == Op.Tanh:
        act_op = NpuActivationOp.TANH
    elif faf == Op.Sigmoid:
        act_op = NpuActivationOp.SIGMOID
    elif faf == Op.LUT:
        act_op = NpuActivationOp.TABLE_LOOKUP
    elif not faf.is_relu_op():
        raise Exception("Unsupported fused_activation_function = " + faf.name)

    act = NpuActivation(act_op)
    act.min = op.activation.min
    act.max = op.activation.max
    act.lookup_table_index = op.activation.lut_index
    return act


def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: ArchitectureFeatures):
    """Sets common fields of the given operation"""
    ps = cmd.ps
    op = ps.primary_op

    ifm_height = cmd.ifm_box.get_block().height
    ifm_width = Block.from_shape(cmd.ifm_tensor.shape).width
    ifm_depth = get_ifm_depth(op.type.npu_block_type, cmd.ifm_box, cmd.ofm_box)

    npu_op.ifm = create_feature_map(cmd.ifm_tensor, cmd.ifm_box, arch)
    npu_op.ifm.shape = NpuShape3D(height=ifm_height, width=ifm_width, depth=ifm_depth)
    npu_op.ifm.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm_tensor)

    out_block = cmd.ofm_box.get_block()
    npu_op.ofm = create_feature_map(cmd.ofm_tensor, cmd.ofm_box, arch)
    npu_op.ofm.shape = NpuShape3D(height=out_block.height, width=out_block.width, depth=out_block.depth)
    npu_op.ofm.quantization = get_ofm_quantization(ps, cmd.ofm_tensor)

    if cmd.weight_tensor is not None:
        npu_op.weights = create_weights(cmd.weight_tensor, cmd.weight_box, arch)
        if cmd.scale_tensor is not None:
            npu_op.biases = create_biases(cmd.weight_tensor, cmd.scale_tensor, cmd.weight_box, arch)
    npu_op.activation = create_npu_activation(op)
    npu_op.fused_quantize = any(op.type == Op.Quantize for op in ps.ops)
    npu_op.rounding_mode = get_rounding_mode(op, npu_op.fused_quantize)
    npu_op.block_config = NpuShape3D(height=ps.block_config[0], width=ps.block_config[1], depth=ps.block_config[3])

    if not op.type.is_elementwise_op():
        npu_op.padding = create_padding(cmd, op)
        npu_op.kernel = to_npu_kernel(op.kernel)
    npu_op.ifm_upscale = get_upscale(op)
    return npu_op


def create_npu_conv2d_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConv2DOperation:
    """Converts the command to NpuConv2DOperation"""
    npu_op = NpuConv2DOperation()
    set_common_op_fields(npu_op, cmd, arch)
    if cmd.ps.primary_op.type.npu_block_type == NpuBlockType.VectorProduct:
        npu_op.block_traversal = NpuBlockTraversal.DEPTH_FIRST
    else:
        npu_op.block_traversal = block_traversal_map[cmd.weight_tensor.block_traversal]
    return npu_op


def create_npu_conv_depthwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuConvDepthWiseOperation:
    """Converts the command to NpuConvDepthWiseOperation"""
    npu_op = NpuConvDepthWiseOperation()
    set_common_op_fields(npu_op, cmd, arch)
    return npu_op


def create_npu_pool_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuPoolingOperation:
    """Converts the command to NpuPoolingOperation"""
    ps = cmd.ps
    op = ps.primary_op
    pool_op = NpuPoolingOp.AVERAGE
    if op.type.is_maxpool_op():
        pool_op = NpuPoolingOp.MAX
    elif op.type.is_avgpool_op() or op.type == Op.ResizeBilinear:
        pool_op = NpuPoolingOp.AVERAGE
    elif op.type == Op.ReduceSum:
        pool_op = NpuPoolingOp.REDUCE_SUM
    else:
        assert 0, f"Unknown pool type {op.type}"
    npu_op = NpuPoolingOperation(pool_op)
    set_common_op_fields(npu_op, cmd, arch)
    # Pooling specific info
    if op.type == Op.ResizeBilinear and "rescale" in op.attrs:
        npu_op.rescale = op.attrs["rescale"]
    return npu_op


def create_npu_elementwise_op(cmd: NpuStripe, arch: ArchitectureFeatures) -> NpuElementWiseOperation:
    """Converts the command to NpuElementWiseOperation"""
    ps = cmd.ps
    op = ps.primary_op
    assert op.type in elementwise_op_map, f"Unknown elementwise type {op.type}"
    elemwise_op = elementwise_op_map[op.type]
    npu_op = NpuElementWiseOperation(elemwise_op)
    if elemwise_op not in unary_elementwise_ops:
        if not ifm_ifm2_correct_order(cmd.ifm_tensor.shape, cmd.ifm2_tensor.shape):
            # The scalar/broadcasted feature map has to be the ifm2 tensor, so switch the ifms
            cmd.ifm_tensor, cmd.ifm2_tensor = cmd.ifm2_tensor, cmd.ifm_tensor
            cmd.ifm_box, cmd.ifm2_box = cmd.ifm2_box, cmd.ifm_box
            npu_op.reversed_operands = True
        npu_op.ifm2 = create_feature_map(cmd.ifm2_tensor, cmd.ifm2_box, arch)
        npu_op.ifm2.quantization = get_ifm_or_ifm2_quantization(ps, cmd.ifm2_tensor)
        if cmd.ifm2_tensor.shape == []:
            # scalar
            assert cmd.ifm2_tensor.quant_values.size == 1
            npu_op.ifm2_scalar = cmd.ifm2_tensor.values.item(0)
            npu_op.ifm2.shape = NpuShape3D(height=0, width=0, depth=0)
        else:
            ifm2_blk = cmd.ifm2_box.get_block()
            ifm2_width = Block.from_shape(cmd.ifm2_tensor.shape).width
            npu_op.ifm2.shape = NpuShape3D(height=ifm2_blk.height, width=ifm2_width, depth=ifm2_blk.depth)
    set_common_op_fields(npu_op, cmd, arch)
    # Check if output scale needs to be overridden
    output_scale = None
    if op.type == Op.Add and "resizebilinear" in op.attrs:
        # Force the output scale to be the same as the input scale for
        # a resizebilinear 1x1 that has been converted to an add
        output_scale = npu_op.ifm2.quantization.scale_f32
    if op.type == Op.LeakyRelu:
        output_scale = op.attrs["alpha"]
    if op.type in (Op.Add, Op.Sub) and "rescale" in op.attrs:
        npu_op.rescale = op.attrs.get("rescale")
    if op.type in (Op.Add, Op.Mul, Op.Sub):
        if op.activation is not None and op.activation.op_type in (Op.Sigmoid, Op.Tanh):
            output_scale = 1 / 0x3000
    if output_scale is not None:
        npu_op.ofm.quantization = NpuQuantization(scale_f32=output_scale, zero_point=npu_op.ofm.quantization.zero_point)
    return npu_op


def create_dma_op(cmd: DMA, arch: ArchitectureFeatures) -> NpuDmaOperation:
    """Converts the command to NpuDmaOperation"""
    src_region = get_region(cmd.in_tensor, arch)
    if cmd.out_tensor.purpose == TensorPurpose.LUT:
        dest_region = BasePointerIndex.Mem2Mem
    else:
        dest_region = get_region(cmd.out_tensor, arch)

    start_coord = cmd.box.start_coord
    src_addr = cmd.in_tensor.address_for_coordinate(start_coord)
    dest_addr = cmd.out_tensor.address_for_coordinate(start_coord)

    if cmd.in_tensor.compressed_values is not None:
        if cmd.out_tensor.purpose == TensorPurpose.FSBias:
            sz = cmd.in_tensor.storage_size()
        else:
            stream_index = cmd.in_tensor.compressed_stream_index_from_coord(start_coord)
            sz = cmd.in_tensor.size_of_compressed_stream(stream_index)
    else:
        sz = cmd.in_tensor.address_for_coordinate(cmd.box.end_coord, is_top_box=True) - src_addr
    src = NpuAddressRange(src_region, int(src_addr), int(sz))
    dest = NpuAddressRange(dest_region, int(dest_addr), int(sz))
    return NpuDmaOperation(src, dest)


def convert_command_to_npu_op(cmd: Command, arch: ArchitectureFeatures) -> NpuOperation:
    """Converts the high level command to NpuOperation"""
    if cmd.cmdtype == CommandType.DMA:
        npu_op = create_dma_op(cmd, arch)
    elif cmd.cmdtype == CommandType.NpuStripe:
        npu_block_type = cmd.ps.primary_op.type.npu_block_type
        if npu_block_type in (NpuBlockType.ConvolutionMxN, NpuBlockType.VectorProduct):
            npu_op = create_npu_conv2d_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ConvolutionDepthWise:
            npu_op = create_npu_conv_depthwise_op(cmd, arch)
        elif npu_block_type in (NpuBlockType.Pooling, NpuBlockType.ReduceSum):
            npu_op = create_npu_pool_op(cmd, arch)
        elif npu_block_type == NpuBlockType.ElementWise:
            npu_op = create_npu_elementwise_op(cmd, arch)
        else:
            assert 0, f"Unknown command type {npu_block_type}"
    # add a link to the high level command for debugging purposes
    npu_op.cmd = cmd
    return npu_op
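
# Example (illustrative only, not part of the upstream code): a register command stream generator
# would typically convert an entire high level command stream before serialising the resulting
# operations, e.g.
#
#     npu_ops = [convert_command_to_npu_op(cmd, arch) for cmd in high_level_commands]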