| # Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. |
| # |
| # SPDX-License-Identifier: Apache-2.0 |
| # |
| # Licensed under the Apache License, Version 2.0 (the License); you may |
| # not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # Description: |
# Mark the purpose of each tensor and select its format. Also compress the weights.
| from . import rewrite_graph |
| from . import weight_compressor |
| from .errors import OperatorError |
| from .operation import CustomType |
| from .operation import Op |
| from .tensor import MemType |
| from .tensor import TensorFormat |
| from .tensor import TensorPurpose |
| |
| |
| def purpose_from_list(lst): |
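    """Return an input-purpose selector that maps input index idx to lst[idx].

    Example: purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights,
    TensorPurpose.FeatureMap]) marks input 1 of a convolution-style op as
    Weights and the other inputs as FeatureMap (see tensor_purposes below).
    """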
| def purpose(op, idx): |
| return lst[idx] |
| |
| return purpose |
| |
| |
| def all_fm(op, idx): |
| return TensorPurpose.FeatureMap |
| |
| |
| def all_parameter(op, idx): |
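    # Currently identical to all_fm; the separate name documents the intent for
    # ops whose inputs are parameters (Op.Fill, Op.Pack and Op.Range below).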
| return TensorPurpose.FeatureMap |
| |
| |
| def input0_from_output_rest_parameter(op, idx): |
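    # Input 0 inherits the purpose of the op's first output; all other inputs
    # are marked as feature maps.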
| if idx == 0: |
| res = op.outputs[0].purpose |
| if res == TensorPurpose.Unknown: |
| print("Warning: Propagating unknown tensor purpose", op) |
| return res |
| return TensorPurpose.FeatureMap |
| |
| |
| def inputs_from_output(op, idx): |
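    # Every input inherits the purpose of the op's first output.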
| res = op.outputs[0].purpose |
| if res == TensorPurpose.Unknown: |
| print("Warning: Propagating unknown tensor purpose", op) |
| return res |
| |
| |
# Ordered lookup table of (operator set, input-purpose selector): the first
# entry whose set contains the op's type is used, and the final (None, all_fm)
# entry is the catch-all for unlisted ops.
tensor_purposes = [  # ops, input_purpose
| ( |
| set( |
| ( |
| Op.Relu, |
| Op.Relu6, |
| Op.Rsqrt, |
| Op.Abs, |
| Op.Cast, |
| Op.Exp, |
| Op.Floor, |
| Op.FloorDiv, |
| Op.FloorMod, |
| Op.SquaredDifference, |
| Op.AddN, |
| Op.Maximum, |
| Op.Minimum, |
| Op.Sigmoid, |
| Op.Tanh, |
| Op.AvgPool, |
| Op.MaxPool, |
| Op.Squeeze, |
| Op.Softmax, |
| Op.LRN, |
| Op.BatchMatMul, |
| Op.ZerosLike, |
| Op.Mul, |
| Op.Add, |
| Op.Sub, |
| Op.Div, |
| Op.LeakyRelu, |
| Op.CLZ, |
| Op.SHL, |
| Op.SHR, |
| Op.ReduceSum, |
| ) |
| ), |
| all_fm, |
| ), |
| ( |
| set((Op.Conv2D, Op.MatMul, Op.Conv2DBias, Op.DepthwiseConv2DBias, Op.FullyConnected,)), |
| purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]), |
| ), |
| ( |
| set((Op.Conv2DBackpropInputSwitchedBias,)), |
| purpose_from_list( |
| [TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap] |
| ), |
| ), |
| ( |
| set((Op.QuantizedConv2D, Op.QuantizedMatMul)), |
| purpose_from_list( |
| [ |
| TensorPurpose.FeatureMap, |
| TensorPurpose.Weights, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| ] |
| ), |
| ), |
| ( |
| set( |
| ( |
| Op.Reshape, |
| Op.Min, |
| Op.Max, |
| Op.Mean, |
| Op.Pad, |
| Op.MirrorPad, |
| Op.ArgMax, |
| Op.ArgMin, |
| Op.ExpandDims, |
| Op.ResizeNearestNeighbor, |
| Op.ResizeBilinear, |
| Op.Tile, |
| Op.Transpose, |
| ) |
| ), |
| purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]), |
| ), |
| ( |
| set((Op.QuantizedReshape,)), |
| purpose_from_list( |
| [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap] |
| ), |
| ), |
| ( |
| set((Op.Dequantize, Op.Quantize, Op.QuantizedAvgPool, Op.QuantizedMaxPool, Op.Slice, Op.SplitV,)), |
| purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]), |
| ), |
| ( |
| set((Op.BatchToSpaceND, Op.SpaceToBatchND, Op.DepthToSpace, Op.SpaceToDepth)), |
| purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]), |
| ), |
| ( |
| set((Op.BlockLSTM,)), |
| purpose_from_list( |
| [ |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.Weights, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| TensorPurpose.FeatureMap, |
| ] |
| ), |
| ), |
| (set((Op.SplitSliceRead,)), purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap])), |
| (set((Op.Shape, Op.ConcatSliceWrite)), purpose_from_list([TensorPurpose.FeatureMap])), |
| ( |
| set((Op.StridedSlice,)), |
| purpose_from_list( |
| [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap] |
| ), |
| ), |
| (set((Op.Fill, Op.Pack, Op.Range)), all_parameter), |
| (set((Op.Placeholder, Op.SubgraphInput, Op.Const,)), purpose_from_list([])), |
| (set((Op.FakeQuantWithMinMaxArgs,)), input0_from_output_rest_parameter), |
| (set((Op.Square, Op.Sqrt, Op.Log, Op.Less, Op.Identity,)), inputs_from_output,), |
| (None, all_fm), |
| ] |
| |
| |
| def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False): |
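    """Mark the purpose (e.g. feature map, weights, scratch) of every tensor in
    the graph, and set each tensor's memory area and type accordingly."""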
    def mark_tensor_helper(tens, purpose):
        if tens.purpose in (TensorPurpose.Unknown, purpose):
            tens.purpose = purpose
        elif tens.purpose != TensorPurpose.LUT:
            # a tensor already marked as LUT keeps that purpose; any other conflict is an error
            assert 0, "Cannot resolve tensor purpose %s and %s for tensor %s" % (tens.purpose, purpose, tens)
| tens.mem_area = arch.tensor_storage_mem_area[tens.purpose] |
| tens.mem_type = arch.tensor_storage_mem_type[tens.purpose] |
| |
| if len(tens.ops) == 1 and tens.ops[0].type == Op.Const: |
            # special case constants, as they must be in permanent storage
            tens.mem_area = arch.permanent_storage_mem_area
| tens.mem_type = MemType.Permanent_NPU |
| |
| def rewrite_mark_tensor_purpose(op, arch, nng): |
| # find disconnected outputs and mark as parameters |
| for tens in op.outputs: |
| if not tens.consumers(): |
| mark_tensor_helper(tens, TensorPurpose.FeatureMap) |
| |
| for ops, input_purpose in tensor_purposes: |
| if ops is None or op.type in ops: |
| if ops is None: |
| print( |
| "Warning: Don't know how to mark up purpose for", |
| op.type, |
| op.inputs, |
| "triggering all feature map fallback", |
| ) |
| |
| for idx, tens in enumerate(op.inputs): |
| if tens is None: |
| continue |
| purpose = input_purpose(op, idx) if tens.purpose == TensorPurpose.Unknown else tens.purpose |
| mark_tensor_helper(tens, purpose) |
| |
| if op.type == Op.Reshape: |
| # Reshape's input and output point to same data |
| op.outputs[0].mem_area = op.inputs[0].mem_area |
| |
| if op.type == Op.Custom and op.attrs.get("custom_type") == CustomType.ExistingNpuOp: |
| scratch_tensor = None |
| |
| if len(op.inputs) >= 3: |
| scratch_tensor = op.inputs[2] # should be existing scratch tensor |
| if scratch_tensor.name.endswith("_scratch"): |
| scratch_tensor.purpose = TensorPurpose.Scratch |
| |
| if scratch_tensor is None: |
                        raise OperatorError(op, "Scratch tensor not found.")
| |
| break |
| |
| return op |
| |
| for sg in nng.subgraphs: |
| sg = rewrite_graph.rewrite_graph_pre_order(nng, sg, arch, [], [rewrite_mark_tensor_purpose]) |
| for tens in sg.output_tensors: |
| mark_tensor_helper(tens, TensorPurpose.FeatureMap) |
| |
| if verbose_tensor_purpose: |
| nng.print_graph_with_tensors() |
| |
| return nng |
| |
| |
| def mark_tensor_format(nng, arch, verbose_tensor_format=False): |
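    """Select a format for every tensor based on its purpose, and compress the
    weights of tensors that use the WeightsCompressed format."""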
| formats_for_tensor = {} |
| |
    def init_tens(tens):
        if tens.purpose in (TensorPurpose.FeatureMap, TensorPurpose.LUT, TensorPurpose.Scratch):
            fmt = arch.default_feature_map_format
        elif tens.purpose == TensorPurpose.Weights:
            fmt = arch.default_weight_format
        elif tens.purpose == TensorPurpose.Unknown:
            fmt = TensorFormat.Unknown
        else:
            assert 0, "unknown tensor purpose %s" % (tens.purpose,)
        return fmt
| |
    def visit_tens(tens, ps):
        if tens not in formats_for_tensor:
            formats_for_tensor[tens] = init_tens(tens)
| |
| for sg in nng.subgraphs: |
| for ps in sg.passes: |
| for tens in ps.outputs: |
| visit_tens(tens, ps) |
| for tens in ps.intermediates: |
| visit_tens(tens, ps) |
| for tens in ps.inputs: |
| visit_tens(tens, ps) |
| |
| for tens, fmt in formats_for_tensor.items(): |
        if len(tens.shape) > 4:
            # tensor formats only handle shapes of up to 4 dimensions; skip higher-rank tensors
            continue
| tens.set_format(fmt, arch) |
| if fmt == TensorFormat.WeightsCompressed and tens.values is not None: |
| src_tens = tens.get_dma_src_tensor() |
| if src_tens is not None: |
| op = tens.find_npu_op() |
| if op is not None: |
| weight_compressor.compress_weights( |
| arch, nng, tens, op.type.npu_block_type, 16, 16, op.get_dilation_h_w() |
| ) |
| # Alias compressed weights back into source tensor |
| src_tens.copy_compressed_weight_info(tens) |
| |
| if verbose_tensor_format: |
| nng.print_passes_with_tensors() |
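

# Typical use from a compiler driver (hypothetical sketch; the driver and its
# "options" attributes are assumptions, not part of this module):
#   nng = mark_tensor_purpose(nng, arch, verbose_tensor_purpose=options.verbose_tensor_purpose)
#   mark_tensor_format(nng, arch, verbose_tensor_format=options.verbose_tensor_format)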