# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Mark purpose and select formats for Tensors. Also compresses the weights.
from . import rewrite_graph
from . import weight_compressor
from .architecture_features import Block
from .operation import NpuBlockType
from .tensor import TensorFormat
from .tensor import TensorPurpose


def purpose_from_list(lst):
    def purpose(op, idx):
        return lst[idx]

    return purpose
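
# Illustrative usage (a sketch, not part of the pass; names are hypothetical):
# the returned closure maps an input's index straight to its purpose, e.g. for
# a Conv2D-style op with inputs (ifm, weights, bias):
#
#   conv_purpose = purpose_from_list(
#       [TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]
#   )
#   conv_purpose(op, 1)  # -> TensorPurpose.Weights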


def all_fm(op, idx):
    return TensorPurpose.FeatureMap


def all_parameter(op, idx):
    return TensorPurpose.FeatureMap


def input0_from_output_rest_parameter(op, idx):
    if idx == 0:
        res = op.outputs[0].purpose
        if res == TensorPurpose.Unknown:
            print("Warning: Propagating unknown tensor purpose", op)
        return res
    return TensorPurpose.FeatureMap


def inputs_from_output(op, idx):
    res = op.outputs[0].purpose
    if res == TensorPurpose.Unknown:
        print("Warning: Propagating unknown tensor purpose", op)
    return res
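

# Lookup table mapping op types to an input-purpose function. Each entry pairs
# a set of op types with a callable that maps an input index to that input's
# TensorPurpose; rewrite_mark_tensor_purpose() below scans the table top to
# bottom and stops at the first matching entry, so the final (None, all_fm)
# entry acts as a catch-all fallback.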
tensor_purposes = [  # ops, input_purpose
    (
        set(
            (
                "Relu",
                "Relu6",
                "Mul",
                "Add",
                "Sub",
                "Rsqrt",
                "Abs",
                "Cast",
                "Exp",
                "Floor",
                "FloorDiv",
                "FloorMod",
                "SquaredDifference",
                "AddN",
                "BiasAdd",
                "RealDiv",
                "Maximum",
                "Minimum",
                "Sigmoid",
                "Tanh",
                "FusedBatchNorm",
                "AvgPool",
                "MaxPool",
                "Squeeze",
                "Softmax",
                "LRN",
                "Assign",
                "BatchMatMul",
                "ZerosLike",
                "ExtractImagePatches",
                "MulAct",
                "AddAct",
                "SubAct",
                "DivAct",
                "AvgPoolAct",
                "MaxPoolAct",
                "LeakyRelu",
            )
        ),
        all_fm,
    ),
    (
        set(
            (
                "Conv2D",
                "DepthwiseConv2dNative",
                "MatMul",
                "Conv2DBiasAct",
                "DepthwiseConv2dBiasAct",
                "FullyConnectedAct",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        set(("Conv2DBackpropInputSwitched",)),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.Weights, TensorPurpose.FeatureMap]),
    ),
    (
        set(("QuantizedConv2D", "QuantizedMatMul")),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.Weights,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (
        set(
            (
                "Reshape",
                "Min",
                "Max",
                "Mean",
                "Pad",
                "MirrorPad",
                "ArgMax",
                "ArgMin",
                "ExpandDims",
                "ResizeNearestNeighbor",
                "ResizeBilinear",
                "Tile",
                "Transpose",
                "Mfcc",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("QuantizedReshape", "QuantizedResizeBilinear")),
        purpose_from_list(
            [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
        ),
    ),
    (
        set(("QuantizedBiasAdd", "QuantizedAdd", "QuantizedMul")),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (
        set(
            (
                "Dequantize",
                "Quantize",
                "QuantizeV2",
                "QuantizedRelu",
                "QuantizedRelu1",
                "QuantizedRelu6",
                "QuantizedAvgPool",
                "QuantizedMaxPool",
                "Slice",
                "SplitV",
            )
        ),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("BatchToSpaceND", "SpaceToBatchND", "DepthToSpaceND", "SpaceToDepthND")),
        purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]),
    ),
    (
        set(("BlockLSTM",)),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.Weights,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (set(("SplitSliceRead",)), purpose_from_list([TensorPurpose.FeatureMap, TensorPurpose.FeatureMap])),
    (set(("Shape", "ConcatSliceWrite", "AudioSpectrogram")), purpose_from_list([TensorPurpose.FeatureMap])),
    (
        set(("StridedSlice",)),
        purpose_from_list(
            [TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap, TensorPurpose.FeatureMap]
        ),
    ),
    (set(("Fill", "Pack", "Range")), all_parameter),
    (
        set(("Requantize",)),
        purpose_from_list(
            [
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
                TensorPurpose.FeatureMap,
            ]
        ),
    ),
    (set(("Placeholder", "SubgraphInput", "Const", "VariableV2")), purpose_from_list([])),
    (set(("FakeQuantWithMinMaxArgs", "FakeQuantWithMinMaxVars")), input0_from_output_rest_parameter),
    (
        set(("Square", "Sqrt", "Log", "Less", "Enter", "Exit", "Identity", "StopGradient", "Merge", "Switch")),
        inputs_from_output,
    ),
    (None, all_fm),
]


for ops, input_purpose in tensor_purposes:
    if ops is None:
        continue
    for op in ops:
        assert len(op) > 1, "string literal has been decomposed"
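
# The assertion above guards against a missing trailing comma: in Python,
# set(("Conv2D")) iterates the bare string and yields single characters,
# while set(("Conv2D",)) builds a one-element set, so any length-1 "op"
# indicates a decomposed string literal.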


def mark_tensor_purpose(nng, arch, verbose_tensor_purpose=False):
    def mark_tensor_helper(tens, purpose):

        if tens.purpose == TensorPurpose.Unknown or tens.purpose == purpose:
            tens.purpose = purpose
        else:
            assert 0, "Cannot resolve tensor purpose %s and %s for tensor %s" % (tens.purpose, purpose, tens)
        tens.mem_area = arch.tensor_storage_mem_area[tens.purpose]

        if len(tens.ops) == 1 and tens.ops[0].type == "Const":
            tens.mem_area = (
                arch.permanent_storage_mem_area
            )  # special case constants, as they must be in permanent storage

    def rewrite_mark_tensor_purpose(op, arch):
        # find disconnected outputs and mark them as feature maps
        for tens in op.outputs:
            if not tens.consumers():
                mark_tensor_helper(tens, TensorPurpose.FeatureMap)

        for ops, input_purpose in tensor_purposes:
            if ops is None or op.type in ops:
                if ops is None:
                    print(
                        "warning: don't know how to mark up purpose for",
                        op.type,
                        op.inputs,
                        "triggering all feature map fallback",
                    )
                for idx, tens in enumerate(op.inputs):
                    purpose = input_purpose(op, idx)
                    mark_tensor_helper(tens, purpose)
                break
        return op

    for sg in nng.subgraphs:
        sg = rewrite_graph.rewrite_graph_pre_order(sg, arch, [], [rewrite_mark_tensor_purpose])
        for tens in sg.output_tensors:
            mark_tensor_helper(tens, TensorPurpose.FeatureMap)

    if verbose_tensor_purpose:
        nng.print_graph_with_tensors()

    return nng


reshape_operations = set(
    (
        "Reshape",
        "QuantizedReshape",
        "ExpandDims",
        "Squeeze",
        "BatchToSpaceND",
        "SpaceToBatchND",
        "DepthToSpaceND",
        "SpaceToDepthND",
        "Placeholder",
    )
)
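# Ops in this set only rearrange or relabel data rather than compute new
# values; the set is not referenced in this module, so it is presumably
# consumed by other compiler passes (an assumption, not verified here).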


def mark_tensor_format(nng, arch, verbose_tensor_format=False):
    formats_for_tensor = {}

    def init_tens(tens):
        if tens.purpose == TensorPurpose.FeatureMap:
            fmt = arch.default_feature_map_format
        elif tens.purpose == TensorPurpose.Weights:
            fmt = arch.default_weight_format
        else:
            assert 0, "unknown tensor purpose %s" % (tens.purpose,)
        return fmt
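
    # A weight tensor's real consumer can sit behind a DMA copy; the helper
    # below follows DMA outputs recursively until it finds an op that carries
    # an npu_block_type attribute.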
    def find_npu_usage_of_tensor(tens):
        for op in tens.consumers():
            if op.type == "DMA":
                return find_npu_usage_of_tensor(op.outputs[0])
            if "npu_block_type" in op.attrs:
                return op.attrs["npu_block_type"]
        return NpuBlockType.Default

    def visit_tens(tens, ps):
        if tens not in formats_for_tensor:
            fmt = init_tens(tens)
        else:
            fmt = formats_for_tensor[tens]

        formats_for_tensor[tens] = fmt

    for sg in nng.subgraphs:
        for ps in sg.passes:
            for tens in ps.outputs:
                visit_tens(tens, ps)
            for tens in ps.intermediates:
                visit_tens(tens, ps)
            for tens in ps.inputs:
                visit_tens(tens, ps)

    for tens, fmt in formats_for_tensor.items():
        tens.set_format(fmt, arch)
        if fmt == TensorFormat.WeightsCompressed and tens.values is not None:
            npu_block_type = find_npu_usage_of_tensor(tens)
            if len(tens.ops) == 1 and tens.ops[0].type == "DMA":
                weight_compressor.compress_weights(tens, arch, npu_block_type, Block(32, 32, 32), 32)
                # Alias compressed weights back into source tensor
                src_tens = tens.ops[0].inputs[0]
                src_tens.compressed_values = tens.compressed_values
                src_tens.storage_shape = tens.storage_shape
                src_tens.brick_size = tens.brick_size
                src_tens.weight_compression_scales = tens.weight_compression_scales
                src_tens.weight_compressed_offsets = tens.weight_compressed_offsets
                src_tens.compression_scale_for_worst_weight_stream = tens.compression_scale_for_worst_weight_stream
                src_tens.storage_compression_scale = tens.storage_compression_scale

    if verbose_tensor_format:
        nng.print_passes_with_tensors()
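

# Illustrative call order (a sketch; the real call site lives in the compiler
# driver, outside this file): purposes must be marked before formats, because
# init_tens() above asserts on any tensor whose purpose is still Unknown.
#
#   nng = mark_tensor_purpose(nng, arch)
#   nng = mark_tensor_format(nng, arch)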