blob: f55d1ce510405c8ee8e2f9e2c566221619e4f0b5 [file] [log] [blame]
Tim Hall79d07d22020-04-27 18:20:16 +01001# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
4#
5# Licensed under the Apache License, Version 2.0 (the License); you may
6# not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an AS IS BASIS, WITHOUT
13# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17
18# Description:
19# Functions used to write to a TensorFlow Lite format file. Supports adding in file identifiers.
20
21import flatbuffers
22
23from .tflite import Tensor
24from .tflite import QuantizationParameters
25from .tflite import Model
26from .tflite import SubGraph
27from .tflite import OperatorCode
28from .tflite import Operator
29from .tflite import Buffer
30from .tflite import Metadata
31
32import numpy as np
33
34from .tflite_mapping import datatype_inv_map, builtin_operator_inv_map, custom_prefix, BuiltinOperator
35from .nn_graph import PassPlacement
36from .tensor import TensorPurpose, MemArea
37from flatbuffers.builder import UOffsetTFlags
38
# Schema version stored in the model; the 4-character file identifier is derived from it
tflite_version = 3
tflite_file_identifier = f"TFL{tflite_version}"
41
42
43import flatbuffers.number_types as N
44from flatbuffers import encode
45
46
def FinishWithFileIdentifier(self, rootTable, fid):
    """Finish the buffer held by a FlatBuffers builder, embedding a file identifier.

    Written to be used as a method of ``flatbuffers.Builder``; ``self`` is the builder.

    rootTable: offset of the root table, passed on to ``self.Finish``.
    fid: the file identifier, a sequence of exactly 4 characters.

    Returns whatever ``self.Finish(rootTable)`` returns.
    Raises ValueError if ``fid`` is None or is not 4 characters long.
    """
    if fid is None or len(fid) != 4:
        raise ValueError("fid must be 4 chars")

    flags = N.Uint8Flags
    # NOTE(review): presumably the size of the root offset that Finish() writes in front
    # of the identifier - confirm against the flatbuffers Builder implementation
    prepSize = 4
    self.Prep(self.minalign, prepSize + len(fid))
    # Bytes are written while moving the head backwards, so the identifier is emitted last character first
    for char in reversed(fid):
        self.head = self.head - flags.bytewidth
        encode.Write(flags.packer_type, self.Bytes, self.Head(), ord(char))

    return self.Finish(rootTable)
59
60
# Attach the helper to the Builder class so that builder instances can call it as a method
flatbuffers.Builder.FinishWithFileIdentifier = FinishWithFileIdentifier
62
63
def make_vector(v):
    """Return ``v`` unchanged if ``len(v)`` works, otherwise wrap it in a one-element list."""
    try:
        len(v)
    except TypeError:
        # No usable length: treat the value as a scalar
        return [v]
    else:
        return v
70
71
class TFLiteSerialiser:
    """Serialises the CPU-placed subgraphs of a network graph into a TensorFlow Lite FlatBuffer."""

    def __init__(self, nng):
        """Collect the subgraphs, operator codes and weight re-orderings needed for writing.

        nng: graph object exposing ``subgraphs``; only the subgraphs whose ``placement`` is
            ``PassPlacement.Cpu`` are written.
        """
        self.builder = flatbuffers.Builder(0)
        self.nng = nng

        self.scratch_buf_id = 0  # Always assign scratch to buffer 0
        self.buffer_offsets_map = {}
        self.buffers_to_write = []  # have an empty array there
        self.serialised_buf = None  # Output of serialise(), cached so the builder is only finished once

        self.input_tensors = []
        self.ops_to_ignore = {"Const", "Placeholder", "SubgraphInput"}

        # Maps a weight tensor to the axis permutation applied to it when it is written
        self.tensors_to_reshape = {}

        self.subgraphs_to_write = [sg for sg in self.nng.subgraphs if sg.placement == PassPlacement.Cpu]

        all_ops = []
        for sg in self.subgraphs_to_write:
            for ps in sg.passes:
                for op in ps.ops:
                    if op.type not in self.ops_to_ignore:
                        all_ops.append(op)
                    if op.type.startswith(("Conv2D", "DepthwiseConv2d")):
                        self.tensors_to_reshape[op.inputs[1]] = (3, 0, 1, 2)
                    if op.type.startswith("FullyConnected"):
                        self.tensors_to_reshape[op.inputs[1]] = (1, 0)

        # Sorted so that every operator type gets a stable index in the operator code table
        self.operator_codes = sorted({op.type for op in all_ops})
        self.operator_code_map = {}

    def _write_vector(self, v, elem_size, alignment, prepend):
        """Write the sequence ``v`` as a vector, adding each element with ``prepend``; return its offset."""
        builder = self.builder
        builder.StartVector(elem_size, len(v), alignment)
        # Elements are prepended, so walk the input backwards to keep its order in the output
        for e in reversed(v):
            prepend(e)
        return builder.EndVector(len(v))

    def write_byte_vector(self, v, alignment=1):
        """Write ``v`` as a vector of bytes with the given alignment and return its offset."""
        return self._write_vector(v, 1, alignment, self.builder.PrependByte)

    def write_int_vector(self, v):
        """Write ``v`` as a vector of 32-bit integers and return its offset."""
        return self._write_vector(v, 4, 4, self.builder.PrependInt32)

    def write_long_vector(self, v):
        """Write ``v`` as a vector of 64-bit integers and return its offset."""
        return self._write_vector(v, 8, 8, self.builder.PrependInt64)

    def write_float_vector(self, v):
        """Write ``v`` as a vector of 32-bit floats and return its offset."""
        return self._write_vector(v, 4, 4, self.builder.PrependFloat32)

    def write_offset_vector(self, v):
        """Write ``v``, a sequence of offsets to already written objects, as a vector and return its offset."""
        return self._write_vector(v, 4, 4, self.builder.PrependUOffsetTRelative)

    def assign_buffers_to_tensors(self, tensors):
        """Give every tensor a buffer index and size ``self.buffers_to_write`` accordingly.

        Tensors that live in the same memory area as the scratch tensor share the scratch
        buffer (``self.scratch_buf_id``); every other tensor gets its own buffer, numbered
        from 1. When there is no scratch tensor, no tensor is mapped to the scratch buffer.

        Returns a dict mapping each tensor to its buffer index.
        """
        buffer_map = {}
        # The first tensor with the Scratch purpose, or None if there is none
        scratch_tensor = next((tens for tens in tensors if tens.purpose == TensorPurpose.Scratch), None)
        buf_idx = 1

        for tens in tensors:
            if scratch_tensor is not None and tens.mem_area == scratch_tensor.mem_area:
                buffer_map[tens] = self.scratch_buf_id
            else:
                buffer_map[tens] = buf_idx
                buf_idx += 1

        # Initialize buffers_to_write to a length equal to number of buffers so
        # they can be appended at the correct index during tensor serialization
        self.buffers_to_write = [None] * buf_idx

        return buffer_map

    def serialise_operator_code(self, idx, code):
        """Write the OperatorCode table for operator type ``code`` and return its offset.

        Also records ``(idx, builtin code, options serialiser)`` for ``code`` in
        ``self.operator_code_map``, which ``serialise_operator`` looks up later.
        """
        builder = self.builder
        custom_code_offset = None
        if code.startswith(custom_prefix):
            tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]
            custom_code_offset = builder.CreateString(code[len(custom_prefix) :])
        else:
            try:
                tf_code, opt_serializer = builtin_operator_inv_map[code]
            except KeyError:
                print(
                    "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping, as a custom operation"
                    % (code,)
                )
                tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]

            if tf_code == BuiltinOperator.CUSTOM:
                assert code == "NpuOp"  # Currently only support serialising NPU operators as a custom op
                custom_code_offset = builder.CreateString("ethos-u")

        self.operator_code_map[code] = (idx, tf_code, opt_serializer)

        OperatorCode.OperatorCodeStart(builder)
        OperatorCode.OperatorCodeAddBuiltinCode(builder, tf_code)
        if custom_code_offset is not None:
            OperatorCode.OperatorCodeAddCustomCode(builder, custom_code_offset)

        return OperatorCode.OperatorCodeEnd(builder)

    def serialise_quantization_parameters(self, quant):
        """Write a QuantizationParameters table for ``quant`` and return its offset.

        ``quant`` may be None, and any of its ``min``, ``max``, ``scale_f32`` and ``zero_point``
        attributes may be None; the corresponding fields are then left out of the table.
        """
        builder = self.builder

        min_offset = None
        max_offset = None
        scale_offset = None
        zero_point_offset = None
        # The vectors are written before the table is started
        if quant is not None:
            if quant.min is not None:
                min_offset = self.write_float_vector(make_vector(quant.min))
            if quant.max is not None:
                max_offset = self.write_float_vector(make_vector(quant.max))
            if quant.scale_f32 is not None:
                scale_offset = self.write_float_vector(make_vector(quant.scale_f32))
            if quant.zero_point is not None:
                zero_point_offset = self.write_long_vector(make_vector(quant.zero_point))

        QuantizationParameters.QuantizationParametersStart(builder)
        if min_offset is not None:
            QuantizationParameters.QuantizationParametersAddMin(builder, min_offset)
        if max_offset is not None:
            QuantizationParameters.QuantizationParametersAddMax(builder, max_offset)
        if scale_offset is not None:
            QuantizationParameters.QuantizationParametersAddScale(builder, scale_offset)
        if zero_point_offset is not None:
            QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point_offset)
        return QuantizationParameters.QuantizationParametersEnd(builder)

    def serialise_tensor(self, tens):
        """Write the Tensor table for ``tens`` and return its offset.

        The tensor's data (``quant_values`` if set, else ``values``, else an empty array) is
        stored as bytes in ``self.buffers_to_write`` at the buffer index assigned to the tensor.
        Relies on ``self.buffer_map`` having been set by ``serialise_subgraph``.
        """
        builder = self.builder
        tens_shape = tens.shape
        values = tens.quant_values
        if values is None:
            values = tens.values

        if values is None:
            values = np.empty(shape=(0), dtype=np.uint8)

        if tens in self.tensors_to_reshape:
            # Permute both the shape and the data of weight tensors registered in __init__
            reorder = self.tensors_to_reshape[tens]
            tens_shape = [tens_shape[idx] for idx in reorder]
            values = values.transpose(reorder)

        if tens.purpose == TensorPurpose.Scratch:
            tens_shape = [0]
            self.buffers_to_write[self.scratch_buf_id] = values.flatten().view(np.uint8)

        buf_id = self.buffer_map[tens]
        if buf_id != self.scratch_buf_id:
            self.buffers_to_write[buf_id] = values.flatten().view(np.uint8)

        shape = self.write_int_vector(tens_shape)

        name = builder.CreateString(tens.name)
        quant = self.serialise_quantization_parameters(tens.quantization)

        Tensor.TensorStart(builder)
        Tensor.TensorAddShape(builder, shape)
        Tensor.TensorAddType(builder, datatype_inv_map[tens.dtype])
        # All tensors must have a valid backing buffer, even if it is empty.
        # Empty buffers should be kept unique for TensorFlow Lite Micro
        Tensor.TensorAddBuffer(builder, buf_id)
        Tensor.TensorAddName(builder, name)
        Tensor.TensorAddQuantization(builder, quant)

        res = Tensor.TensorEnd(builder)
        return res

    def serialise_operator(self, op):
        """Write the Operator table for ``op`` and return its offset.

        Relies on ``self.tensor_map`` (set by ``serialise_subgraph``) and on
        ``self.operator_code_map`` (filled by ``serialise_operator_code``).
        """
        builder = self.builder

        inputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.inputs])
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.outputs])

        op_idx, tflop, opt_serializer = self.operator_code_map[op.type]

        builtin_opt_offset = None
        custom_opt_offset = None
        if opt_serializer is not None:
            # Work on a copy so that the derived entries below do not end up in op.attrs
            attrs = dict(op.attrs)
            if "strides" in attrs:
                attrs["stride_h"] = attrs["strides"][1]
                attrs["stride_w"] = attrs["strides"][2]
            if "ksize" in attrs:
                attrs["filter_height"] = attrs["ksize"][1]
                attrs["filter_width"] = attrs["ksize"][2]
            if "dilation" in attrs:
                attrs["dilation_h_factor"] = attrs["dilation"][1]
                attrs["dilation_w_factor"] = attrs["dilation"][2]
            if "channel_multiplier" in attrs:
                attrs["depth_multiplier"] = attrs["channel_multiplier"]

            builtin_opt_offset, custom_opt_offset = opt_serializer.serialize(builder, attrs)

        mutating_variable_inputs_offset = self.write_byte_vector([])
        Operator.OperatorStart(builder)
        Operator.OperatorAddOpcodeIndex(builder, op_idx)
        Operator.OperatorAddInputs(builder, inputs_offset)
        Operator.OperatorAddOutputs(builder, outputs_offset)

        if builtin_opt_offset is not None:
            Operator.OperatorAddBuiltinOptionsType(builder, opt_serializer.builtin_opt_type)
            Operator.OperatorAddBuiltinOptions(builder, builtin_opt_offset)
        if custom_opt_offset is not None:
            Operator.OperatorAddCustomOptions(builder, custom_opt_offset)
            Operator.OperatorAddCustomOptionsFormat(builder, opt_serializer.custom_opt_format)

        Operator.OperatorAddMutatingVariableInputs(builder, mutating_variable_inputs_offset)
        return Operator.OperatorEnd(builder)

    def serialise_subgraph(self, sg):
        """Write the SubGraph table for ``sg`` and return its offset.

        Sets ``self.tensor_map`` (tensor -> index in the subgraph's tensor list) and
        ``self.buffer_map`` (tensor -> buffer index) as a side effect.
        """
        builder = self.builder
        tensor_set = set()

        all_ops = []
        for ps in sg.passes:
            for op in ps.ops:
                if op.type not in self.ops_to_ignore:
                    all_ops.append(op)

        for op in all_ops:
            for tens in op.inputs + op.outputs:
                tensor_set.add(tens)

        # Order the tensors by name; the enumeration index breaks ties so tensors are never compared
        all_tensors = [tens for nm, idx, tens in sorted((tens.name, idx, tens) for idx, tens in enumerate(tensor_set))]

        self.tensor_map = {tens: idx for idx, tens in enumerate(all_tensors)}
        self.buffer_map = self.assign_buffers_to_tensors(all_tensors)

        tensors_offset = self.write_offset_vector([self.serialise_tensor(tens) for tens in all_tensors])

        # Add the Scratch Tensor as input to the NPU subgraph to get it allocated by TensorFlow Lite Micro
        scratch_tensor_idx = [v for k, v in self.tensor_map.items() if k.name.endswith("scratch")]

        # Make sure the input_tensors haven't been modified
        assert all(inp in sg.original_inputs for inp in sg.input_tensors)
        inputs_offset = self.write_int_vector(
            [self.tensor_map[tens] for tens in sg.original_inputs] + scratch_tensor_idx
        )
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in sg.output_tensors])

        operators_offset = self.write_offset_vector([self.serialise_operator(op) for op in all_ops])

        SubGraph.SubGraphStart(builder)
        SubGraph.SubGraphAddTensors(builder, tensors_offset)
        SubGraph.SubGraphAddInputs(builder, inputs_offset)
        SubGraph.SubGraphAddOutputs(builder, outputs_offset)

        SubGraph.SubGraphAddOperators(builder, operators_offset)

        return SubGraph.SubGraphEnd(builder)

    def write_aligned_bytes(self, buf):
        """Copy the bytes of ``buf`` into the builder at a 16-byte aligned position; return the vector offset."""
        builder = self.builder
        # EndVector() is used below without a matching StartVector(), so the flag is set by hand
        builder.nested = True
        data = bytes(buf)
        length_bytes = UOffsetTFlags.py_type(len(data))
        builder.Prep(16, length_bytes)  # Reserve aligned storage
        builder.head = UOffsetTFlags.py_type(builder.Head() - length_bytes)  # Update FlatBuffer internal pointer
        builder.Bytes[builder.Head() : builder.Head() + length_bytes] = data  # Assign bytes to aligned area
        return builder.EndVector(length_bytes)

    def serialise_buffer(self, buf):
        """Write a Buffer table for ``buf`` and return its offset; a ``buf`` of None gives a table without data."""
        builder = self.builder
        data = None
        if buf is not None:
            data = self.write_aligned_bytes(buf)
        Buffer.BufferStart(builder)
        if data is not None:
            Buffer.BufferAddData(builder, data)
        return Buffer.BufferEnd(builder)

    def serialise_metadata(self, metadata):
        """Write a Metadata table for the ``(name, buffer index)`` pair ``metadata`` and return its offset."""
        builder = self.builder
        name = builder.CreateString(metadata[0])

        Metadata.MetadataStart(builder)
        Metadata.MetadataAddName(builder, name)
        Metadata.MetadataAddBuffer(builder, metadata[1])

        return Metadata.MetadataEnd(builder)

    def serialise_model(self):
        """Write the complete Model table and return its offset.

        Besides operator codes, subgraphs and buffers this appends one extra buffer that holds
        the "OfflineMemoryAllocation" metadata: a version, the subgraph count, the tensor count
        and one address per tensor.
        """
        builder = self.builder
        operator_code_offset = self.write_offset_vector(
            [self.serialise_operator_code(idx, code) for idx, code in enumerate(self.operator_codes)]
        )

        description = builder.CreateString("Vela Optimised")

        subgraph_offset = self.write_offset_vector([self.serialise_subgraph(sg) for sg in self.subgraphs_to_write])

        # Fill the metadata buffer
        version = np.int32(0)
        subgraph_idx = np.int32(len(self.subgraphs_to_write))  # Only 1 supported currently
        nbr_tensors = np.int32(len(self.tensor_map))

        # An offset of -1 indicates that the tensor will be allocated online by Tensorflow Lite Micro
        offsets = [np.int32(-1)] * nbr_tensors

        # Ensure that the order of the offsets match the order of the tensors
        for tens, idx in self.tensor_map.items():
            if tens.mem_area == MemArea.Sram:
                offsets[idx] = np.int32(tens.address)

        metadata_buffer = np.array([version, subgraph_idx, nbr_tensors] + offsets)
        self.buffers_to_write.append(metadata_buffer)

        buffers_offset = self.write_offset_vector([self.serialise_buffer(buf) for buf in self.buffers_to_write])

        # The metadata buffer was appended last, so its index is the final one
        metadata_list = [("OfflineMemoryAllocation", len(self.buffers_to_write) - 1)]
        metadata_offset = self.write_offset_vector([self.serialise_metadata(metadata) for metadata in metadata_list])

        Model.ModelStart(builder)
        Model.ModelAddVersion(builder, tflite_version)
        Model.ModelAddOperatorCodes(builder, operator_code_offset)
        Model.ModelAddSubgraphs(builder, subgraph_offset)
        Model.ModelAddDescription(builder, description)
        Model.ModelAddBuffers(builder, buffers_offset)
        Model.ModelAddMetadata(builder, metadata_offset)
        return Model.ModelEnd(builder)

    def serialise(self):
        """Serialise the model and return the finished buffer.

        The result is kept in ``self.serialised_buf``; later calls return that same buffer
        instead of running the serialisation again on the already finished builder.
        """
        if self.serialised_buf is None:
            model = self.serialise_model()

            self.builder.FinishWithFileIdentifier(model, tflite_file_identifier)

            self.serialised_buf = self.builder.Output()

        return self.serialised_buf

    def write(self, filename):
        """Serialise the model (if not done already) and write it to the file ``filename``."""
        # Serialise before opening so that a failure does not leave an empty file behind
        buf = self.serialise()
        with open(filename, "wb") as f:
            f.write(buf)
417
418
def write_tflite(nng, filename):
    """Serialise the graph ``nng`` with TFLiteSerialiser and store the result in the file ``filename``."""
    serialised = TFLiteSerialiser(nng).serialise()

    with open(filename, "wb") as out_file:
        out_file.write(serialised)