# Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Description:
# Functions used to write to a TensorFlow Lite format file. Supports adding in file identifiers.
import flatbuffers
import flatbuffers.number_types as N
import numpy as np
from flatbuffers import encode
from flatbuffers.builder import UOffsetTFlags

from .nn_graph import PassPlacement
from .tensor import MemArea
from .tensor import TensorPurpose
from .tflite import Buffer
from .tflite import Metadata
from .tflite import Model
from .tflite import Operator
from .tflite import OperatorCode
from .tflite import QuantizationParameters
from .tflite import SubGraph
from .tflite import Tensor
from .tflite_mapping import builtin_operator_inv_map
from .tflite_mapping import BuiltinOperator
from .tflite_mapping import custom_prefix
from .tflite_mapping import datatype_inv_map

# The Python flatbuffers interface is missing a method for adding a file identifier, so patch one in here:

tflite_version = 3
tflite_file_identifier = "TFL" + str(tflite_version)


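# Writes the 4-character identifier (here "TFL3") into the buffer directly after the
# root table offset before finishing it, mirroring what Finish with a file identifier
# does in the C++ FlatBufferBuilder API.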
def FinishWithFileIdentifier(self, rootTable, fid):
    if fid is None or len(fid) != 4:
        raise Exception("fid must be 4 chars")

    flags = N.Uint8Flags
    prepSize = 4
    self.Prep(self.minalign, prepSize + len(fid))
    for i in range(3, -1, -1):
        self.head = self.head - flags.bytewidth
        encode.Write(flags.packer_type, self.Bytes, self.Head(), ord(fid[i]))

    return self.Finish(rootTable)


flatbuffers.Builder.FinishWithFileIdentifier = FinishWithFileIdentifier


def make_vector(v):
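    """Wrap a bare scalar in a list, e.g. make_vector(0.5) -> [0.5]; sequences are returned unchanged."""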
    try:
        len(v)
        return v
    except TypeError:
        return [v]


class TFLiteSerialiser:
    def __init__(self, nng):
        self.builder = flatbuffers.Builder(0)
        self.nng = nng

        self.scratch_buf_id = 0  # Always assign scratch to buffer 0
        self.buffer_offsets_map = {}
        self.buffers_to_write = []  # Resized to the correct length in assign_buffers_to_tensors

        self.input_tensors = []
        self.ops_to_ignore = set(("Const", "Placeholder", "SubgraphInput"))

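        # Maps weight tensors to the axis permutation needed to restore the original
        # TensorFlow Lite ordering when they are serialised (populated in the loop below)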
        self.tensors_to_reshape = {}

        self.subgraphs_to_write = [sg for sg in self.nng.subgraphs if sg.placement == PassPlacement.Cpu]

        all_ops = []
        for sg in self.subgraphs_to_write:
            for ps in sg.passes:
                for op in ps.ops:
                    if op.type not in self.ops_to_ignore:
                        all_ops.append(op)
                        if op.type.startswith("Conv2D") or op.type.startswith("DepthwiseConv2d"):
                            self.tensors_to_reshape[op.inputs[1]] = (3, 0, 1, 2)
                        if op.type.startswith("FullyConnected"):
                            self.tensors_to_reshape[op.inputs[1]] = (1, 0)

        self.operator_codes = list(sorted(set(op.type for op in all_ops)))
        self.operator_code_map = {}

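    # The write_*_vector helpers below emit flatbuffer vectors. FlatBuffers are built
    # back to front, so the elements are prepended in reverse order.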
    def write_byte_vector(self, v, alignment=1):
        builder = self.builder
        builder.StartVector(1, len(v), alignment)
        for e in v[::-1]:
            builder.PrependByte(e)
        return builder.EndVector(len(v))

    def write_int_vector(self, v):
        builder = self.builder
        builder.StartVector(4, len(v), 4)
        for e in v[::-1]:
            builder.PrependInt32(e)
        return builder.EndVector(len(v))

    def write_long_vector(self, v):
        builder = self.builder
        builder.StartVector(8, len(v), 8)
        for e in v[::-1]:
            builder.PrependInt64(e)
        return builder.EndVector(len(v))

    def write_float_vector(self, v):
        builder = self.builder
        builder.StartVector(4, len(v), 4)
        for e in v[::-1]:
            builder.PrependFloat32(e)
        return builder.EndVector(len(v))

    def write_offset_vector(self, v):
        builder = self.builder
        builder.StartVector(4, len(v), 4)
        for e in v[::-1]:
            builder.PrependUOffsetTRelative(e)
        return builder.EndVector(len(v))

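    # Tensors that live in the same memory area as the scratch tensor all share
    # buffer 0; every other tensor is given a buffer of its own.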
    def assign_buffers_to_tensors(self, tensors):
        buffer_map = {}
        scratch_tensor = [tens for tens in tensors if tens.purpose == TensorPurpose.Scratch][0]
        buf_idx = 1

        for tens in tensors:
            if tens.mem_area == scratch_tensor.mem_area:
                buffer_map[tens] = self.scratch_buf_id
            else:
                buffer_map[tens] = buf_idx
                buf_idx += 1

        # Initialise buffers_to_write to one entry per buffer so that each buffer's
        # data can be filled in at the correct index during tensor serialisation
        self.buffers_to_write = [None] * buf_idx

        return buffer_map

    def serialise_operator_code(self, idx, code):
        builder = self.builder
        custom_code_offset = None
        if code.startswith(custom_prefix):
            tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]
            custom_code_offset = builder.CreateString(code[len(custom_prefix) :])
        else:
            try:
                tf_code, opt_serializer = builtin_operator_inv_map[code]
            except KeyError:
                print(
                    "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping, "
                    "as a custom operation" % (code,)
                )
                tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]

        if tf_code == BuiltinOperator.CUSTOM:
            assert code == "NpuOp"  # Currently only support serialising NPU operators as a custom op
            custom_code_offset = builder.CreateString("ethos-u")

        self.operator_code_map[code] = (idx, tf_code, opt_serializer)

        OperatorCode.OperatorCodeStart(builder)
        OperatorCode.OperatorCodeAddBuiltinCode(builder, tf_code)
        if custom_code_offset is not None:
            OperatorCode.OperatorCodeAddCustomCode(builder, custom_code_offset)

        return OperatorCode.OperatorCodeEnd(builder)

    def serialise_quantization_parameters(self, quant):
        builder = self.builder

        min = None
        max = None
        scale = None
        zero_point = None
        if quant is not None:
            if quant.min is not None:
                min = self.write_float_vector(make_vector(quant.min))
            if quant.max is not None:
                max = self.write_float_vector(make_vector(quant.max))
            if quant.scale_f32 is not None:
                scale = self.write_float_vector(make_vector(quant.scale_f32))
            if quant.zero_point is not None:
                zero_point = self.write_long_vector(make_vector(quant.zero_point))

        QuantizationParameters.QuantizationParametersStart(builder)
        if min is not None:
            QuantizationParameters.QuantizationParametersAddMin(builder, min)
        if max is not None:
            QuantizationParameters.QuantizationParametersAddMax(builder, max)
        if scale is not None:
            QuantizationParameters.QuantizationParametersAddScale(builder, scale)
        if zero_point is not None:
            QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point)
        return QuantizationParameters.QuantizationParametersEnd(builder)

    def serialise_tensor(self, tens):
        builder = self.builder
        tens_shape = tens.shape
        values = tens.quant_values
        if values is None:
            values = tens.values

        if values is None:
            values = np.empty(shape=(0), dtype=np.uint8)

        if tens in self.tensors_to_reshape:
            reorder = self.tensors_to_reshape[tens]
            tens_shape = [tens_shape[idx] for idx in reorder]
            values = values.transpose(reorder)

        if tens.purpose == TensorPurpose.Scratch:
            tens_shape = [0]
            self.buffers_to_write[self.scratch_buf_id] = values.flatten().view(np.uint8)

        buf_id = self.buffer_map[tens]
        if buf_id != self.scratch_buf_id:
            self.buffers_to_write[buf_id] = values.flatten().view(np.uint8)

        shape = self.write_int_vector(tens_shape)

        name = builder.CreateString(tens.name)
        quant = self.serialise_quantization_parameters(tens.quantization)

        Tensor.TensorStart(builder)
        Tensor.TensorAddShape(builder, shape)
        Tensor.TensorAddType(builder, datatype_inv_map[tens.dtype])
        # All tensors must have a valid backing buffer, even if it is empty.
        # Empty buffers should be kept unique for TensorFlow Lite Micro
        Tensor.TensorAddBuffer(builder, buf_id)
        Tensor.TensorAddName(builder, name)
        Tensor.TensorAddQuantization(builder, quant)

        res = Tensor.TensorEnd(builder)
        return res

    def serialise_operator(self, op):
        builder = self.builder

        inputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.inputs])
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.outputs])

        op_idx, tflop, opt_serializer = self.operator_code_map[op.type]

        builtin_opt_offset = None
        custom_opt_offset = None
        if opt_serializer is not None:
            attrs = dict(op.attrs)
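            # Vela carries TensorFlow-style NHWC attribute lists; the TensorFlow Lite
            # option tables expect individual scalar fields, so unpack them here.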
            if "strides" in attrs:
                attrs["stride_h"] = attrs["strides"][1]
                attrs["stride_w"] = attrs["strides"][2]
            if "ksize" in attrs:
                attrs["filter_height"] = attrs["ksize"][1]
                attrs["filter_width"] = attrs["ksize"][2]
            if "dilation" in attrs:
                attrs["dilation_h_factor"] = attrs["dilation"][1]
                attrs["dilation_w_factor"] = attrs["dilation"][2]
            if "channel_multiplier" in attrs:
                attrs["depth_multiplier"] = attrs["channel_multiplier"]

            builtin_opt_offset, custom_opt_offset = opt_serializer.serialize(builder, attrs)

        mutating_variable_inputs_offset = self.write_byte_vector([])
        Operator.OperatorStart(builder)
        Operator.OperatorAddOpcodeIndex(builder, op_idx)
        Operator.OperatorAddInputs(builder, inputs_offset)
        Operator.OperatorAddOutputs(builder, outputs_offset)

        if builtin_opt_offset is not None:
            Operator.OperatorAddBuiltinOptionsType(builder, opt_serializer.builtin_opt_type)
            Operator.OperatorAddBuiltinOptions(builder, builtin_opt_offset)
        if custom_opt_offset is not None:
            Operator.OperatorAddCustomOptions(builder, custom_opt_offset)
            Operator.OperatorAddCustomOptionsFormat(builder, opt_serializer.custom_opt_format)

        Operator.OperatorAddMutatingVariableInputs(builder, mutating_variable_inputs_offset)
        return Operator.OperatorEnd(builder)

    def serialise_subgraph(self, sg):
        builder = self.builder
        tensor_set = set()

        all_ops = []
        for ps in sg.passes:
            for op in ps.ops:
                if op.type not in self.ops_to_ignore:
                    all_ops.append(op)

        for op in all_ops:
            for tens in op.inputs + op.outputs:
                tensor_set.add(tens)

        all_tensors = [tens for nm, idx, tens in sorted((tens.name, idx, tens) for idx, tens in enumerate(tensor_set))]

        self.tensor_map = {tens: idx for idx, tens in enumerate(all_tensors)}
        self.buffer_map = self.assign_buffers_to_tensors(all_tensors)

        tensors_offset = self.write_offset_vector([self.serialise_tensor(tens) for tens in all_tensors])

        # Add the Scratch Tensor as input to the NPU subgraph to get it allocated by TensorFlow Lite Micro
        scratch_tensor_idx = [v for k, v in self.tensor_map.items() if k.name.endswith("scratch")]

        # Make sure the input_tensors haven't been modified
        assert all(inp in sg.original_inputs for inp in sg.input_tensors)
        inputs_offset = self.write_int_vector(
            [self.tensor_map[tens] for tens in sg.original_inputs] + scratch_tensor_idx
        )
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in sg.output_tensors])

        operators_offset = self.write_offset_vector([self.serialise_operator(op) for op in all_ops])

        SubGraph.SubGraphStart(builder)
        SubGraph.SubGraphAddTensors(builder, tensors_offset)
        SubGraph.SubGraphAddInputs(builder, inputs_offset)
        SubGraph.SubGraphAddOutputs(builder, outputs_offset)

        SubGraph.SubGraphAddOperators(builder, operators_offset)

        return SubGraph.SubGraphEnd(builder)

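    # Buffer data is written by hand rather than through the builder's byte-vector API
    # so that it can be placed with 16-byte alignment (the Prep call below).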
    def write_aligned_bytes(self, buf):
        builder = self.builder
        builder.nested = True
        data = bytes(buf)
        length_bytes = UOffsetTFlags.py_type(len(data))
        builder.Prep(16, length_bytes)  # Reserve aligned storage
        builder.head = UOffsetTFlags.py_type(builder.Head() - length_bytes)  # Update FlatBuffer internal pointer
        builder.Bytes[builder.Head() : builder.Head() + length_bytes] = data  # Assign bytes to aligned area
        return builder.EndVector(length_bytes)

    def serialise_buffer(self, buf):
        builder = self.builder
        data = None
        if buf is not None:
            data = self.write_aligned_bytes(buf)
        Buffer.BufferStart(builder)
        if data is not None:
            Buffer.BufferAddData(builder, data)
        return Buffer.BufferEnd(builder)

    def serialise_metadata(self, metadata):
        builder = self.builder
        name = builder.CreateString(metadata[0])

        Metadata.MetadataStart(builder)
        Metadata.MetadataAddName(builder, name)
        Metadata.MetadataAddBuffer(builder, metadata[1])

        return Metadata.MetadataEnd(builder)

    def serialise_model(self):
        builder = self.builder
        operator_code_offset = self.write_offset_vector(
            [self.serialise_operator_code(idx, code) for idx, code in enumerate(self.operator_codes)]
        )

        description = builder.CreateString("Vela Optimised")

        subgraph_offset = self.write_offset_vector([self.serialise_subgraph(sg) for sg in self.subgraphs_to_write])

        # Fill the OfflineMemoryAllocation metadata buffer:
        # [version, number of subgraphs, number of tensors, one address offset per tensor]
        version = np.int32(0)
        subgraph_idx = np.int32(len(self.subgraphs_to_write))  # Only 1 supported currently
        nbr_tensors = np.int32(len(self.tensor_map))

        # An offset of -1 indicates that the tensor will be allocated online by TensorFlow Lite Micro
        offsets = [np.int32(-1)] * nbr_tensors

        # Ensure that the order of the offsets matches the order of the tensors
        for tens, idx in self.tensor_map.items():
            if tens.mem_area == MemArea.Sram:
                offsets[idx] = np.int32(tens.address)

        metadata_buffer = np.array([version, subgraph_idx, nbr_tensors] + offsets)
        self.buffers_to_write.append(metadata_buffer)

        buffers_offset = self.write_offset_vector([self.serialise_buffer(buf) for buf in self.buffers_to_write])

        metadata_list = [("OfflineMemoryAllocation", len(self.buffers_to_write) - 1)]
        metadata_offset = self.write_offset_vector([self.serialise_metadata(metadata) for metadata in metadata_list])

        Model.ModelStart(builder)
        Model.ModelAddVersion(builder, tflite_version)
        Model.ModelAddOperatorCodes(builder, operator_code_offset)
        Model.ModelAddSubgraphs(builder, subgraph_offset)
        Model.ModelAddDescription(builder, description)
        Model.ModelAddBuffers(builder, buffers_offset)
        Model.ModelAddMetadata(builder, metadata_offset)
        return Model.ModelEnd(builder)

    def serialise(self):

        model = self.serialise_model()

        self.builder.FinishWithFileIdentifier(model, tflite_file_identifier)

        return self.builder.Output()

    def write(self, filename):
        with open(filename, "wb") as f:
            f.write(self.serialise())


def write_tflite(nng, filename):
    writer = TFLiteSerialiser(nng)
    buf = writer.serialise()

    with open(filename, "wb") as f:
        f.write(buf)
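
# A minimal usage sketch (the Vela driver is what normally calls this; the output
# filename here is purely illustrative):
#
#     write_tflite(nng, "network_vela.tflite")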