Tim Hall | 79d07d2 | 2020-04-27 18:20:16 +0100 | [diff] [blame^] | 1 | # Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. |
| 2 | # |
| 3 | # SPDX-License-Identifier: Apache-2.0 |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the License); you may |
| 6 | # not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an AS IS BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | |
| 17 | |
| 18 | # Description: |
| 19 | # Functions used to write to a TensorFlow Lite format file. Supports adding in file identifiers. |
| 20 | |
| 21 | import flatbuffers |
| 22 | |
| 23 | from .tflite import Tensor |
| 24 | from .tflite import QuantizationParameters |
| 25 | from .tflite import Model |
| 26 | from .tflite import SubGraph |
| 27 | from .tflite import OperatorCode |
| 28 | from .tflite import Operator |
| 29 | from .tflite import Buffer |
| 30 | from .tflite import Metadata |
| 31 | |
| 32 | import numpy as np |
| 33 | |
| 34 | from .tflite_mapping import datatype_inv_map, builtin_operator_inv_map, custom_prefix, BuiltinOperator |
| 35 | from .nn_graph import PassPlacement |
| 36 | from .tensor import TensorPurpose, MemArea |
| 37 | from flatbuffers.builder import UOffsetTFlags |
| 38 | |
| 39 | tflite_version = 3 |
| 40 | tflite_file_identifier = "TFL" + str(tflite_version) |
| 41 | |
| 42 | |
| 43 | import flatbuffers.number_types as N |
| 44 | from flatbuffers import encode |
| 45 | |
| 46 | |
def FinishWithFileIdentifier(self, rootTable, fid):
    """Finish the buffer like Builder.Finish, but first write the 4-byte
    TFLite file identifier (e.g. "TFL3") directly after the root offset.

    Monkey-patched onto flatbuffers.Builder below because the installed
    flatbuffers runtime does not provide this method itself.
    """
    if fid is None or len(fid) != 4:
        raise Exception("fid must be 4 chars")

    flags = N.Uint8Flags
    # Make room for the root offset (4 bytes) plus the identifier itself.
    self.Prep(self.minalign, 4 + len(fid))
    # Bytes are prepended, so walk the identifier back-to-front.
    for ch in reversed(fid):
        self.head = self.head - flags.bytewidth
        encode.Write(flags.packer_type, self.Bytes, self.Head(), ord(ch))

    return self.Finish(rootTable)


flatbuffers.Builder.FinishWithFileIdentifier = FinishWithFileIdentifier
| 62 | |
| 63 | |
def make_vector(v):
    """Return v unchanged if it is already a sized sequence, otherwise wrap
    the scalar in a single-element list."""
    try:
        len(v)
    except TypeError:
        return [v]
    return v
| 70 | |
| 71 | |
| 72 | class TFLiteSerialiser: |
    def __init__(self, nng):
        """Collect the state needed to serialise nng's CPU-placed subgraphs.

        nng: the optimised network graph to write out.
        """
        self.builder = flatbuffers.Builder(0)
        self.nng = nng

        self.scratch_buf_id = 0  # Always assign scratch to buffer 0
        self.buffer_offsets_map = {}
        self.buffers_to_write = []  # have an empty array there

        self.input_tensors = []
        # Graph-construction ops with no TFLite operator equivalent; skipped
        # when gathering operators (their tensors still get serialised).
        self.ops_to_ignore = set(("Const", "Placeholder", "SubgraphInput"))

        # Weight tensors mapped to the axis permutation that restores the
        # layout TFLite expects; applied in serialise_tensor.
        self.tensors_to_reshape = {}

        self.subgraphs_to_write = [sg for sg in self.nng.subgraphs if sg.placement == PassPlacement.Cpu]

        all_ops = []
        for sg in self.subgraphs_to_write:
            for ps in sg.passes:
                for op in ps.ops:
                    if op.type not in self.ops_to_ignore:
                        all_ops.append(op)
                    # Conv/FC weights (second input) need their axes permuted
                    # back on write-out.
                    if op.type.startswith("Conv2D") or op.type.startswith("DepthwiseConv2d"):
                        self.tensors_to_reshape[op.inputs[1]] = (3, 0, 1, 2)
                    if op.type.startswith("FullyConnected"):
                        self.tensors_to_reshape[op.inputs[1]] = (1, 0)

        # Sorted for a deterministic operator-code table across runs.
        self.operator_codes = list(sorted(set(op.type for op in all_ops)))
        self.operator_code_map = {}
| 101 | |
| 102 | def write_byte_vector(self, v, alignment=1): |
| 103 | builder = self.builder |
| 104 | builder.StartVector(1, len(v), alignment) |
| 105 | for e in v[::-1]: |
| 106 | builder.PrependByte(e) |
| 107 | return builder.EndVector(len(v)) |
| 108 | |
| 109 | def write_int_vector(self, v): |
| 110 | builder = self.builder |
| 111 | builder.StartVector(4, len(v), 4) |
| 112 | for e in v[::-1]: |
| 113 | builder.PrependInt32(e) |
| 114 | return builder.EndVector(len(v)) |
| 115 | |
| 116 | def write_long_vector(self, v): |
| 117 | builder = self.builder |
| 118 | builder.StartVector(8, len(v), 8) |
| 119 | for e in v[::-1]: |
| 120 | builder.PrependInt64(e) |
| 121 | return builder.EndVector(len(v)) |
| 122 | |
| 123 | def write_float_vector(self, v): |
| 124 | builder = self.builder |
| 125 | builder.StartVector(4, len(v), 4) |
| 126 | for e in v[::-1]: |
| 127 | builder.PrependFloat32(e) |
| 128 | return builder.EndVector(len(v)) |
| 129 | |
| 130 | def write_offset_vector(self, v): |
| 131 | builder = self.builder |
| 132 | builder.StartVector(4, len(v), 4) |
| 133 | for e in v[::-1]: |
| 134 | builder.PrependUOffsetTRelative(e) |
| 135 | return builder.EndVector(len(v)) |
| 136 | |
    def assign_buffers_to_tensors(self, tensors):
        """Return a dict mapping each tensor to a model buffer index.

        All tensors sharing the scratch tensor's memory area map to buffer 0
        (scratch_buf_id); every other tensor gets its own unique buffer.
        NOTE(review): assumes at least one tensor has purpose Scratch —
        raises IndexError otherwise.
        """
        buffer_map = {}
        scratch_tensor = [tens for tens in tensors if tens.purpose == TensorPurpose.Scratch][0]
        buf_idx = 1

        for tens in tensors:
            if tens.mem_area == scratch_tensor.mem_area:
                buffer_map[tens] = self.scratch_buf_id
            else:
                buffer_map[tens] = buf_idx
                buf_idx += 1

        # Initialize buffers_to_write to a length equal to number of buffers so
        # they can be appended at the correct index during tensor serialization
        self.buffers_to_write = [None] * (buf_idx)

        return buffer_map
| 154 | |
    def serialise_operator_code(self, idx, code):
        """Write an OperatorCode table for op type `code`; return its offset.

        Also records (idx, builtin code, options serializer) in
        operator_code_map for later use by serialise_operator. Ops without a
        direct TFLite builtin mapping fall back to being written as custom
        operators.
        """
        builder = self.builder
        custom_code_offset = None
        if code.startswith(custom_prefix):
            # Explicitly custom: strip the prefix to form the custom-code name.
            tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]
            custom_code_offset = builder.CreateString(code[len(custom_prefix) :])
        else:
            try:
                tf_code, opt_serializer = builtin_operator_inv_map[code]
            except KeyError:
                print(
                    "Warning: Writing operation %s, which does not have a direct TensorFlow Lite mapping, as a custom operation"
                    % (code,)
                )
                tf_code, opt_serializer = builtin_operator_inv_map[custom_prefix]

        if tf_code == BuiltinOperator.CUSTOM:
            assert code == "NpuOp"  # Currently only support serialising NPU operators as a custom op
            custom_code_offset = builder.CreateString("ethos-u")

        self.operator_code_map[code] = (idx, tf_code, opt_serializer)

        OperatorCode.OperatorCodeStart(builder)
        OperatorCode.OperatorCodeAddBuiltinCode(builder, tf_code)
        if custom_code_offset is not None:
            OperatorCode.OperatorCodeAddCustomCode(builder, custom_code_offset)

        return OperatorCode.OperatorCodeEnd(builder)
| 183 | |
| 184 | def serialise_quantization_parameters(self, quant): |
| 185 | builder = self.builder |
| 186 | |
| 187 | min = None |
| 188 | max = None |
| 189 | scale = None |
| 190 | zero_point = None |
| 191 | if quant is not None: |
| 192 | if quant.min is not None: |
| 193 | min = self.write_float_vector(make_vector(quant.min)) |
| 194 | if quant.max is not None: |
| 195 | max = self.write_float_vector(make_vector(quant.max)) |
| 196 | if quant.scale_f32 is not None: |
| 197 | scale = self.write_float_vector(make_vector(quant.scale_f32)) |
| 198 | if quant.zero_point is not None: |
| 199 | zero_point = self.write_long_vector(make_vector(quant.zero_point)) |
| 200 | |
| 201 | QuantizationParameters.QuantizationParametersStart(builder) |
| 202 | if min is not None: |
| 203 | QuantizationParameters.QuantizationParametersAddMin(builder, min) |
| 204 | if max is not None: |
| 205 | QuantizationParameters.QuantizationParametersAddMax(builder, max) |
| 206 | if scale is not None: |
| 207 | QuantizationParameters.QuantizationParametersAddScale(builder, scale) |
| 208 | if zero_point is not None: |
| 209 | QuantizationParameters.QuantizationParametersAddZeroPoint(builder, zero_point) |
| 210 | return QuantizationParameters.QuantizationParametersEnd(builder) |
| 211 | |
    def serialise_tensor(self, tens):
        """Write a Tensor table for tens and stash its backing data in
        buffers_to_write at the buffer index assigned by buffer_map.
        Returns the table offset.
        """
        builder = self.builder
        tens_shape = tens.shape
        # Prefer quantised values, fall back to float values, then empty.
        values = tens.quant_values
        if values is None:
            values = tens.values

        if values is None:
            values = np.empty(shape=(0), dtype=np.uint8)

        # Permute weight tensors back to the layout TFLite expects
        # (permutations registered in __init__).
        if tens in self.tensors_to_reshape:
            reorder = self.tensors_to_reshape[tens]
            tens_shape = [tens_shape[idx] for idx in reorder]
            values = values.transpose(reorder)

        # Scratch gets shape [0] and always lands in the shared buffer 0.
        if tens.purpose == TensorPurpose.Scratch:
            tens_shape = [0]
            self.buffers_to_write[self.scratch_buf_id] = values.flatten().view(np.uint8)

        buf_id = self.buffer_map[tens]
        if buf_id != self.scratch_buf_id:
            self.buffers_to_write[buf_id] = values.flatten().view(np.uint8)

        shape = self.write_int_vector(tens_shape)

        name = builder.CreateString(tens.name)
        quant = self.serialise_quantization_parameters(tens.quantization)

        Tensor.TensorStart(builder)
        Tensor.TensorAddShape(builder, shape)
        Tensor.TensorAddType(builder, datatype_inv_map[tens.dtype])
        # All tensors must have a valid backing buffer, even if it is empty.
        # Empty buffers should be kept unique for TensorFlow Lite Micro
        Tensor.TensorAddBuffer(builder, buf_id)
        Tensor.TensorAddName(builder, name)
        Tensor.TensorAddQuantization(builder, quant)

        res = Tensor.TensorEnd(builder)
        return res
| 251 | |
    def serialise_operator(self, op):
        """Write an Operator table: opcode index, input/output tensor indices
        and the builtin/custom options produced by the op's serializer.
        Returns the table offset.
        """
        builder = self.builder

        inputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.inputs])
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in op.outputs])

        op_idx, tflop, opt_serializer = self.operator_code_map[op.type]

        builtin_opt_offset = None
        custom_opt_offset = None
        if opt_serializer is not None:
            attrs = dict(op.attrs)
            # Flatten 4-element attribute lists into the scalar fields the
            # TFLite option tables use; elements 1 and 2 map to height/width.
            if "strides" in attrs:
                attrs["stride_h"] = attrs["strides"][1]
                attrs["stride_w"] = attrs["strides"][2]
            if "ksize" in attrs:
                attrs["filter_height"] = attrs["ksize"][1]
                attrs["filter_width"] = attrs["ksize"][2]
            if "dilation" in attrs:
                attrs["dilation_h_factor"] = attrs["dilation"][1]
                attrs["dilation_w_factor"] = attrs["dilation"][2]
            if "channel_multiplier" in attrs:
                attrs["depth_multiplier"] = attrs["channel_multiplier"]

            builtin_opt_offset, custom_opt_offset = opt_serializer.serialize(builder, attrs)

        mutating_variable_inputs_offset = self.write_byte_vector([])
        Operator.OperatorStart(builder)
        Operator.OperatorAddOpcodeIndex(builder, op_idx)
        Operator.OperatorAddInputs(builder, inputs_offset)
        Operator.OperatorAddOutputs(builder, outputs_offset)

        if builtin_opt_offset is not None:
            Operator.OperatorAddBuiltinOptionsType(builder, opt_serializer.builtin_opt_type)
            Operator.OperatorAddBuiltinOptions(builder, builtin_opt_offset)
        if custom_opt_offset is not None:
            Operator.OperatorAddCustomOptions(builder, custom_opt_offset)
            Operator.OperatorAddCustomOptionsFormat(builder, opt_serializer.custom_opt_format)

        Operator.OperatorAddMutatingVariableInputs(builder, mutating_variable_inputs_offset)
        return Operator.OperatorEnd(builder)
| 293 | |
    def serialise_subgraph(self, sg):
        """Write a SubGraph table for sg: its tensors, input/output tensor
        indices and operators. Side effect: (re)builds tensor_map and
        buffer_map for use by serialise_tensor/serialise_operator.
        Returns the table offset.
        """
        builder = self.builder
        tensor_set = set()

        all_ops = []
        for ps in sg.passes:
            for op in ps.ops:
                if op.type not in self.ops_to_ignore:
                    all_ops.append(op)

        for op in all_ops:
            for tens in op.inputs + op.outputs:
                tensor_set.add(tens)

        # Sort by name (enumeration index breaks ties) so the tensor order
        # is deterministic across runs.
        all_tensors = [tens for nm, idx, tens in sorted((tens.name, idx, tens) for idx, tens in enumerate(tensor_set))]

        self.tensor_map = {tens: idx for idx, tens in enumerate(all_tensors)}
        self.buffer_map = self.assign_buffers_to_tensors(all_tensors)

        tensors_offset = self.write_offset_vector([self.serialise_tensor(tens) for tens in all_tensors])

        # Add the Scratch Tensor as input to the NPU subgraph to get it allocated by TensorFlow Lite Micro
        scratch_tensor_idx = [v for k, v in self.tensor_map.items() if k.name.endswith("scratch")]

        # Make sure the input_tensors haven't been modified
        assert all(inp in sg.original_inputs for inp in sg.input_tensors)
        inputs_offset = self.write_int_vector(
            [self.tensor_map[tens] for tens in sg.original_inputs] + scratch_tensor_idx
        )
        outputs_offset = self.write_int_vector([self.tensor_map[tens] for tens in sg.output_tensors])

        operators_offset = self.write_offset_vector([self.serialise_operator(op) for op in all_ops])

        SubGraph.SubGraphStart(builder)
        SubGraph.SubGraphAddTensors(builder, tensors_offset)
        SubGraph.SubGraphAddInputs(builder, inputs_offset)
        SubGraph.SubGraphAddOutputs(builder, outputs_offset)

        SubGraph.SubGraphAddOperators(builder, operators_offset)

        return SubGraph.SubGraphEnd(builder)
| 335 | |
    def write_aligned_bytes(self, buf):
        """Write buf's raw bytes into the FlatBuffer with 16-byte alignment
        and return the resulting vector offset.

        Writes directly through the builder's internals (head pointer and
        byte array) rather than prepending element-by-element, so large
        buffers are copied with a single slice assignment.
        """
        builder = self.builder
        builder.nested = True  # satisfy the builder's nested-construction state check
        data = bytes(buf)
        length_bytes = UOffsetTFlags.py_type(len(data))
        builder.Prep(16, length_bytes)  # Reserve aligned storage
        builder.head = UOffsetTFlags.py_type(builder.Head() - length_bytes)  # Update FlatBuffer internal pointer
        builder.Bytes[builder.Head() : builder.Head() + length_bytes] = data  # Assign bytes to aligned area
        return builder.EndVector(length_bytes)
| 345 | |
| 346 | def serialise_buffer(self, buf): |
| 347 | builder = self.builder |
| 348 | data = None |
| 349 | if buf is not None: |
| 350 | data = self.write_aligned_bytes(buf) |
| 351 | Buffer.BufferStart(builder) |
| 352 | if data is not None: |
| 353 | Buffer.BufferAddData(builder, data) |
| 354 | return Buffer.BufferEnd(builder) |
| 355 | |
| 356 | def serialise_metadata(self, metadata): |
| 357 | builder = self.builder |
| 358 | name = builder.CreateString(metadata[0]) |
| 359 | |
| 360 | Metadata.MetadataStart(builder) |
| 361 | Metadata.MetadataAddName(builder, name) |
| 362 | Metadata.MetadataAddBuffer(builder, metadata[1]) |
| 363 | |
| 364 | return Metadata.MetadataEnd(builder) |
| 365 | |
    def serialise_model(self):
        """Write the top-level Model table (operator codes, subgraphs,
        buffers and the OfflineMemoryAllocation metadata) and return its
        offset.
        """
        builder = self.builder
        operator_code_offset = self.write_offset_vector(
            [self.serialise_operator_code(idx, code) for idx, code in enumerate(self.operator_codes)]
        )

        description = builder.CreateString("Vela Optimised")

        subgraph_offset = self.write_offset_vector([self.serialise_subgraph(sg) for sg in self.subgraphs_to_write])

        # Fill the metadata buffer: [version, subgraph count, tensor count,
        # one pre-allocated address per tensor].
        version = np.int32(0)
        subgraph_idx = np.int32(len(self.subgraphs_to_write))  # Only 1 supported currently
        nbr_tensors = np.int32(len(self.tensor_map))

        # An offset of -1 indicates that the tensor will be allocated online by Tensorflow Lite Micro
        offsets = [np.int32(-1)] * nbr_tensors

        # Ensure that the order of the offsets match the order of the tensors
        for tens, idx in self.tensor_map.items():
            if tens.mem_area == MemArea.Sram:
                offsets[idx] = np.int32(tens.address)

        metadata_buffer = np.array([version, subgraph_idx, nbr_tensors] + offsets)
        self.buffers_to_write.append(metadata_buffer)

        buffers_offset = self.write_offset_vector([self.serialise_buffer(buf) for buf in self.buffers_to_write])

        # The metadata entry points at the buffer appended above (last index).
        metadata_list = [("OfflineMemoryAllocation", len(self.buffers_to_write) - 1)]
        metadata_offset = self.write_offset_vector([self.serialise_metadata(metadata) for metadata in metadata_list])

        Model.ModelStart(builder)
        Model.ModelAddVersion(builder, tflite_version)
        Model.ModelAddOperatorCodes(builder, operator_code_offset)
        Model.ModelAddSubgraphs(builder, subgraph_offset)
        Model.ModelAddDescription(builder, description)
        Model.ModelAddBuffers(builder, buffers_offset)
        Model.ModelAddMetadata(builder, metadata_offset)
        return Model.ModelEnd(builder)
| 405 | |
| 406 | def serialise(self): |
| 407 | |
| 408 | model = self.serialise_model() |
| 409 | |
| 410 | self.builder.FinishWithFileIdentifier(model, tflite_file_identifier) |
| 411 | |
| 412 | return self.builder.Output() |
| 413 | |
| 414 | def write(self, filename): |
| 415 | with open(self.filename, "wb") as f: |
| 416 | f.write(self.serialised_buf) |
| 417 | |
| 418 | |
def write_tflite(nng, filename):
    """Serialise nng's CPU-placed subgraphs and write the resulting
    TensorFlow Lite flatbuffer to `filename`."""
    serialiser = TFLiteSerialiser(nng)
    data = serialiser.serialise()

    with open(filename, "wb") as f:
        f.write(data)