blob: 42069f5d8955f1dd2333d6403a15f65b1a1cda15 [file] [log] [blame]
# SPDX-FileCopyrightText: Copyright 2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This script will provide you with an example of how to perform
post-training quantization in TensorFlow.

The output from this example will be a TensorFlow Lite model file
where weights and activations are quantized to 8-bit integer values.

Quantization helps reduce the size of your models and is necessary
for running models on certain hardware such as Arm Ethos NPU.

In addition to quantizing weights, post-training quantization uses
a calibration dataset to capture the minimum and maximum values of
all variable tensors in your model. By capturing these ranges it
is possible to fully quantize not just the weights of the model
but also the activations.

Depending on the model you are quantizing there may be some accuracy loss,
but for a lot of models the loss should be minimal.

If you are targeting an Arm Ethos-U55 NPU then the output
TensorFlow Lite file will also need to be passed through the Vela
compiler for further optimizations before it can be used.

For more information on using Vela see:
    https://git.mlplatform.org/ml/ethos-u/ethos-u-vela.git/about/
For more information on post-training quantization see:
    https://www.tensorflow.org/lite/performance/post_training_integer_quant
"""
Alex Tawsedaba3cf2023-09-29 15:55:38 +010043
alexander3c798932021-03-26 21:42:19 +000044import pathlib
45
46import numpy as np
47import tensorflow as tf
48
49from training_utils import get_data, create_model
50
51
def post_training_quantize(keras_model, sample_data, num_samples=100):
    """
    Quantize Keras model using post-training quantization with some sample data.

    TensorFlow Lite will have fp32 inputs/outputs and the model will handle quantizing/dequantizing.

    Args:
        keras_model: Keras model to quantize.
        sample_data: A numpy array of data to use as a representative dataset.
        num_samples: Maximum number of leading entries of sample_data fed to the
                     converter for calibration. Fewer are used if sample_data is shorter.

    Returns:
        Quantized TensorFlow Lite model, as returned by the converter's convert() call.

    Raises:
        ValueError: If num_samples is less than 1 or sample_data contains no samples.
    """
    if num_samples < 1:
        raise ValueError(f"num_samples must be at least 1, got {num_samples}")

    # Slicing (rather than indexing 0..num_samples-1) keeps us in bounds when fewer
    # samples than requested are available.
    calibration_data = sample_data[:num_samples]

    # len() is used instead of truthiness because numpy arrays do not support bool().
    if len(calibration_data) == 0:
        raise ValueError("sample_data must contain at least one sample for calibration")

    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)

    # We set the following converter options to ensure our model is fully quantized.
    # An error should get thrown if there are any ops that can't be quantized.
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

    # To use post training quantization we must provide some sample data that will be used to
    # calculate activation ranges for quantization. This data should be representative of the data
    # we expect to feed the model and must be provided by a generator function.
    def generate_repr_dataset():
        for sample in calibration_data:
            # Add a leading batch dimension of 1 to each sample.
            yield [np.expand_dims(sample, axis=0)]

    converter.representative_dataset = generate_repr_dataset
    tflite_model = converter.convert()

    return tflite_model
84
85
# pylint: disable=duplicate-code
def evaluate_tflite_model(
        tflite_save_path: pathlib.Path,
        x_test: np.ndarray,
        y_test: np.ndarray
) -> None:
    """
    Calculate the accuracy of a TensorFlow Lite model using TensorFlow Lite interpreter.

    Each test sample is run through the interpreter one at a time and the index of the
    largest output value is compared with the matching label. The resulting accuracy
    is printed rather than returned.

    Args:
        tflite_save_path: Path to TensorFlow Lite model to test.
        x_test: numpy array of testing data.
        y_test: numpy array of testing labels (sparse categorical).

    Raises:
        ValueError: If y_test is empty, since no accuracy can be computed.
    """
    num_test_images = len(y_test)

    # Fail early with a clear message instead of a ZeroDivisionError after inference.
    if num_test_images == 0:
        raise ValueError("y_test must contain at least one label to evaluate accuracy")

    interpreter = tf.lite.Interpreter(model_path=str(tflite_save_path))
    interpreter.allocate_tensors()

    # Only the first input and first output tensor are used; look them up once.
    input_index = interpreter.get_input_details()[0]['index']
    output_index = interpreter.get_output_details()[0]['index']

    accuracy_count = 0

    for i, label in enumerate(y_test):
        # Add a leading batch dimension of 1 before handing the sample to the interpreter.
        interpreter.set_tensor(input_index, x_test[i][np.newaxis, ...])
        interpreter.invoke()
        output_data = interpreter.get_tensor(output_index)

        if np.argmax(output_data) == label:
            accuracy_count += 1

    print(f"Test accuracy quantized: {accuracy_count / num_test_images:.3f}")
119
120
def main():
    """
    Train the example model in fp32, quantize it post-training, save it and report accuracy.
    """
    x_train, y_train, x_test, y_test = get_data()
    model = create_model()

    # Ordinary fp32 training comes first; quantization is applied afterwards.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.sparse_categorical_crossentropy,
        metrics=['accuracy'],
    )
    model.fit(x=x_train, y=y_train, batch_size=128, epochs=5, verbose=1, shuffle=True)

    # Baseline accuracy of the float model; the loss value is not needed.
    _, test_acc = model.evaluate(x_test, y_test)
    print(f"Test accuracy float: {test_acc:.3f}")

    # Quantize using the training data as the representative dataset.
    tflite_model = post_training_quantize(model, x_train)

    # Write the resulting TensorFlow Lite model to disk.
    tflite_models_dir = pathlib.Path('./conditioned_models/')
    tflite_models_dir.mkdir(exist_ok=True, parents=True)
    quant_model_save_path = tflite_models_dir / 'post_training_quant_model.tflite'
    quant_model_save_path.write_bytes(tflite_model)

    # Evaluating only a subset of the test data keeps the run time down.
    num_test_samples = 1000
    evaluate_tflite_model(
        quant_model_save_path,
        x_test[:num_test_samples],
        y_test[:num_test_samples],
    )
# pylint: enable=duplicate-code
alexander3c798932021-03-26 21:42:19 +0000157
158
# Run the example only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()