blob: 931167dda817df86732020ae28c8a0b330314b4a [file] [log] [blame]
Colm Donelan6f68ad22023-09-07 10:36:17 +01001#!/bin/bash
2#set -x
3#
4# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
5# SPDX-License-Identifier: MIT
6#
7# This script will run a TfLite model through ExecuteNetwork trying all available backends to measure
8# both speed and accuracy. In addition, it will try some of the performance options that are available.
9#
10# Prerequisites: ExecuteNetwork must be built with:
11# * CpuRef enabled (-DARMNNREF=1)
12# * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
13# * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
14# * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
15# * The model must be fully supported by Arm NN.
16#
17# Usage:
18# evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test>
19#
20# Sample usage:
21# evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
22#
23
CMD=$( basename "$0" )

# Print the command line help text and terminate with a non-zero exit code.
usage() {
    printf '%s\n' "Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>"
    printf '%s\n' "Options: -e <Path to ExecuteNetwork>"
    printf '%s\n' " -m <Test model>"
    exit 1
}
32
# Errors if the previous command had a non-zero exit code.
# Must be called immediately after the command being checked, before
# anything else can overwrite $?. Also leaves the code in EXITCODE.
function AssertZeroExitCode {
    EXITCODE=$?
    [ $EXITCODE -eq 0 ] && return
    echo -e "Previous command exited with code $EXITCODE"
    exit 1
}
41
OPTION_COUNTER=0
# Parse the command line. "h" is included in the optstring so that -h reaches
# the h) arm directly instead of tripping a getopts "illegal option" error
# before usage is printed.
while getopts "he:m:" opt; do
    ((OPTION_COUNTER+=1))
    case "$opt" in
        h|\?) usage;;
        e) EXECUTE_NETWORK_PATH="$OPTARG";;
        m) MODEL="$OPTARG";;
    esac
done
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi
58
# Check the path to execute network will find the executable.
# Fail fast if the binary is missing; otherwise record its full path.
if [ ! -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
    usage
    exit 1
fi
echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
68
# Check that the model exists and has a supported extension.
# $MODEL is quoted so paths containing spaces do not break the file test.
if [ -f "$MODEL" ]; then
    if [[ ! $MODEL =~ (tflite)$ ]]; then
        echo "Only .tflite files are supported."
        exit 1
    fi
else
    echo Model file: "\"$MODEL\" could not be found."
    usage
    exit 1
fi
80
# Find out the available backends. Unfortunately the list of backends spans multiple lines.
# This means we have to do this in several steps: unquoted echo collapses the help text onto
# one line, then sed isolates the contents of the "[ ... ]" backend list and drops the commas.
echo -n -e "Available backends on this executable\t\t:"
HELP_OUTPUT=$($EXECUTE_NETWORK --help)
BACKENDS=$(echo $HELP_OUTPUT | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g')
# Remove the leading space to make it look prettier.
BACKENDS="${BACKENDS:1}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
else
    echo " $BACKENDS"
    # We really need the CpuRef to be in there: it is the reference fall back used throughout.
    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
        echo ""
        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
        exit 1
    fi
fi
101
102
# This is where the real work starts.
# Model execution can take a long time, so trap ctrl-c and tell the user.

# Handler invoked when the user interrupts the run with ctrl-c.
function ctrl_c() {
    echo -e "Interrupted.\nNo patience eh? Try a smaller model."
    exit 1
}

trap ctrl_c INT
111
112
# We need to check that the delegate is supported otherwise we can't run through the tf runtime.
echo -n -e "Is the delegate supported on this executable?\t:"
TFLITE_EXECUTION=$($EXECUTE_NETWORK -m "$MODEL" -T tflite -c CpuRef -N)
# Check for an error message about building with the delegate.
if [[ $TFLITE_EXECUTION =~ "Tensorflow-Lite delegate support" ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)"
    exit 1
else
    echo " Yes"
fi
124
# Run through CpuRef to see if Arm NN supports the model.
echo -n -e "Is the model fully supported by Arm NN?\t\t:"
REF_EXECUTION=$($EXECUTE_NETWORK -m "$MODEL" -c CpuRef -N)
# $? here is the exit status of the command substitution above; it must be
# checked before any other command runs.
# If it failed look for the most common reason - an unsupported layer.
if [ $? -ne 0 ]; then
    if [[ $REF_EXECUTION =~ "is not supported on requested backend CpuRef" ]]; then
        echo -e " No - One or more layers are not supported by Arm NN"
    else
        echo -e " No - Execution using CpuRef backend failed."
    fi
    echo -e "The Reported problems were\t:"
    echo $(echo "$REF_EXECUTION" | sed '/Warning\|ERROR\|Fatal/!d')
    echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" "
    exit 1
fi
echo " Yes"
141
# This function will execute the model and return a string representation of the results. This is the
# first time the model will be executed.
# If a layer is unsupported it is retried with -c $BACKEND,CpuRef to allow the odd layer to be
# supported by an unaccelerated backend; the result is then marked with a "*".
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Additional ExecuteNetwork parameters (intentionally unquoted below so it word-splits).
#
# Reads globals: EXECUTE_NETWORK, MODEL.
# Prints either "OK" or "!<rms value>" followed by "*" when CpuRef fall back was required.
function RunAccuracyOnBackendWithParameters {
    local BACKEND=$1
    local ADDITIONAL_PARAM=$2
    local ACCURACY_RUN ACCURACY_VALUE ACCURACY
    # Must start empty on every call; it is only set when this run needed a CpuRef fall back.
    local REQUIRES_CPUREF=""
    # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure.
    ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c $BACKEND $ADDITIONAL_PARAM -A -N)
    # Start by checking the return code.
    if [ $? -ne 0 ]; then
        # Maybe this backend isn't supported.
        if [[ $ACCURACY_RUN =~ "None of the preferred backends [$BACKEND ] are supported" ]]; then
            echo -e "\t\t***Is not supported***"
            return 1
        elif [[ $ACCURACY_RUN =~ "is not supported on requested backend" ]]; then
            # One or more layers require a fall back. Run again with CpuRef fall back.
            ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c $BACKEND,CpuRef $ADDITIONAL_PARAM -A -N)
            REQUIRES_CPUREF="*"
        else
            # In the case of a general failure against this backend tell the user what we tried and then
            # ignore this backend.
            echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\""
            return 1
        fi
    fi
    # Now check the RMS value. If it isn't 0 then mark this as questionable accuracy.
    ACCURACY_VALUE=$(echo "$ACCURACY_RUN" | grep 'Byte level')
    if [[ ! $ACCURACY_VALUE == *0 ]]; then
        # Strip the label text, leaving only the numeric RMS value, prefixed with "!".
        ACCURACY=!$(echo $ACCURACY_VALUE | sed 's/[a-zA-Z:]*//g')
    else
        ACCURACY="OK"
    fi
    # Add on the * if we needed to add CpuRef.
    # Bug fix: the original tested [ -z $REQUIRES_CPUREF ], which is inverted - the "*" was
    # appended in the branch where it was empty and therefore never actually printed.
    if [ -n "$REQUIRES_CPUREF" ]; then
        echo -e "$ACCURACY $REQUIRES_CPUREF\t\t"
    else
        echo -e "$ACCURACY\t\t"
    fi
}
186
187# This function will execute the model and return a string representation of the results. The execution
188# Is done wth -c $BACKEND,CpuRef to allow the odd layer to ot be supported by an accelerated backend.
189#
190# Parameters:
191# $1 Backend string like CpuRef.
192# $2 Additional ExecuteNetwork parameters.
193#
194function RunPerformanceOnBackendWithParameters {
195 BACKEND=$1
196 ADDITIONAL_PARAM=$2
197 # Execute with 6 inferences. Mark the first as initial inference. Average the rest.
198 SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM`
199
200 # Extract the model load time
201 MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'`
202 MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the tailing space and full stop.
203 # and the optimization time.
204 OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'`
205 OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the tailing space.
206
207 # All 6 inference times.
208 RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"`
209 # This will take "Info: Inference time: 0.03 ms Info:..." and transform to "0.03 0.01 0.01"
210 INFERENCE_TIMES=`echo $RAW_INFERENCE | sed 's/[a-zA-Z:]*//g'`
211 INITIAL_INFERENCE_TIME=`echo $INFERENCE_TIMES | cut -d ' ' -f 1`
212 # Now remove the initial inference time as it will skew the average.
213 INFERENCE_TIMES=`echo $INFERENCE_TIMES | sed 's/[^ ]* //'`
214 # Use awk to sum and average the remaining 5 numbers.
215 AVERAGE_INFERENCE_TIME=`echo $INFERENCE_TIMES | awk '{s+=$1}END{print s/NR}' RS=" "`
216
217 # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
218 echo -e "$MODEL_LOAD_TIME\t\t$OPTIMIZATION_TIME\t\t\t$INITIAL_INFERENCE_TIME\t\t\t$AVERAGE_INFERENCE_TIME\t"
219}
220
221
# Check execution in all available backends. For each backend: print the accuracy result,
# then (if the backend is supported) run the performance measurement. Unsupported backends
# are removed from $BACKENDS so later per-backend sections skip them.
echo "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
for backend in $BACKENDS
do
    echo -n -e "$backend\t\t"
    RESULT=$(RunAccuracyOnBackendWithParameters $backend)
    echo -n -e "$RESULT"
    # A "*" in the accuracy result means this backend required a CpuRef fall back.
    if [[ $RESULT =~ "*" ]]; then
        REQUIRED_CPU_REF=1
    fi
    # It's possible the backend wasn't supported.
    if [[ ! "$RESULT" =~ "not supported" ]]; then
        # It was, continue.
        RESULT=$(RunPerformanceOnBackendWithParameters $backend)
        echo -n -e "$RESULT"
        # Save some specific values for use later.
        if [ $backend == "CpuAcc" ]; then
            # In the case of CpuAcc we save the average inference time (4th field of RESULT).
            CPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
        if [ $backend == "GpuAcc" ]; then
            # In the case of GpuAcc we save the average inference time (4th field of RESULT).
            GPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
    else
        # Remove this backend from future tests. (Safe inside the loop: the for-list was
        # expanded once before iteration started.)
        BACKENDS=`echo $BACKENDS | sed "s/$backend//"`
    fi
    echo
done
# Only print this if it was required.
if [ ! -z $REQUIRED_CPU_REF ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi
258
# Now it's time to look at backend specific parameters.

# This function first runs the accuracy test and then the performance test. It uses the average
# from earlier to compare to.
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Extra ExecuteNetwork parameter, e.g. "--fp16-turbo-mode".
#  $3 Baseline average inference time in ms to compare against.
#
function RunAccuracyAndPerformanceWithExtraParameter
{
    BACKEND=$1
    EXTRA_PARAM=$2
    AVERAGE_INFERENCE_TIME=$3
    echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)"
    RESULT=$(RunAccuracyOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    echo -n "$RESULT"
    RESULT=$(RunPerformanceOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    PARAM_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
    # If adding the parameter was faster then include by how much.
    # Bug fix: the original used [[ "$a" < "$b" ]], which is a lexical string comparison and is
    # wrong for numbers of different magnitudes (e.g. "9.5" < "10.2" is false). Compare
    # numerically with bc, matching the comparisons used elsewhere in this script.
    if (( $(echo "$PARAM_AVERAGE_INFERENCE_TIME < $AVERAGE_INFERENCE_TIME" | bc -l) )); then
        DELTA=`echo $AVERAGE_INFERENCE_TIME - $PARAM_AVERAGE_INFERENCE_TIME | bc`
        echo -e "$RESULT\t\t\t$DELTA ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)"
    else
        echo -e "$RESULT\t\t\t**No improvment**"
    fi
}
281
282
# Start with CpuAcc. Three knobs to twiddle: threads, fast-math and fp16.
# Only runs if CpuAcc survived the earlier support check (unsupported backends were
# removed from $BACKENDS).
if [[ $BACKENDS =~ "CpuAcc" ]]; then
    echo
    echo "CpuAcc optimizations."
    echo "============================"
    echo "The value of \"number-of-threads\" parameter by default is decided on by the backend."
    echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default."
    echo
    for i in {1..12}
    do
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $i")
        # The average inference time is the 4th field of the performance result.
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        # Print something out if the returned average is less than the previously saved average.
        # bc -l performs the floating point comparison; it prints 1 when true.
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=`echo $CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    if [ -z $FASTER ]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi
    # Next is fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" $CPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" $CPUACC_AVERAGE_INFERENCE_TIME
fi
317
# GpuAcc.
# Options to check: enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
# Only runs if GpuAcc survived the earlier support check.
if [[ $BACKENDS =~ "GpuAcc" ]]; then
    echo
    echo "GpuAcc optimizations."
    echo "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is tuning levels. Just speed on this one.
    echo
    echo -n "Now trying \"tuning-level/tuning-path\"."
    echo
    for i in {1..3}
    do
        touch ./tuned-network.bin
        # Create tuned network file with the first run.
        # Bug fix: this previously passed "-c $GpuAcc,CpuRef" - $GpuAcc is an undefined
        # variable, so the option expanded to "-c ,CpuRef". The backend name is the
        # literal string GpuAcc.
        OUTPUT=`$EXECUTE_NETWORK -m $MODEL -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N`
        AssertZeroExitCode
        # Now run the performance test reusing that saved network.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            # Bug fix: report the improvement as a positive delta (baseline minus new time),
            # matching the CpuAcc section; the operands were previously reversed, which
            # produced a negative value.
            DELTA=`echo $GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi
358fi