#!/bin/bash
#set -x
#
# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
# This script will run a TfLite model through ExecuteNetwork trying all available backends to measure
# both speed and accuracy. In addition, it will try some of the performance options that are available.
#
# Prerequisites: ExecuteNetwork must be built with:
#  * CpuRef enabled (-DARMNNREF=1)
#  * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
#  * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
#  * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
#  * The model must be fully supported by Arm NN.
#
# Usage:
#  evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test>
#
# Sample usage:
#  evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
#
# Name of this script, for use in the usage message.
CMD=$( basename "$0" )

# Print a short usage summary and terminate with a non-zero status.
usage() {
    cat <<USAGE
Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>
Options: -e <Path to ExecuteNetwork>
         -m <Test model>
USAGE
    exit 1
}
| 32 | |
# Errors (exits the whole script) if the previous command had a non-zero exit code.
function AssertZeroExitCode {
    EXITCODE=$?
    # Guard-clause form: nothing to do on success.
    (( EXITCODE == 0 )) || { echo -e "Previous command exited with code $EXITCODE"; exit 1; }
}
| 41 | |
OPTION_COUNTER=0
# Parse the command line. -e and -m both take an argument; -h prints usage.
# Bug fix: 'h' was missing from the optstring ("e:m:"), so "-h" was only ever
# dispatched through the \? (invalid option) branch by accident. Listing it
# makes the h) case reachable as intended.
while getopts "he:m:" opt; do
    ((OPTION_COUNTER+=1))
    case "$opt" in
        h|\?) usage;;
        e) EXECUTE_NETWORK_PATH="$OPTARG";;
        m) MODEL="$OPTARG";;
    esac
done
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi
| 58 | |
# Check the path to execute network will find the executable.
if [ -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
    echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
    EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
else
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
    usage
    exit 1
fi

# Check that the model exists and has a supported extension.
# Bug fixes: $MODEL is quoted (an unquoted -f test breaks on paths containing
# spaces or glob characters), and the extension check anchors on ".tflite" —
# the old pattern "(tflite)$" accepted any name merely ending in "tflite",
# contradicting the error message below.
if [ -f "$MODEL" ]; then
    if [[ ! $MODEL =~ \.tflite$ ]]; then
        echo "Only .tflite files are supported."
        exit 1
    fi
else
    echo Model file: "\"$MODEL\" could not be found."
    usage
    exit 1
fi
| 80 | |
# Find out the available backends. Unfortunately the list of backends spans multiple lines.
# This means we have to do this in several steps.
echo -n -e "Available backends on this executable\t\t:"
HELP_OUTPUT=$("$EXECUTE_NETWORK" --help)
# The unquoted expansion of $HELP_OUTPUT is deliberate: it collapses the
# multi-line help text onto one line so a single sed pipeline can cut the
# backend list out of "...: [CpuAcc, CpuRef, ...]".
BACKENDS=$(echo $HELP_OUTPUT | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g')
# Remove the leading space to make it look prettier.
BACKENDS="${BACKENDS:1}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
else
    echo " $BACKENDS"
    # We really need the CpuRef to be in there.
    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
        echo ""
        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
        exit 1
    fi
fi
| 101 | |
| 102 | |
# This is where the real work starts.
# Model execution can take a long time. Trap ctrl-c and tell the user.
function ctrl_c() {
    echo -e "Interrupted.\nNo patience eh? Try a smaller model."
    exit 1
}

# Register the handler after defining it; INT (ctrl-c) aborts the whole run.
trap ctrl_c INT
| 111 | |
| 112 | |
# We need to check that the delegate is supported otherwise we can't run through the tf runtime.
echo -n -e "Is the delegate supported on this executable?\t:"
# Quote $EXECUTE_NETWORK and $MODEL so paths with spaces survive (SC2086).
TFLITE_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -T tflite -c CpuRef -N)
# Check for an error message about building with the delegate.
if [[ $TFLITE_EXECUTION =~ "Tensorflow-Lite delegate support" ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)"
    exit 1
else
    echo " Yes"
fi
| 124 | |
# Run through CpuRef to see if Arm NN supports the model.
echo -n -e "Is the model fully supported by Arm NN?\t\t:"
REF_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -c CpuRef -N)
# If it failed look for the most common reason - an unsupported layer.
# ($? here is the exit status of ExecuteNetwork: a plain assignment passes the
# command substitution's status through.)
if [ $? -ne 0 ]; then
    if [[ $REF_EXECUTION =~ "is not supported on requested backend CpuRef" ]]; then
        echo -e " No - One or more layers are not supported by Arm NN"
    else
        echo -e " No - Execution using CpuRef backend failed."
    fi
    echo -e "The Reported problems were\t:"
    # Bug fix: the old 'echo `echo ... | sed`' collapsed every diagnostic onto
    # one line; print each Warning/ERROR/Fatal line on its own line instead.
    echo "$REF_EXECUTION" | sed '/Warning\|ERROR\|Fatal/!d'
    echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" "
    exit 1
fi
echo " Yes"
| 141 | |
# This function will execute the model and print (on stdout) a string
# representation of the accuracy results. This is the first time the model
# will be executed.
# If a layer is unsupported the run is retried with -c $1,CpuRef so the odd
# layer can fall back to the unaccelerated reference backend; such results are
# marked with a trailing "*".
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Additional ExecuteNetwork parameters.
#
# Returns 0 on success, 1 if the backend is unsupported or execution failed.
#
function RunAccuracyOnBackendWithParameters {
    local BACKEND=$1
    local ADDITIONAL_PARAM=$2
    local REQUIRES_CPUREF=""
    local ACCURACY_RUN ACCURACY_VALUE ACCURACY
    # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure.
    # $ADDITIONAL_PARAM is deliberately unquoted: it may hold several words
    # (e.g. "--tuning-path x --tuning-level 1") that must word-split.
    ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c "$BACKEND" $ADDITIONAL_PARAM -A -N)
    # Start by checking the return code.
    if [ $? -ne 0 ]; then
        # Maybe this backend isn't supported.
        if [[ $ACCURACY_RUN =~ "None of the preferred backends [$BACKEND ] are supported" ]]; then
            echo -e "\t\t***Is not supported***"
            return 1
        elif [[ $ACCURACY_RUN =~ "is not supported on requested backend" ]]; then
            # One or more layers require a fall back. Run again with CpuRef fall back.
            ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c "$BACKEND,CpuRef" $ADDITIONAL_PARAM -A -N)
            REQUIRES_CPUREF="*"
        else
            # In the case of a general failure against this backend tell the user what we tried and then
            # ignore this backend.
            echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\""
            return 1
        fi
    fi
    # Now check the RMS value. If it isn't 0 then mark this as questionable accuracy.
    ACCURACY_VALUE=$(echo "$ACCURACY_RUN" | grep 'Byte level')
    if [[ ! $ACCURACY_VALUE == *0 ]]; then
        ACCURACY=!$(echo $ACCURACY_VALUE | sed 's/[a-zA-Z:]*//g')
    else
        ACCURACY="OK"
    fi
    # Add on the * if we needed to add CpuRef.
    # Bug fix: the original tested '-z' here, which inverted the logic — the
    # "*" marker was never printed. '-n' prints it exactly when fallback was
    # actually required.
    if [ -n "$REQUIRES_CPUREF" ]; then
        echo -e "$ACCURACY $REQUIRES_CPUREF\t\t"
    else
        echo -e "$ACCURACY\t\t"
    fi
}
| 186 | |
# Executes the model on the given backend (with CpuRef appended so the odd
# unsupported layer can fall back to the reference backend) and prints a
# tab-separated timing summary on stdout:
#   MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
# All values are in milliseconds as reported by ExecuteNetwork.
#
# NOTE(review): the parsing below depends on the exact wording of
# ExecuteNetwork's output ("Initialization time", "Optimization time",
# "Inference time" lines) and on unquoted-echo whitespace collapsing —
# confirm against the ExecuteNetwork version in use before changing it.
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Additional ExecuteNetwork parameters.
#
function RunPerformanceOnBackendWithParameters {
    BACKEND=$1
    ADDITIONAL_PARAM=$2
    # Execute with 6 inferences. Mark the first as initial inference. Average the rest.
    SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM`

    # Extract the model load time. The sed strips letters and colons, leaving
    # just the number plus trailing punctuation handled on the next line.
    MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'`
    MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the tailing space and full stop.
    # and the optimization time.
    OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'`
    OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the tailing space.

    # All 6 inference times.
    RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"`
    # This will take "Info: Inference time: 0.03 ms Info:..." and transform to "0.03 0.01 0.01"
    # (the unquoted echo collapses the matched lines onto one line).
    INFERENCE_TIMES=`echo $RAW_INFERENCE | sed 's/[a-zA-Z:]*//g'`
    INITIAL_INFERENCE_TIME=`echo $INFERENCE_TIMES | cut -d ' ' -f 1`
    # Now remove the initial inference time as it will skew the average.
    INFERENCE_TIMES=`echo $INFERENCE_TIMES | sed 's/[^ ]* //'`
    # Use awk to sum and average the remaining 5 numbers (RS=" " makes each
    # space-separated number its own record, so NR counts them).
    AVERAGE_INFERENCE_TIME=`echo $INFERENCE_TIMES | awk '{s+=$1}END{print s/NR}' RS=" "`

    # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
    echo -e "$MODEL_LOAD_TIME\t\t$OPTIMIZATION_TIME\t\t\t$INITIAL_INFERENCE_TIME\t\t\t$AVERAGE_INFERENCE_TIME\t"
}
| 220 | |
| 221 | |
# Check execution in all available backends.
echo "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
# $BACKENDS is deliberately unquoted: it is a space-separated list.
for backend in $BACKENDS
do
    echo -n -e "$backend\t\t"
    RESULT=$(RunAccuracyOnBackendWithParameters "$backend")
    echo -n -e "$RESULT"
    # Flag when the accuracy run needed CpuRef fallback. The fallback marker
    # is a "*" followed by the padding tab. Bug fix: the old bare '=~ "*"'
    # also matched the "***Is not supported***" and "***Execution failed"
    # messages and set the flag spuriously.
    if [[ $RESULT =~ \*$'\t' ]]; then
        REQUIRED_CPU_REF=1
    fi
    # It's possible the backend wasn't supported.
    if [[ ! "$RESULT" =~ "not supported" ]]; then
        # It was, continue.
        RESULT=$(RunPerformanceOnBackendWithParameters "$backend")
        echo -n -e "$RESULT"
        # Save some specific values for use later.
        if [ "$backend" == "CpuAcc" ]; then
            # In the case of CpuAcc we save the average inference time.
            CPUACC_AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        fi
        if [ "$backend" == "GpuAcc" ]; then
            # In the case of GpuAcc we save the average inference time.
            GPUACC_AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        fi
    else
        # Remove this backend from future tests.
        BACKENDS=$(echo $BACKENDS | sed "s/$backend//")
    fi
    echo
done
# Only print this if it was required.
if [ -n "$REQUIRED_CPU_REF" ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi
| 258 | |
| 259 | # Now its time to look at backend specific parameters. |
| 260 | |
| 261 | # This function first run the accuracy test and then the performance test. It uses the average from earlier |
| 262 | # to compare to. |
| 263 | function RunAccuracyAndPerformanceWithExtraParameter |
| 264 | { |
| 265 | BACKEND=$1 |
| 266 | EXTRA_PARAM=$2 |
| 267 | AVERAGE_INFERENCE_TIME=$3 |
| 268 | echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)" |
| 269 | RESULT=$(RunAccuracyOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM) |
| 270 | echo -n "$RESULT" |
| 271 | RESULT=$(RunPerformanceOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM) |
| 272 | PARAM_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4` |
| 273 | # If adding the parameter was faster then incude by how much. |
| 274 | if [[ "$PARAM_AVERAGE_INFERENCE_TIME" < "$AVERAGE_INFERENCE_TIME" ]]; then |
| 275 | DELTA=`echo $AVERAGE_INFERENCE_TIME - $PARAM_AVERAGE_INFERENCE_TIME | bc` |
| 276 | echo -e "$RESULT\t\t\t$DELTA ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)" |
| 277 | else |
| 278 | echo -e "$RESULT\t\t\t**No improvment**" |
| 279 | fi |
| 280 | } |
| 281 | |
| 282 | |
# Start with CpuAcc. Three knobs to twiddle: threads, fast-math and fp16.
if [[ $BACKENDS =~ "CpuAcc" ]]; then
    echo
    echo "CpuAcc optimizations."
    echo "============================"
    echo "The value of \"number-of-threads\" parameter by default is decided on by the backend."
    echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default."
    echo
    for i in {1..12}
    do
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $i")
        AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        # Print something out if the returned average is less than the previously saved average.
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=$(echo "$CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME" | bc)
            echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    # Quoted test: '[ -z $FASTER ]' with FASTER unset degenerates to a
    # one-argument test and only works by accident (SC2086).
    if [ -z "$FASTER" ]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi
    # Next is fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" "$CPUACC_AVERAGE_INFERENCE_TIME"

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" "$CPUACC_AVERAGE_INFERENCE_TIME"
fi
| 317 | |
# GpuAcc.
# Options to check: enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
if [[ $BACKENDS =~ "GpuAcc" ]]; then
    echo
    echo "GpuAcc optimizations."
    echo "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is tuning levels. Just speed on this one.
    echo
    echo -n "Now trying \"tuning-level/tuning-path\"."
    echo
    for i in {1..3}
    do
        touch ./tuned-network.bin
        # Create tuned network file with the first run.
        # Bug fix: this previously passed "-c $GpuAcc,CpuRef" — $GpuAcc is an
        # undefined variable, so the backend list expanded to ",CpuRef" and the
        # tuning run never exercised the GPU backend at all.
        OUTPUT=$("$EXECUTE_NETWORK" -m "$MODEL" -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N)
        AssertZeroExitCode
        # Now run the performance test reusing that saved network.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
        AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            # Bug fix: subtract in baseline-minus-new order (as the CpuAcc loop
            # does); the original operand order always produced a negative delta.
            DELTA=$(echo "$GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME" | bc)
            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi