| #!/bin/bash |
| #set -x |
| # |
| # Copyright © 2023 Arm Ltd and Contributors. All rights reserved. |
| # SPDX-License-Identifier: MIT |
| # |
| # This script will run a TfLite model through ExecuteNetwork trying all available backends to measure |
| # both speed and accuracy. In addition, it will try some of the performance options that are available. |
| # |
| # Prerequisites: ExecuteNetwork must be built with: |
| # * CpuRef enabled (-DARMNNREF=1) |
| # * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1) |
| # * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1) |
| # * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1 |
| # * The model must be fully supported by Arm NN. |
| # |
| # Usage: |
| # evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test> |
| # |
| # Sample usage: |
| # evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite |
| # |
| |
# Name this script was invoked as; only used in the usage text.
CMD=$(basename "$0")

# Print the command line synopsis on stdout and terminate the script with
# exit status 1. This function never returns to its caller.
usage() {
    printf '%s\n' \
        "Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>" \
        "Options: -e <Path to ExecuteNetwork>" \
        " -m <Test model>"
    exit 1
}
| |
# Guard to be called immediately after a command: if that command finished
# with a non-zero status, report the status and terminate the script with
# exit status 1. Returns normally (status 0) otherwise.
# The observed status is left in the global EXITCODE.
AssertZeroExitCode() {
    # Must be the very first statement so that $? still belongs to the caller's command.
    EXITCODE=$?
    (( EXITCODE == 0 )) && return 0
    echo -e "Previous command exited with code $EXITCODE"
    exit 1
}
| |
# Parse the command line.
#   -e <dir>    directory containing the ExecuteNetwork executable
#   -m <file>   model to evaluate
#   -h          print the usage text
# "h" is part of the option string so that -h reaches the usage arm directly
# instead of being reported by getopts as an illegal option first.
while getopts "he:m:" opt; do
    case "$opt" in
        e) EXECUTE_NETWORK_PATH="$OPTARG" ;;
        m) MODEL="$OPTARG" ;;
        h|\?) usage ;;
    esac
done
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi
| |
# Check the path to execute network will find the executable.
EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
if [ ! -x "$EXECUTE_NETWORK" ]; then
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK\""
    usage
    exit 1
fi
echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK"

# Check that the model exists and has a supported extension.
# The path is quoted so that models stored under directories containing
# spaces are still found.
if [ ! -f "$MODEL" ]; then
    echo Model file: "\"$MODEL\" could not be found."
    usage
    exit 1
fi
# Require a literal ".tflite" suffix; a name that merely ends in the letters
# "tflite" is not accepted.
if [[ "$MODEL" != *.tflite ]]; then
    echo "Only .tflite files are supported."
    exit 1
fi
| |
# Find out the available backends. Unfortunately the list of backends spans multiple lines
# in the --help output, so the output is first folded onto a single line and then reduced
# to the text between the last ": [" and the following "]", with the commas removed.
echo -n -e "Available backends on this executable\t\t:"
HELP_OUTPUT=$("$EXECUTE_NETWORK" --help)
BACKENDS=$(printf '%s\n' "$HELP_OUTPUT" \
    | tr -s '[:space:]' ' ' \
    | sed -e 's/.*: \[//' -e 's/\].*//' -e 's/,//g')
# Normalise the list to single-space separated names. Splitting and re-joining
# removes leading/trailing blanks without ever cutting into a backend name.
BACKEND_NAMES=()
read -r -a BACKEND_NAMES <<< "$BACKENDS"
BACKENDS="${BACKEND_NAMES[*]}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
fi
echo " $BACKENDS"
# We really need the CpuRef to be in there.
if [[ "$BACKENDS" != *"CpuRef"* ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
    exit 1
fi
| |
| |
# This is where the real work starts.
# Model execution can take a long time, so ctrl-c (SIGINT) is intercepted: the
# handler prints a short note for the user and ends the script with status 1.
function ctrl_c() {
    printf 'Interrupted.\nNo patience eh? Try a smaller model.\n'
    exit 1
}

trap ctrl_c INT
| |
| |
# We need to check that the delegate is supported otherwise we can't run through the tf runtime.
# The executable and model paths are quoted so that paths containing spaces work.
echo -n -e "Is the delegate supported on this executable?\t:"
TFLITE_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -T tflite -c CpuRef -N)
# Check for an error message about building with the delegate.
if [[ "$TFLITE_EXECUTION" == *"Tensorflow-Lite delegate support"* ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)"
    exit 1
fi
echo " Yes"

# Run through CpuRef to see if Arm NN supports the model.
echo -n -e "Is the model fully supported by Arm NN?\t\t:"
if ! REF_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -c CpuRef -N); then
    # It failed; look for the most common reason - an unsupported layer.
    if [[ "$REF_EXECUTION" == *"is not supported on requested backend CpuRef"* ]]; then
        echo -e " No - One or more layers are not supported by Arm NN"
    else
        echo -e " No - Execution using CpuRef backend failed."
    fi
    echo -e "The Reported problems were\t:"
    # Show only the warning/error lines, one per line, exactly as ExecuteNetwork printed them.
    printf '%s\n' "$REF_EXECUTION" | grep -E 'Warning|ERROR|Fatal'
    echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" "
    exit 1
fi
echo " Yes"
| |
# Executes the model on one backend with the "-A -N" options and prints a short
# accuracy verdict on stdout. This is the first time the model is executed on
# the backend, so it also detects backends that cannot run the model at all.
#
# If the run fails because a layer is not supported on the backend, the run is
# repeated with "-c <backend>,CpuRef" so the odd layer can fall back to the
# unaccelerated reference backend.
#
# Globals read:
#   EXECUTE_NETWORK  Path of the ExecuteNetwork executable.
#   MODEL            Path of the model file.
# Parameters:
#   $1 Backend string like CpuRef.
#   $2 Additional ExecuteNetwork parameters (optional, space separated).
# Output (stdout):
#   "OK"             every "Byte level" value in the output is zero.
#   "!<values>"      at least one "Byte level" value is non-zero or missing.
#   A " *" is appended when the CpuRef fall back was needed.
#   "***Is not supported***" / "***Execution failed...": the backend is unusable.
# Returns:
#   0 when a verdict was produced, 1 when the backend is unusable.
#
function RunAccuracyOnBackendWithParameters {
    local backend=$1
    local requires_cpuref=""
    local accuracy_run accuracy_lines accuracy
    local -a extra_args=() cmdline=() words=()

    # $2 may carry several options in one string, e.g. "--number-of-threads 4".
    read -r -a extra_args <<< "${2:-}"

    # Run on the backend to check accuracy against TfLite runtime first.
    cmdline=("$EXECUTE_NETWORK" -m "$MODEL" -c "$backend" "${extra_args[@]}" -A -N)
    if ! accuracy_run=$("${cmdline[@]}"); then
        if [[ $accuracy_run == *"None of the preferred backends [$backend ] are supported"* ]]; then
            # Maybe this backend isn't supported.
            echo -e "\t\t***Is not supported***"
            return 1
        elif [[ $accuracy_run == *"is not supported on requested backend"* ]]; then
            # One or more layers require a fall back. Run again with CpuRef fall back.
            cmdline=("$EXECUTE_NETWORK" -m "$MODEL" -c "$backend,CpuRef" "${extra_args[@]}" -A -N)
            if ! accuracy_run=$("${cmdline[@]}"); then
                echo -e "\t***Execution failed. Ignoring this backend. Command was: \"${cmdline[*]}\""
                return 1
            fi
            requires_cpuref="*"
        else
            # In the case of a general failure against this backend tell the user what we tried
            # and then ignore this backend.
            echo -e "\t***Execution failed. Ignoring this backend. Command was: \"${cmdline[*]}\""
            return 1
        fi
    fi

    # Now check the RMS values. Every "Byte level" line must end in a number that is
    # numerically zero; a missing line, a non-numeric value (e.g. nan) or any non-zero
    # value marks the accuracy as questionable. Comparing numerically avoids treating
    # values such as 10 or 20 as zero just because their last digit is 0.
    accuracy_lines=$(printf '%s\n' "$accuracy_run" | grep 'Byte level')
    if printf '%s\n' "$accuracy_lines" | awk '
            NF == 0 { bad = 1 }
            NF > 0 && $NF !~ /^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$/ { bad = 1 }
            NF > 0 && ($NF + 0) != 0 { bad = 1 }
            END { exit bad }'; then
        accuracy="OK"
    else
        # Fold all lines onto one and strip the words, leaving just the numbers.
        read -r -d '' -a words <<< "$accuracy_lines"
        accuracy="!$(printf '%s\n' "${words[*]}" | sed 's/[a-zA-Z:]*//g')"
    fi

    # Add on the * if we needed to add CpuRef.
    if [[ -n $requires_cpuref ]]; then
        echo -e "$accuracy $requires_cpuref\t\t"
    else
        echo -e "$accuracy\t\t"
    fi
}
| |
# Executes the model with 6 inferences ("-I 6") and prints a string representation of the
# timings. The execution is done with -c <backend>,CpuRef to allow the odd layer that is
# not supported by an accelerated backend to fall back to CpuRef. CpuRef is only appended
# when the given backend list does not already contain it.
#
# Globals read:
#   EXECUTE_NETWORK  Path of the ExecuteNetwork executable.
#   MODEL            Path of the model file.
# Parameters:
#   $1 Backend string like CpuRef.
#   $2 Additional ExecuteNetwork parameters (optional, space separated).
# Output (stdout), tab separated:
#   MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
#   The average excludes the first inference. A value that cannot be found in the
#   ExecuteNetwork output is printed as an empty field.
#
function RunPerformanceOnBackendWithParameters {
    local backends=$1
    local speed_run raw
    local model_load_time optimization_time initial_inference_time
    local average_inference_time=""
    local -a extra_args=() words=() times=() steady=()

    # $2 may carry several options in one string, e.g. "--number-of-threads 4".
    read -r -a extra_args <<< "${2:-}"

    # Avoid asking for CpuRef twice when the caller already included it.
    case ",$backends," in
        *,CpuRef,*) ;;
        *) backends="$backends,CpuRef" ;;
    esac

    # Execute with 6 inferences. Mark the first as initial inference. Average the rest.
    speed_run=$("$EXECUTE_NETWORK" -m "$MODEL" -c "$backends" -I 6 -N "${extra_args[@]}")

    # Extract the model load time. Stripping the words leaves "<number> ." so the
    # trailing space and full stop (last two characters) are dropped as well.
    raw=$(printf '%s\n' "$speed_run" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g')
    read -r -d '' -a words <<< "${raw%??}"
    model_load_time="${words[*]}"

    # And the optimization time; here only one trailing character is dropped.
    raw=$(printf '%s\n' "$speed_run" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g')
    read -r -d '' -a words <<< "${raw%?}"
    optimization_time="${words[*]}"

    # All 6 inference times.
    # This will take "Info: Inference time: 0.03 ms Info:..." and transform to "0.03 0.01 0.01".
    read -r -d '' -a times < <(printf '%s\n' "$speed_run" \
        | grep "Inference time" \
        | sed 's/[a-zA-Z:]*//g')
    initial_inference_time=${times[0]:-}
    # Leave out the initial inference time as it will skew the average.
    steady=("${times[@]:1}")
    if (( ${#steady[@]} > 0 )); then
        average_inference_time=$(printf '%s\n' "${steady[@]}" \
            | awk '{ s += $1 } END { if (NR > 0) print s / NR }')
    fi

    # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
    echo -e "$model_load_time\t\t$optimization_time\t\t\t$initial_inference_time\t\t\t$average_inference_time\t"
}
| |
| |
# Check execution in all available backends.
# For every backend the accuracy run is done first. Its exit status decides whether the
# backend is usable: a backend that is not supported, or whose execution failed, is
# reported and then removed from BACKENDS so later sections skip it.
echo "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
BACKEND_LIST=()
USABLE_BACKENDS=()
RESULT_FIELDS=()
read -r -a BACKEND_LIST <<< "$BACKENDS"
for backend in "${BACKEND_LIST[@]}"; do
    echo -n -e "$backend\t\t"
    if RESULT=$(RunAccuracyOnBackendWithParameters "$backend"); then
        echo -n -e "$RESULT"
        # A star in a successful verdict means CpuRef fall back was required. Failure
        # banners also contain stars, which is why this is only checked on success.
        if [[ "$RESULT" == *"*"* ]]; then
            REQUIRED_CPU_REF=1
        fi
        USABLE_BACKENDS+=("$backend")

        RESULT=$(RunPerformanceOnBackendWithParameters "$backend")
        echo -n -e "$RESULT"
        # Save some specific values for use later: the 4th field is the average inference time.
        read -r -a RESULT_FIELDS <<< "$RESULT"
        case "$backend" in
            CpuAcc) CPUACC_AVERAGE_INFERENCE_TIME=${RESULT_FIELDS[3]:-} ;;
            GpuAcc) GPUACC_AVERAGE_INFERENCE_TIME=${RESULT_FIELDS[3]:-} ;;
        esac
    else
        # Show why the backend was rejected; it is left out of USABLE_BACKENDS.
        echo -n -e "$RESULT"
    fi
    echo
done
# Remove the rejected backends from future tests.
BACKENDS="${USABLE_BACKENDS[*]}"

# Only print this if it was required.
if [ -n "$REQUIRED_CPU_REF" ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi
| |
| # Now its time to look at backend specific parameters. |
| |
# This function first runs the accuracy test and then the performance test with one extra
# ExecuteNetwork parameter, and compares the resulting average inference time with an
# average measured earlier without that parameter.
#
# Parameters:
#   $1 Backend string like CpuAcc.
#   $2 Extra ExecuteNetwork parameter to try, e.g. "--fp16-turbo-mode".
#   $3 Average inference time (ms) measured earlier without the extra parameter.
# Output (stdout):
#   A header line followed by one result line. The last column is the improvement in ms
#   together with both averages, or "**No improvment**" when the extra parameter was not
#   faster (or when either average is unavailable).
#
function RunAccuracyAndPerformanceWithExtraParameter
{
    local backend=$1
    local extra_param=$2
    local baseline_average=$3
    local result param_average delta
    local -a fields=()

    echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)"
    result=$(RunAccuracyOnBackendWithParameters "$backend,CpuRef" "$extra_param")
    echo -n "$result"
    # The performance function adds the CpuRef fall back itself.
    result=$(RunPerformanceOnBackendWithParameters "$backend" "$extra_param")
    # The 4th whitespace separated field is the average inference time.
    read -r -a fields <<< "$result"
    param_average=${fields[3]:-}

    # If adding the parameter was faster then include by how much. The times are decimal
    # numbers, so they are compared numerically by awk; a string comparison would rank
    # "9.2" after "10.5".
    if [[ -n $param_average && -n $baseline_average ]] \
        && awk -v a="$param_average" -v b="$baseline_average" 'BEGIN { exit !((a + 0) < (b + 0)) }'; then
        delta=$(awk -v a="$param_average" -v b="$baseline_average" 'BEGIN { print b - a }')
        echo -e "$result\t\t\t$delta ($param_average v $baseline_average)"
    else
        echo -e "$result\t\t\t**No improvment**"
    fi
}
| |
| |
# Start with CpuAcc. Three knobs to twiddle, threads, fast-math and fp16.
if [[ "$BACKENDS" == *"CpuAcc"* ]]; then
    echo
    echo "CpuAcc optimizations."
    echo "============================"
    echo "The value of \"number-of-threads\" parameter by default is decided on by the backend."
    echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default."
    echo
    FASTER=""
    RESULT_FIELDS=()
    for i in {1..12}; do
        # Pass the plain backend name: the performance function adds the CpuRef fall back
        # itself, so the sweep uses the same backend list as the baseline measurement.
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc" "--number-of-threads $i")
        # The 4th whitespace separated field is the average inference time.
        read -r -a RESULT_FIELDS <<< "$RESULT"
        AVERAGE_INFERENCE_TIME=${RESULT_FIELDS[3]:-}
        # Print something out if the returned average is less than the previously saved average.
        # Skip the comparison when either value is missing so bc is never fed an empty operand.
        if [[ -n "$AVERAGE_INFERENCE_TIME" && -n "$CPUACC_AVERAGE_INFERENCE_TIME" ]] \
            && (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=$(echo "$CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME" | bc)
            echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    if [ -z "$FASTER" ]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi

    # Next is fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" "$CPUACC_AVERAGE_INFERENCE_TIME"

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo "Now trying \"enable-fast-math\"."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" "$CPUACC_AVERAGE_INFERENCE_TIME"
fi
| |
# GpuAcc.
# Options to check enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
if [[ "$BACKENDS" == *"GpuAcc"* ]]; then
    echo
    echo "GpuAcc optimizations."
    echo "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo "Now trying \"enable-fast-math\"."
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is tuning levels. Just speed on this one.
    echo
    echo "Now trying \"tuning-level/tuning-path\"."
    RESULT_FIELDS=()
    for i in {1..3}; do
        touch ./tuned-network.bin
        # Create tuned network file with the first run. The backend list names GpuAcc
        # literally; there is no shell variable called GpuAcc.
        OUTPUT=$("$EXECUTE_NETWORK" -m "$MODEL" -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level "$i" -N)
        AssertZeroExitCode
        # Now run the performance test reusing that saved network. The performance function
        # adds the CpuRef fall back itself.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc" "--tuning-path ./tuned-network.bin")
        # The 4th whitespace separated field is the average inference time.
        read -r -a RESULT_FIELDS <<< "$RESULT"
        AVERAGE_INFERENCE_TIME=${RESULT_FIELDS[3]:-}
        # Skip the comparison when either value is missing so bc is never fed an empty operand.
        if [[ -n "$AVERAGE_INFERENCE_TIME" && -n "$GPUACC_AVERAGE_INFERENCE_TIME" ]] \
            && (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            # Baseline minus tuned time, so the reported saving is a positive number.
            DELTA=$(echo "$GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME" | bc)
            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi