blob: 931167dda817df86732020ae28c8a0b330314b4a [file] [log] [blame]
Colm Donelan6f68ad22023-09-07 10:36:17 +01001#!/bin/bash
2#set -x
3#
4# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
5# SPDX-License-Identifier: MIT
6#
7# This script will run a TfLite model through ExecuteNetwork trying all available backends to measure
8# both speed and accuracy. In addition, it will try some of the performance options that are available.
9#
10# Prerequisites: ExecuteNetwork must be built with:
11# * CpuRef enabled (-DARMNNREF=1)
12# * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
13# * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
14# * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
15# * The model must be fully supported by Arm NN.
16#
17# Usage:
18# evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test>
19#
20# Sample usage:
21# evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
22#
23
CMD=$( basename "$0" )

# Print the command line help text and terminate with a non-zero exit code.
usage() {
    printf '%s\n' "Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>"
    printf '%s\n' "Options: -e <Path to ExecuteNetwork>"
    printf '%s\n' " -m <Test model>"
    exit 1
}
32
# Errors if the previous command had a non-zero exit code.
# Must be called immediately after the command being checked, before
# anything else can overwrite $?. Also leaves the code in EXITCODE.
function AssertZeroExitCode {
    EXITCODE=$?
    [ $EXITCODE -eq 0 ] && return
    echo -e "Previous command exited with code $EXITCODE"
    exit 1
}
41
OPTION_COUNTER=0
# Parse the command line. "h" is included in the optstring so that -h reaches
# the h) arm directly instead of tripping a getopts "illegal option" error
# before usage is printed.
while getopts "he:m:" opt; do
    ((OPTION_COUNTER+=1))
    case "$opt" in
        h|\?) usage;;
        e) EXECUTE_NETWORK_PATH="$OPTARG";;
        m) MODEL="$OPTARG";;
    esac
done
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi
58
# Check the path to execute network will find the executable.
# Fail fast if the binary is missing; otherwise record its full path.
if [ ! -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
    usage
    exit 1
fi
echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
68
# Check that the model exists and has a supported extension.
# $MODEL is quoted so paths containing spaces do not break the file test.
if [ -f "$MODEL" ]; then
    if [[ ! $MODEL =~ (tflite)$ ]]; then
        echo "Only .tflite files are supported."
        exit 1
    fi
else
    echo Model file: "\"$MODEL\" could not be found."
    usage
    exit 1
fi
80
# Find out the available backends. Unfortunately the list of backends spans multiple lines.
# This means we have to do this in several steps: unquoted echo collapses the help text onto
# one line, then sed isolates the contents of the "[ ... ]" backend list and drops the commas.
echo -n -e "Available backends on this executable\t\t:"
HELP_OUTPUT=$($EXECUTE_NETWORK --help)
BACKENDS=$(echo $HELP_OUTPUT | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g')
# Remove the leading space to make it look prettier.
BACKENDS="${BACKENDS:1}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
else
    echo " $BACKENDS"
    # We really need the CpuRef to be in there: it is the reference fall back used throughout.
    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
        echo ""
        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
        exit 1
    fi
fi
101
102
# This is where the real work starts.
# Model execution can take a long time, so trap ctrl-c and tell the user.

# Handler invoked when the user interrupts the run with ctrl-c.
function ctrl_c() {
    echo -e "Interrupted.\nNo patience eh? Try a smaller model."
    exit 1
}

trap ctrl_c INT
111
112
# We need to check that the delegate is supported otherwise we can't run through the tf runtime.
echo -n -e "Is the delegate supported on this executable?\t:"
TFLITE_EXECUTION=$($EXECUTE_NETWORK -m "$MODEL" -T tflite -c CpuRef -N)
# Check for an error message about building with the delegate.
if [[ $TFLITE_EXECUTION =~ "Tensorflow-Lite delegate support" ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)"
    exit 1
else
    echo " Yes"
fi
124
# Run through CpuRef to see if Arm NN supports the model.
echo -n -e "Is the model fully supported by Arm NN?\t\t:"
REF_EXECUTION=$($EXECUTE_NETWORK -m "$MODEL" -c CpuRef -N)
# $? here is the exit status of the command substitution above; it must be
# checked before any other command runs.
# If it failed look for the most common reason - an unsupported layer.
if [ $? -ne 0 ]; then
    if [[ $REF_EXECUTION =~ "is not supported on requested backend CpuRef" ]]; then
        echo -e " No - One or more layers are not supported by Arm NN"
    else
        echo -e " No - Execution using CpuRef backend failed."
    fi
    echo -e "The Reported problems were\t:"
    echo $(echo "$REF_EXECUTION" | sed '/Warning\|ERROR\|Fatal/!d')
    echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" "
    exit 1
fi
echo " Yes"
141
# This function will execute the model and return a string representation of the results. This is the
# first time the model will be executed.
# If a layer is unsupported it is retried with -c $BACKEND,CpuRef to allow the odd layer to be
# supported by an unaccelerated backend; the result is then marked with a "*".
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Additional ExecuteNetwork parameters (intentionally unquoted below so it word-splits).
#
# Reads globals: EXECUTE_NETWORK, MODEL.
# Prints either "OK" or "!<rms value>" followed by "*" when CpuRef fall back was required.
function RunAccuracyOnBackendWithParameters {
    local BACKEND=$1
    local ADDITIONAL_PARAM=$2
    local ACCURACY_RUN ACCURACY_VALUE ACCURACY
    # Must start empty on every call; it is only set when this run needed a CpuRef fall back.
    local REQUIRES_CPUREF=""
    # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure.
    ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c $BACKEND $ADDITIONAL_PARAM -A -N)
    # Start by checking the return code.
    if [ $? -ne 0 ]; then
        # Maybe this backend isn't supported.
        if [[ $ACCURACY_RUN =~ "None of the preferred backends [$BACKEND ] are supported" ]]; then
            echo -e "\t\t***Is not supported***"
            return 1
        elif [[ $ACCURACY_RUN =~ "is not supported on requested backend" ]]; then
            # One or more layers require a fall back. Run again with CpuRef fall back.
            ACCURACY_RUN=$($EXECUTE_NETWORK -m "$MODEL" -c $BACKEND,CpuRef $ADDITIONAL_PARAM -A -N)
            REQUIRES_CPUREF="*"
        else
            # In the case of a general failure against this backend tell the user what we tried and then
            # ignore this backend.
            echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\""
            return 1
        fi
    fi
    # Now check the RMS value. If it isn't 0 then mark this as questionable accuracy.
    ACCURACY_VALUE=$(echo "$ACCURACY_RUN" | grep 'Byte level')
    if [[ ! $ACCURACY_VALUE == *0 ]]; then
        # Strip the label text, leaving only the numeric RMS value, prefixed with "!".
        ACCURACY=!$(echo $ACCURACY_VALUE | sed 's/[a-zA-Z:]*//g')
    else
        ACCURACY="OK"
    fi
    # Add on the * if we needed to add CpuRef.
    # Bug fix: the original tested [ -z $REQUIRES_CPUREF ], which is inverted - the "*" was
    # appended in the branch where it was empty and therefore never actually printed.
    if [ -n "$REQUIRES_CPUREF" ]; then
        echo -e "$ACCURACY $REQUIRES_CPUREF\t\t"
    else
        echo -e "$ACCURACY\t\t"
    fi
}
186
187# This function will execute the model and return a string representation of the results. The execution
188# Is done wth -c $BACKEND,CpuRef to allow the odd layer to ot be supported by an accelerated backend.
189#
190# Parameters:
191# $1 Backend string like CpuRef.
192# $2 Additional ExecuteNetwork parameters.
193#
194function RunPerformanceOnBackendWithParameters {
195 BACKEND=$1
196 ADDITIONAL_PARAM=$2
197 # Execute with 6 inferences. Mark the first as initial inference. Average the rest.
198 SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM`
199
200 # Extract the model load time
201 MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'`
202 MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the tailing space and full stop.
203 # and the optimization time.
204 OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'`
205 OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the tailing space.
206
207 # All 6 inference times.
208 RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"`
209 # This will take "Info: Inference time: 0.03 ms Info:..." and transform to "0.03 0.01 0.01"
210 INFERENCE_TIMES=`echo $RAW_INFERENCE | sed 's/[a-zA-Z:]*//g'`
211 INITIAL_INFERENCE_TIME=`echo $INFERENCE_TIMES | cut -d ' ' -f 1`
212 # Now remove the initial inference time as it will skew the average.
213 INFERENCE_TIMES=`echo $INFERENCE_TIMES | sed 's/[^ ]* //'`
214 # Use awk to sum and average the remaining 5 numbers.
215 AVERAGE_INFERENCE_TIME=`echo $INFERENCE_TIMES | awk '{s+=$1}END{print s/NR}' RS=" "`
216
217 # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
218 echo -e "$MODEL_LOAD_TIME\t\t$OPTIMIZATION_TIME\t\t\t$INITIAL_INFERENCE_TIME\t\t\t$AVERAGE_INFERENCE_TIME\t"
219}
220
221
# Check execution in all available backends. For each backend: print the accuracy result,
# then (if the backend is supported) run the performance measurement. Unsupported backends
# are removed from $BACKENDS so later per-backend sections skip them.
echo "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
for backend in $BACKENDS
do
    echo -n -e "$backend\t\t"
    RESULT=$(RunAccuracyOnBackendWithParameters $backend)
    echo -n -e "$RESULT"
    # A "*" in the accuracy result means this backend required a CpuRef fall back.
    if [[ $RESULT =~ "*" ]]; then
        REQUIRED_CPU_REF=1
    fi
    # It's possible the backend wasn't supported.
    if [[ ! "$RESULT" =~ "not supported" ]]; then
        # It was, continue.
        RESULT=$(RunPerformanceOnBackendWithParameters $backend)
        echo -n -e "$RESULT"
        # Save some specific values for use later.
        if [ $backend == "CpuAcc" ]; then
            # In the case of CpuAcc we save the average inference time (4th field of RESULT).
            CPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
        if [ $backend == "GpuAcc" ]; then
            # In the case of GpuAcc we save the average inference time (4th field of RESULT).
            GPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
    else
        # Remove this backend from future tests. (Safe inside the loop: the for-list was
        # expanded once before iteration started.)
        BACKENDS=`echo $BACKENDS | sed "s/$backend//"`
    fi
    echo
done
# Only print this if it was required.
if [ ! -z $REQUIRED_CPU_REF ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi
258
# Now it's time to look at backend specific parameters.

# This function first runs the accuracy test and then the performance test. It uses the average
# from earlier to compare to.
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Extra ExecuteNetwork parameter, e.g. "--fp16-turbo-mode".
#  $3 Baseline average inference time in ms to compare against.
#
function RunAccuracyAndPerformanceWithExtraParameter
{
    BACKEND=$1
    EXTRA_PARAM=$2
    AVERAGE_INFERENCE_TIME=$3
    echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)"
    RESULT=$(RunAccuracyOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    echo -n "$RESULT"
    RESULT=$(RunPerformanceOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    PARAM_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
    # If adding the parameter was faster then include by how much.
    # Bug fix: the original used [[ "$a" < "$b" ]], which is a lexical string comparison and is
    # wrong for numbers of different magnitudes (e.g. "9.5" < "10.2" is false). Compare
    # numerically with bc, matching the comparisons used elsewhere in this script.
    if (( $(echo "$PARAM_AVERAGE_INFERENCE_TIME < $AVERAGE_INFERENCE_TIME" | bc -l) )); then
        DELTA=`echo $AVERAGE_INFERENCE_TIME - $PARAM_AVERAGE_INFERENCE_TIME | bc`
        echo -e "$RESULT\t\t\t$DELTA ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)"
    else
        echo -e "$RESULT\t\t\t**No improvment**"
    fi
}
281
282
# Start with CpuAcc. Three knobs to twiddle: threads, fast-math and fp16.
# Only runs if CpuAcc survived the earlier support check (unsupported backends were
# removed from $BACKENDS).
if [[ $BACKENDS =~ "CpuAcc" ]]; then
    echo
    echo "CpuAcc optimizations."
    echo "============================"
    echo "The value of \"number-of-threads\" parameter by default is decided on by the backend."
    echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default."
    echo
    for i in {1..12}
    do
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $i")
        # The average inference time is the 4th field of the performance result.
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        # Print something out if the returned average is less than the previously saved average.
        # bc -l performs the floating point comparison; it prints 1 when true.
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=`echo $CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    if [ -z $FASTER ]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi
    # Next is fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" $CPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" $CPUACC_AVERAGE_INFERENCE_TIME
fi
317
# GpuAcc.
# Options to check: enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
# Only runs if GpuAcc survived the earlier support check.
if [[ $BACKENDS =~ "GpuAcc" ]]; then
    echo
    echo "GpuAcc optimizations."
    echo "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is tuning levels. Just speed on this one.
    echo
    echo -n "Now trying \"tuning-level/tuning-path\"."
    echo
    for i in {1..3}
    do
        touch ./tuned-network.bin
        # Create tuned network file with the first run.
        # Bug fix: this previously passed "-c $GpuAcc,CpuRef" - $GpuAcc is an undefined
        # variable, so the option expanded to "-c ,CpuRef". The backend name is the
        # literal string GpuAcc.
        OUTPUT=`$EXECUTE_NETWORK -m $MODEL -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N`
        AssertZeroExitCode
        # Now run the performance test reusing that saved network.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            # Bug fix: report the improvement as a positive delta (baseline minus new time),
            # matching the CpuAcc section; the operands were previously reversed, which
            # produced a negative value.
            DELTA=`echo $GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi
358fi