The first of several benchmarking scripts

I’m currently a file storage administrator, specializing in EMC Isilon. We have a rather large install (~60 heterogeneous nodes, ~4 PB) as well as some smaller systems, an HPC-dedicated GPFS filer from DDN, and an object-based storage system from Scality. Obviously, all of these systems have different performance characteristics, including the differing tiers of Isilon.

I’ve been benchmarking the various systems using the script below. I’ll walk through the various parts of the script. To date, this is probably one of my more ambitious attempts with Bash, and it would probably work better in Python, but I haven’t learned that yet. 😉


#!/bin/bash
# Print the two-line command synopsis to stdout.
# Arguments: none (the script name is taken from $0)
usage () {
	# ${0##*/} strips the directory part without forking basename and stays
	# correct when the script path contains spaces.
	printf '%s\n' \
		"Command syntax is ${0##*/} [write|read|shufread|rm|parrm] [test|tier1|tier2|gpfs|localscratch|localssd|object]" \
		"[filesizeG|M|K] [totalsize in GB] (optional) [file count per directory] (optional)"
}

# Operation, target and file size are mandatory; print the synopsis and
# stop when fewer than three arguments were given.
(( $# >= 3 )) || {
	usage
	exit 1
}

#CHANGE THESE PATHS TO FIT YOUR ENVIRONMENT
#set paths
# Map the target name ($2) to the directory holding the test files. Each
# file size ($3) gets its own subdirectory under the target's ddcompare dir.
case "$2" in
	test) fspath="/mnt/dmtest/scicomp/scicompsys/ddcompare/$3" ;;
	tier1) fspath="/mnt/node-64-dm11/ddcompare/$3" ;;
	tier2) fspath="/mnt/node-64-tier2/ddcompare/$3" ;;
	gpfs) fspath="/gpfs1/nlsata/ddcompare/$3" ;;
	localscratch) fspath="/scratch/carlilek/ddcompare/$3" ;;
	localssd) fspath="/ssd/ddcompare/$3" ;;
	# "scality" is accepted as an alias of "object": the dispatch code at the
	# bottom of the script tests for $2 = scality to choose the REST (curl)
	# code path, and that path was unreachable while this case rejected the
	# name. "object" itself keeps using plain file I/O on this directory.
	# NOTE(review): confirm /srttest/ddcompare is the right key prefix for
	# the REST proxy.
	object|scality) fspath="/srttest/ddcompare/$3" ;;
	# Exit unconditionally; "usage && exit 1" would fall through and keep
	# running with fspath unset if usage ever returned non-zero.
	*) usage; exit 1 ;;
esac

#some math to get the filesize in kilobytes
# $3 has the form <digits><unit>, where unit is G, M or K (binary multiples).
case "$3" in
	*G) kb_per_unit=$(( 1024 * 1024 )) ;;
	*M) kb_per_unit=1024 ;;
	*K) kb_per_unit=1 ;;
	*) usage; exit 1 ;;
esac

# Drop the one-character unit; what is left must be a positive integer.
# Rejecting it here avoids an arithmetic syntax error on garbage input and
# a division by zero further down when the size is 0.
size_digits=${3%?}
if [[ ! $size_digits =~ ^[0-9]+$ ]] || (( 10#$size_digits == 0 )); then
	usage
	exit 1
fi

# 10# forces base 10 so a leading zero (e.g. 08M) is not parsed as octal.
filesize=$(( kb_per_unit * 10#$size_digits ))

#set the suffix for file names
suffix=$3

# Optional arguments with their defaults:
#   $4  total size of the test set in GB (default 50)
#   $5  number of files per subdirectory (default 5120)
totalsize_gb=${4:-50}
filesperdir=${5:-5120}

# Both must be plain unsigned integers. filesperdir is also used as a
# divisor when the folder count is computed, so zero is rejected as well.
uint_re='^[0-9]+$'
if [[ ! $totalsize_gb =~ $uint_re || ! $filesperdir =~ $uint_re ]] \
	|| (( 10#$filesperdir == 0 )); then
	usage
	exit 1
fi

#set the total size of the test set
totalsize=$(( 1024 * 1024 * 10#$totalsize_gb )) #The size of the test set in kb

#set the number of files in subdirectories
filesperdir=$(( 10#$filesperdir )) #Files per subdirectory, used for large file counts

#set up variables for dd commands
# filesize and totalsize are in KB. A zero file size would divide by zero.
if (( filesize <= 0 )); then
	echo "file size must be a positive number of KB" >&2
	exit 1
fi

if (( filesize < 1024 )); then
	# Files smaller than 1 MiB are written as one block of the whole size.
	blocksize=$(( filesize * 1024 ))
else
	# Start from 1 MiB blocks and halve until the block size divides the
	# file size exactly. Sizes that are a multiple of 1024 KB keep 1 MiB
	# blocks; other sizes (e.g. 1536K) are no longer truncated by the
	# integer division that computes blockcount.
	blocksize=1048576
	while (( (filesize * 1024) % blocksize != 0 )); do
		blocksize=$(( blocksize / 2 ))
	done
fi

#set up variables for subdirectories
totfilecount=$(( totalsize / filesize ))
blockcount=$(( filesize * 1024 / blocksize ))
# Default to one folder so later divisions by foldercount are always valid,
# including when filesperdir exceeds the total file count.
foldercount=1
if (( filesperdir > 0 && filesperdir <= totfilecount )); then
	foldercount=$(( totfilecount / filesperdir ))
fi

#debug output
#echo $fspath
#echo filecount $totfilecount
#echo totalsize $totalsize KB
#echo filesize $filesize KB
#echo blockcount $blockcount
#echo blocksize $blocksize bytes

#make the `time` keyword report only elapsed (real) seconds, one decimal place
TIMEFORMAT='%1R'

#creates directory to write to
# Arguments: $1 - directory path; missing parent directories are created
# Returns:   exit status of mkdir (0 when the directory already exists)
createdir () {
	# mkdir -p already succeeds on an existing directory, so no separate
	# existence test is needed. Quoting keeps paths with spaces intact.
	mkdir -p -- "$1"
}

#write test
# Write $filecount files named <n>-$suffix into $path with dd, each made of
# $blockcount blocks of $blocksize bytes read from /dev/zero.
# Globals: path, suffix, filecount, blocksize, blockcount (read only)
# Returns: 0 when every dd succeeded, 1 when at least one failed
writefiles () {
	local i failed=0
	#echo WRITE
	for (( i = 1; i <= filecount; i++ )); do
		#echo -n .
		# dd prints its transfer statistics on stderr; they are discarded.
		# Failures are remembered rather than aborting, so the remaining
		# files are still attempted as before.
		dd if=/dev/zero of="$path/$i-$suffix" bs="$blocksize" count="$blockcount" 2> /dev/null \
			|| failed=1
	done
	if (( failed )); then
		echo "writefiles: at least one dd write under $path failed" >&2
		return 1
	fi
}

#read test
# Read files 1-$suffix .. $filecount-$suffix from $path in ascending order,
# sending the data to /dev/null using reads of $blocksize bytes.
# Globals: path, suffix, filecount, blocksize (read only)
# Returns: 0 when every dd succeeded, 1 when at least one failed
readfiles () {
	local i failed=0
	#echo READ
	for (( i = 1; i <= filecount; i++ )); do
		#echo -n .
		# dd prints its transfer statistics on stderr; they are discarded
		# (drop the redirection to see them while debugging).
		dd if="$path/$i-$suffix" of=/dev/null bs="$blocksize" 2> /dev/null \
			|| failed=1
	done
	if (( failed )); then
		echo "readfiles: at least one dd read under $path failed" >&2
		return 1
	fi
}

#shuffled read test
# Read files 1-$suffix .. $filecount-$suffix from $path exactly once each,
# in a random order produced by shuf, sending the data to /dev/null.
# Globals: path, suffix, filecount, blocksize (read only)
# Returns: 0 when every dd succeeded (or there was nothing to read),
#          1 when at least one dd failed
shufreadfiles () {
	local -a order=()
	local i failed=0
	#echo SHUFFLE READ
	# shuf rejects an empty range such as 1-0, so skip it when there are no files.
	(( filecount >= 1 )) || return 0
	# The whole permutation is generated before the first read starts.
	# Word splitting is safe here: shuf -i emits only integers.
	# shellcheck disable=SC2207
	order=( $(shuf -i "1-$filecount") )
	for i in "${order[@]}"; do
		#echo -n .
		#echo $path/$i-$suffix
		# dd prints its transfer statistics on stderr; they are discarded.
		dd if="$path/$i-$suffix" of=/dev/null bs="$blocksize" 2> /dev/null \
			|| failed=1
	done
	if (( failed )); then
		echo "shufreadfiles: at least one dd read under $path failed" >&2
		return 1
	fi
}

#ObjectWrite
# PUT $filecount objects named <n>-$suffix under $fspath through the REST
# proxy on localhost:81. Each body is $blockcount blocks of $blocksize
# bytes of zeros streamed from dd into curl's stdin.
# Globals: fspath, suffix, filecount, blocksize, blockcount (read only)
scalitywrite () {
    local i
    # Strip a leading slash from fspath and add exactly one, so the key is
    # the same whether fspath is absolute or relative.
    local base="http://localhost:81/proxy/bparc/${fspath#/}"
    for (( i = 1; i <= filecount; i++ )); do
        dd if=/dev/zero bs="$blocksize" count="$blockcount" 2> /dev/null \
            | curl -s -X PUT "$base/$i-$suffix" -T- > /dev/null
    done
}

#ObjectRead
# GET objects 1-$suffix .. $filecount-$suffix under $fspath through the REST
# proxy on localhost:81 and discard the bodies.
# Globals: fspath, suffix, filecount (read only)
scalityread () {
    local i
    # fspath is an absolute path, so "bparc/$fspath" produced "bparc//...",
    # which is not the key scalitywrite uploads to ("bparc$fspath").
    # Strip the leading slash and add exactly one so both agree.
    local base="http://localhost:81/proxy/bparc/${fspath#/}"
    for (( i = 1; i <= filecount; i++ )); do
        curl -s -X GET "$base/$i-$suffix" > /dev/null
    done
}

#Do the work based on the work type
#
# $1 selects the operation, $2 the target. For file targets, a test set of
# chunk_threshold files or more is spread over $foldercount numbered
# subdirectories of $fspath; smaller sets live directly in $fspath.
# Only the I/O itself is wrapped in `time`; directory creation, cache
# dropping and shuffling of the folder order happen outside the timing.

# File count at or above which the test set is chunked into subdirectories.
chunk_threshold=10000
# Keep the per-folder division valid even if foldercount was never set.
foldercount=${foldercount:-1}
# Object URL prefix for the REST proxy. Strip a leading slash from fspath
# and add exactly one, so the key matches the "bparc$fspath" form used for
# uploads instead of "bparc//...".
object_url="http://localhost:81/proxy/bparc/${fspath#/}"

# Header line. When the optional arguments are omitted, the effective
# defaults are shown instead of empty fields.
printf '%s %s filesize: %s totalsize: %sG filesperdir: %s\n' \
	"$1" "$2" "$3" "${4:-$(( totalsize / 1048576 ))}" "${5:-$filesperdir}"

case "$1" in
	write)
		if [[ $2 == scality ]]; then
			filecount=$totfilecount
			time scalitywrite
		elif (( totfilecount >= chunk_threshold )); then
			#Chunk file groups into folders if count is too high
			for (( dir = 1; dir <= foldercount; dir++ )); do
				createdir "$fspath/$dir"
			done
			filecount=$(( totfilecount / foldercount ))
			time for (( dir = 1; dir <= foldercount; dir++ )); do
				path="$fspath/$dir"
				writefiles
			done
		else
			path=$fspath
			createdir "$path"
			filecount=$totfilecount
			time writefiles
		fi
	;;
	read) #in order read
		# Flush dirty pages, then ask the kernel to drop the page cache so
		# the reads are not served from memory.
		# NOTE(review): the write to /proc normally needs root; a failure
		# here is reported by the shell but does not stop the run.
		sync; echo 1 > /proc/sys/vm/drop_caches
		if [[ $2 == scality ]]; then
			filecount=$totfilecount
			time scalityread
		elif (( totfilecount >= chunk_threshold )); then
			filecount=$(( totfilecount / foldercount ))
			time for (( dir = 1; dir <= foldercount; dir++ )); do
				path="$fspath/$dir"
				readfiles
			done
		else
			path=$fspath
			filecount=$totfilecount
			time readfiles
		fi
	;;
	rm) #serial remove files
		if [[ $2 == scality ]]; then
			time for (( i = 1; i <= totfilecount; i++ )); do
				curl -s -X DELETE "$object_url/$i-$suffix" > /dev/null
			done
		elif (( totfilecount >= chunk_threshold )); then
			time for (( i = 1; i <= foldercount; i++ )); do
				# ${fspath:?} aborts instead of globbing from / if fspath is empty.
				rm -f -- "${fspath:?}/$i"/*-"$suffix"
				rmdir -- "$fspath/$i"
			done
		# fspath already ends in /$3; testing "$fspath/$3" looked for a
		# directory that is never created, so small test sets were never
		# removed.
		elif [[ -d $fspath ]]; then
			time rm -f -- "${fspath:?}"/*-"$suffix"
		fi
	;;
	parrm) #parallel remove files
		# Entries of $fspath are handed to GNU parallel, up to 64 per rm call.
		time ls "${fspath:?}" | parallel -N 64 rm -rf "$fspath"/{}
	;;
	shufread) #shuffled read
		# Same cache flush as for the in-order read.
		sync; echo 1 > /proc/sys/vm/drop_caches
		if (( totfilecount >= chunk_threshold )); then
			# Visit the folders in random order; the files inside each one
			# are shuffled again by shufreadfiles. shuf -i emits only
			# integers, so word splitting is safe.
			# shellcheck disable=SC2207
			folderarray=( $(shuf -i "1-$foldercount") )
			filecount=$(( totfilecount / foldercount ))
			time for dir in "${folderarray[@]}"; do
				path="$fspath/$dir"
				shufreadfiles
			done
		else
			path=$fspath
			filecount=$totfilecount
			time shufreadfiles
		fi
	;;

	*) usage; exit 1 ;;
esac
# Printed after every operation, including the scality ones, which used to
# exit before reaching this separator.
echo '------------------------'

I’ll break this all down in my next post.

Advertisements

About kcarlile
Twitter: @overclockdlemon

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: