#!/bin/bash
# Runs Andrea Arcangeli's autonuma benchmark.
#
# This is a synthetic benchmark that stresses some NUMA effects but is not
# intended to represent any real workload as such. Very broadly speaking
# there are 4 benchmarks of interest.
#
# NUMA01
#   Two processes
#   NUMCPUS/2 threads per process so all CPUs are in use
#   
#   On startup, the process forks
#   Each process mallocs a 3G buffer but there is no communication
#	between the processes.
#   Threads are created that zero out the full buffer 1000 times
#
#   The objective of the test is that initially the two processes
#   allocate their memory on the same node. As the threads are
#   created the memory will migrate from the initial node to
#   nodes that are closer to the referencing thread.
#
#   It is worth noting that this benchmark is specifically tuned
#   for two nodes and the expectation is that the two processes
#   and their threads split so that all threads of process A run
#   on node 0 and all threads of process B run on node 1
#
#   With 4 and more nodes, this is actually an adverse workload.
#   As all the buffer is zeroed in both processes, there is an
#   expectation that it will continually bounce between two nodes.
#
#   So, on 2 nodes, this benchmark tests convergence. On 4 or more
#   nodes, this partially measures how much busy work automatic
#   NUMA migrate does and it'll be very noisy due to cache conflicts.
#
# NUMA01_THREADLOCAL
#   Two processes
#   NUMCPUS/2 threads per process so all CPUs are in use
#
#   On startup, the process forks
#   Each process mallocs a 3G buffer but there is no communication
#       between the processes
#   Threads are created that zero out their own subset of the buffer.
#       Each buffer is 3G/NR_THREADS in size
#   
#   This benchmark is slightly better. In an ideal situation, each
#   thread will migrate its data to its local node. The test really
#   is to see whether it converges and how quickly.
#
# NUMA02
#  One process, NR_CPU threads
#
#  On startup, malloc a 1G buffer
#  Create threads that zero out a thread-local portion of the buffer.
#	Zeros multiple times - the number of times is fixed and seems
#	to just be to take a period of time
#
#  This is similar in principle to NUMA01_THREADLOCAL except that only
#  one process is involved. I think it was aimed at being more JVM-like.
#
# NUMA02_SMT
#  One process, NR_CPU/2 threads
#
#  This is a variation of NUMA02 except that with half the cores idle it
#  is checking if the system migrates the memory to two or more nodes or
#  if it tries to fit everything in one node even though the memory should
#  migrate to be close to the CPU
# Shellpack identity and the version used when -v is not given.
P="autonumabench-bench"
DEFAULT_VERSION="0"

# Pull in the shared shellpack helpers.
source $SHELLPACK_INCLUDE/common.sh

# Prefer an external time(1) binary; "which" prints nothing when none is
# installed, in which case fall back to the word "time" plus a TIMEFORMAT
# for bash's built-in timing.
# NOTE(review): bash only treats "time" as a keyword when it is written
# literally at the start of a pipeline, and the keyword has no -o option, so
# this fallback does not look usable with the "$TIME_CMD -o ..." invocation
# later in this script - confirm on a host without time(1).
TIME_CMD=$(which time)
if [ -z "$TIME_CMD" ]; then
	TIME_CMD="time"
	TIMEFORMAT="%2Uuser %2Ssystem %Relapsed %P%%CPU"
fi

# Benchmarks to run, in order. Each name must match an arm of the case
# statement in the run loop and is also used in the result file names.
AUTONUMA_TESTS="NUMA01 NUMA02 NUMA01_THEADLOCAL NUMA02_SMT"

# Packages needed to build and run the benchmark.
install-depends numactl libnuma-devel gnuplot

# Basic argument parser
#
# Recognised options:
#   -v VERSION                  benchmark version (default: $DEFAULT_VERSION)
#   --serverside-command CMD    stored in SERVERSIDE_COMMAND (default: none)
#   --serverside-name NAME      stored in SERVERSIDE_NAME (default: timestamp)
#   --install-only              sets INSTALL_ONLY=yes
#   --install-force             sets INSTALL_FORCE=yes
# Any other argument is reported and skipped. Parsing stops at the first
# empty argument.
TASKSET_SERVER=
TASKSET_CLIENT=
SERVERSIDE_COMMAND=none
SERVERSIDE_NAME=$(date +%Y%m%d-%H%M-%S)

while [ "$1" != "" ]; do
	case "$1" in
	-v|--serverside-command|--serverside-name)
		# These options consume a value. "shift 2" leaves the
		# positional parameters untouched when only one is left, so
		# without this check a trailing option with no value would
		# keep "$1" unchanged and make the loop spin forever.
		# die comes from common.sh and is expected to end the script.
		if [ $# -lt 2 ]; then
			die "Option $1 requires an argument"
		fi
		case "$1" in
		-v)
			VERSION=$2
			;;
		--serverside-command)
			SERVERSIDE_COMMAND=$2
			;;
		--serverside-name)
			SERVERSIDE_NAME=$2
			;;
		esac
		shift 2
		;;
	--install-only)
		INSTALL_ONLY=yes
		shift
		;;
	--install-force)
		INSTALL_FORCE=yes
		shift
		;;
	*)
		echo "Unrecognised option: $1"
		shift
		;;
	esac
done

# TASKSET_SERVER is only reported when something set it to a non-empty value.
if [ "$TASKSET_SERVER" != "" ]; then
	echo TASKSET_SERVER: $TASKSET_SERVER
	echo TASKSET_CLIENT: $TASKSET_CLIENT
fi

# Fall back to the default version when -v was not given (or was empty).
if [ -z "$VERSION" ]; then
	VERSION=$DEFAULT_VERSION
fi

# Install the benchmark if it is not already present, then enter its
# install directory. With --install-only the script stops here.
if [ "$INSTALL_FORCE" = "yes" ]; then
	# ${SHELLPACK_SOURCES:?} aborts the script instead of letting rm -rf
	# run against "/autonumabench-..." when the variable is unset or empty.
	# NOTE(review): this removes autonumabench-${VERSION} while the check
	# below looks at autonumabench-${VERSION}-installed, so forcing does
	# not by itself trigger a reinstall when the -installed directory
	# already exists - confirm against shellpack-install-autonumabench.
	rm -rf -- "${SHELLPACK_SOURCES:?}/autonumabench-${VERSION}"
fi
if [ ! -d "$SHELLPACK_SOURCES/autonumabench-${VERSION}-installed" ]; then
	mmtests_activity source-install
	"$SHELLPACK_INCLUDE/shellpack-install-autonumabench" -v "${VERSION}" || die autonumabench install script returned error
	mmtests_activity source-installed
fi
# The benchmark is run from its install directory, so failing to enter it
# is fatal.
cd "$SHELLPACK_SOURCES/autonumabench-${VERSION}-installed" || die Failed to cd to autonumabench install directory
if [ "$INSTALL_ONLY" = "yes" ]; then
	echo autonumabench installed only as requested.
	exit $SHELLPACK_SUCCESS
fi
# Include monitor hooks
. "$SHELLPACK_INCLUDE/include-monitor.sh"

# Run every benchmark named in AUTONUMA_TESTS. For each one:
#   1. map the test name to the matching start_bench.sh switch,
#   2. run start_bench.sh under $TIME_CMD between the monitor pre/post hooks,
#   3. write the timing to time.<test> and the console output to
#      autonumabench.<test>, then move any *.txt / *.pdf files left in the
#      current directory into $LOGDIR_RESULTS.
# The exit status of each run is recorded. The remaining tests still run and
# their results are still collected, but the script dies at the end if any
# benchmark failed instead of reporting success.
AUTONUMA_FAILED=
for TESTTYPE in $AUTONUMA_TESTS; do
	mmtests_activity $TESTTYPE
	SWITCH=
	case $TESTTYPE in
	NUMA01)
		SWITCH=-x
		;;
	NUMA02)
		SWITCH=-y
		;;
	NUMA01_THEADLOCAL)
		SWITCH=-t
		;;
	NUMA02_SMT)
		SWITCH=-s
		;;
	HARD_BIND)
		SWITCH=-b
		;;
	INVERSE_BIND)
		SWITCH=-i
		;;
	esac

	monitor_pre_hook "$LOGDIR_RESULTS" $TESTTYPE
	$TIME_CMD -o "$LOGDIR_RESULTS/time.$TESTTYPE" \
		./start_bench.sh $SWITCH | tee "$LOGDIR_RESULTS/autonumabench.$TESTTYPE"
	# Must be read immediately after the pipeline: $? would only report
	# tee's status, PIPESTATUS[0] is the status of the $TIME_CMD stage.
	AUTONUMA_STATUS=${PIPESTATUS[0]}
	monitor_post_hook "$LOGDIR_RESULTS" $TESTTYPE
	mv *.txt *.pdf "$LOGDIR_RESULTS"
	grep elapsed "$LOGDIR_RESULTS/time.$TESTTYPE"

	if [ "$AUTONUMA_STATUS" -ne 0 ]; then
		echo "autonumabench $TESTTYPE exited with status $AUTONUMA_STATUS" >&2
		AUTONUMA_FAILED="$AUTONUMA_FAILED $TESTTYPE"
	fi
done

if [ -n "$AUTONUMA_FAILED" ]; then
	die "autonumabench failed for:$AUTONUMA_FAILED"
fi

exit $SHELLPACK_SUCCESS
#### Description AutoNUMA Benchmark
#### Details autonumabench-bench 14
