#!/bin/bash
# This is the script for running the graph500 benchmark
#
###SHELLPACK preamble graph500-bench 2.1.4

# MPI launcher binary and its options. Both start out empty; the
# "Set parallelize options" case further down either fills them in for an
# MPI run or unsets them for an OpenMP run.
MPIRUN=
MPIOPT=

# Command-line handling. Each directive below pairs an option name with the
# GRAPH500_* variable named next to it.
# NOTE(review): the ###SHELLPACK lines appear to be directives expanded by
# the shellpack build step rather than ordinary comments -- keep them
# verbatim, including the tab separators.
###SHELLPACK parseargBegin
###SHELLPACK parseargParam	--workset		GRAPH500_WORKSET
###SHELLPACK parseargParam	--parallelize		GRAPH500_PARALLELIZE
###SHELLPACK parseargYes	--use-hugetlbfs		GRAPH500_HUGETLBFS
###SHELLPACK parseargEnd
###SHELLPACK monitor_hooks

###SHELLPACK check_install_required graph500-${VERSION}
###SHELLPACK init_complete

# Approximate equation for memory usage in gigabytes is
# ((2**SCALE) * (2 * EDGE + 1))*8/1048576/1024
#
# Multipliers apply depending on how it is parallelised.
#
# toy and mini are defined by the graph500 table of classes. The smaller
# classes are defined by mmtests in the interest of testing graph500 on
# single nodes instead of clusters.
SCALE=
EDGE=
case "$GRAPH500_WORKSET" in
infant)
	# 1G
	SCALE=22
	EDGE=14
	;;
kinder)
	# 8G
	SCALE=25
	EDGE=16
	;;
toy)
	# 17GB
	SCALE=26
	EDGE=16
	;;
mini)
	# 136 GB
	SCALE=29
	EDGE=16
	;;
*)
	# Without a known workset SCALE and EDGE stay empty and the
	# arithmetic below cannot be evaluated, so stop here.
	die "Unrecognised workset $GRAPH500_WORKSET"
	;;
esac

# Base estimate in bytes, then apply the scaling multiplier of the selected
# parallelisation method. The same case also picks the binary to run.
APPROXIMATE_USAGE=$((((2**SCALE)*(2*EDGE+1))*8))
case "$GRAPH500_PARALLELIZE" in
omp)
	GRAPH500_BINARY="./omp-csr/omp-csr"
	# x2.2, expressed as an integer ratio so no external tool is needed
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*11/5))
	;;
mpi-simple)
	GRAPH500_BINARY="./mpi/graph500_mpi_simple"
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*7/4))
	;;
mpi-one-sided)
	GRAPH500_BINARY="./mpi/graph500_mpi_one_sided"
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*7/4))
	;;
mpi-replicated)
	GRAPH500_BINARY="./mpi/graph500_mpi_replicated"
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*7/4))
	;;
mpi-replicated-csc)
	GRAPH500_BINARY="./mpi/graph500_mpi_replicated_csc"
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*7/4))
	;;
mpi-custom)
	GRAPH500_BINARY="./mpi/graph500_mpi_custom"
	APPROXIMATE_USAGE=$((APPROXIMATE_USAGE*7/4))
	;;
*)
	die "Unrecognised parallelize method $GRAPH500_PARALLELIZE"
	;;
esac

# Check available memory. This must run after APPROXIMATE_USAGE has been
# computed above; the run is refused when the estimate exceeds 4/5 of
# MEMTOTAL_BYTES.
if [ "$APPROXIMATE_USAGE" -gt $((MEMTOTAL_BYTES*4/5)) ]; then
	die "Estimated memory usage $APPROXIMATE_USAGE bytes exceeds estimated available $((MEMTOTAL_BYTES*4/5))"
fi

# Set MPICPUS to the largest power of two that does not exceed NUMCPUS
# (minimum 1). POWER ends up as the first exponent whose power of two is
# greater than NUMCPUS. Both are plain globals.
round_down_power_2_cpus() {
	MPICPUS=1
	POWER=1

	# Keep doubling for as long as the doubled value still fits
	while [ $((MPICPUS*2)) -le $NUMCPUS ]; do
		MPICPUS=$((MPICPUS*2))
		POWER=$((POWER+1))
	done
}
round_down_power_2_cpus

# Generate make.inc from scratch: compiler and linker flags first, then the
# preprocessor defines. The USE_MMAP_LARGE defines are only added when
# GRAPH500_HUGETLBFS is "yes"; otherwise CPPFLAGS is written out empty.
printf '%s\n' \
	"CFLAGS = -g -std=c99 -O3 -march=native -fgcse-sm -fgcse-las -fgcse-after-reload -floop-strip-mine -ftree-loop-im -fivopts -funswitch-loops" \
	"LDLIBS = -lm -lrt" > make.inc
case "$GRAPH500_HUGETLBFS" in
yes)
	printf '%s\n' "CPPFLAGS = -DUSE_MMAP_LARGE -DUSE_MMAP_LARGE_EXT" >> make.inc
	;;
*)
	printf '%s\n' "CPPFLAGS = " >> make.inc
	;;
esac

# Set parallelize options: the benchmark command line, the launcher and
# thread environment, and the matching build switches appended to make.inc.
case "$GRAPH500_PARALLELIZE" in
omp)
	GRAPH500_COMMAND="$GRAPH500_BINARY -s $SCALE -e $EDGE"
	export OMP_NUM_THREADS=$MPICPUS
	# No MPI launcher is involved in an OpenMP run
	unset MPIRUN
	unset MPIOPT
	echo BUILD_OPENMP = Yes		>> make.inc
	echo CFLAGS_OPENMP = -fopenmp	>> make.inc
	;;
mpi-*)
	# Covers every MPI method accepted when GRAPH500_BINARY was chosen,
	# not only mpi-simple; otherwise those methods reach the run step
	# with no GRAPH500_COMMAND, no MPI build and no launcher.
	# NOTE(review): assumes all graph500_mpi_* binaries take the same
	# positional SCALE EDGE arguments as mpi-simple -- confirm.
	GRAPH500_COMMAND="$GRAPH500_BINARY $SCALE $EDGE"
	echo BUILD_MPI=Yes		>> make.inc
	echo MPICC=mpicc		>> make.inc
	export PATH=$GRAPH500_MPI_PATH:$PATH
	MPIRUN=$GRAPH500_MPI_PATH/mpirun
	MPIOPT="--allow-run-as-root -mca btl ^openib,udapl -np $MPICPUS"
	unset OMP_NUM_THREADS
	;;
esac

# Build from a clean tree. If the first build fails, drop
# -floop-strip-mine from make.inc and try once more before giving up.
make clean || die Failed to clean
if ! make; then
	echo Stripping loop-strip-mine
	sed -i -e 's/-floop-strip-mine//' make.inc
	make || die Failed to build
fi

# Run the benchmark between the monitor hooks, logging all output to
# graph500.log while still showing it on the console.
monitor_pre_hook $LOGDIR_RESULTS $P
mmtests_activity graph500-$ITERATION
echo Running graph500 scale $SCALE edge $EDGE
eval $MPIRUN $MPIOPT $GRAPH500_COMMAND 2>&1 | tee "$LOGDIR_RESULTS/graph500.log"
# The pipeline's own status is tee's; take the benchmark's status from
# PIPESTATUS, and do so immediately, before another command overwrites it.
GRAPH500_STATUS=${PIPESTATUS[0]}
monitor_post_hook $LOGDIR_RESULTS $P

# Only report success when the benchmark itself succeeded
if [ "$GRAPH500_STATUS" -ne 0 ]; then
	die "graph500 exited with status $GRAPH500_STATUS"
fi

exit $SHELLPACK_SUCCESS
