Cleanup unit test. #2

Workflow file for this run

.github/workflows/production.yml at 6aef3d2

	# Production CI workflow: runs on pushes to main and on pull requests targeting main.
	name: Production

	on:
	  # Trigger the workflow on push to the main branch, or for any pull request targeting main
	  push:
	    branches:
	      - main
	  pull_request:
	    branches:
	      - main

	concurrency:
	  # Cancel all workflows that are still running, if any, when updating branches associated with PRs,
	  # BUT don't do anything for workflows that are not triggered by PRs.
	  # The group key falls back to `github.ref` when `github.head_ref` is empty.
	  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
	  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

	env:
	  # Note that secrets are not passed to workflows that are triggered by a pull request from a fork
	  HF_TOKEN: ${{ secrets.HF_TOKEN }}
	  HF_HUB_DOWNLOAD_TIMEOUT: "60"
	  # Version suffix of the container image path passed to `srun --container-image` in the jobs
	  GENESIS_IMAGE_VER: "1_14"
	  # Passed to `srun --time` in the jobs
	  TIMEOUT_MINUTES: "60"
	  FORCE_COLOR: "1"
	  PY_COLORS: "1"
	  MADRONA_DISABLE_CUDA_HEAP_SIZE: "1"
	  OMNI_KIT_ACCEPT_EULA: "yes"
	  OMNI_KIT_ALLOW_ROOT: "1"

	jobs:
	  # Runs the test suite through `srun`, once per GS_ENABLE_NDARRAY value (sequentially).
	  unit-tests:
	    # The name suffix is 'field' when GS_ENABLE_NDARRAY is '0', and 'ndarray' otherwise.
	    name: production-unit_tests-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }}

	    runs-on: [self-hosted, coreweave, genesis-world]

	    strategy:
	      fail-fast: true
	      max-parallel: 1
	      matrix:
	        GS_ENABLE_NDARRAY: ["0", "1"]

	    env:
	      GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }}

	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v4
	      # Only executed for pull_request events; skipped on push.
	      - name: Run unit tests
	        if: github.event_name == 'pull_request'
	        run: |
	          # Unique job name, exported to later steps so that the job can be cancelled by name.
	          SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)"
	          echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV

	          mkdir -p "${HOME}/.cache" "${HOME}/.venv"

	          # TODO: USD baking does not currently support Python 3.11 since
	          # NVIDIA does not currently release `omniverse-kit==107.3` on PyPI.
	          # See: https://github.com/Genesis-Embodied-AI/Genesis/pull/1300
	          #
	          # NOTE: the mount entries below must stay unindented, because they continue the
	          # `--container-mounts=` value across lines. The heredoc terminator `EOF` must
	          # stay unindented as well.
	          srun \
	            --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \
	            --container-mounts=\
	          "${HOME}/.venv":/root/.venv,\
	          "${HOME}/.cache":/root/.cache,\
	          "${{ github.workspace }}":/root/workspace \
	            --no-container-mount-home --container-workdir=/root/workspace \
	            --export=NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY} \
	            --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \
	            --job-name=${SLURM_JOB_NAME} \
	            bash -e -s << 'EOF'
	          if test -n "$(find /root/.venv -maxdepth 0 -empty)"; then
	            python3 -m venv --system-site-packages /root/.venv
	            source /root/.venv/bin/activate
	            pip install --no-input --upgrade pip pkg-info wheel
	            pip install --no-input --ignore-installed --upgrade blinker pyparsing setuptools
	          fi
	          source /root/.venv/bin/activate

	          pip install --no-input --extra-index-url https://pypi.nvidia.com/ omniverse-kit
	          pip install --no-input ".[dev,render,usd]"

	          pytest -v -ra --backend gpu --dev --forked ./tests
	          EOF
	      # Always runs, so that the Slurm job is cancelled even if the previous step failed.
	      - name: Kill srun job systematically
	        if: always()
	        run: |
	          if [ -n "${SLURM_JOB_NAME}" ] ; then
	            scancel --user=${USER} --name="${SLURM_JOB_NAME}"
	          fi

	  # Runs the tests marked "benchmarks" once unit-tests has completed, then publishes the stats.
	  benchmarks:
	    # The name suffix is 'field' when GS_ENABLE_NDARRAY is '0', and 'ndarray' otherwise.
	    name: production-benchmarks-${{ matrix.GS_ENABLE_NDARRAY == '0' && 'field' || 'ndarray' }}

	    needs: unit-tests
	    runs-on: [self-hosted, coreweave, genesis-world]

	    strategy:
	      matrix:
	        GS_ENABLE_NDARRAY: ["0", "1"]

	    env:
	      # Note that secrets are not passed to workflows that are triggered by a pull request from a fork
	      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
	      GS_ENABLE_NDARRAY: ${{ matrix.GS_ENABLE_NDARRAY }}

	    steps:
	      - name: Checkout code
	        uses: actions/checkout@v4
	        with:
	          # Checking out the full history is required because shallow cloning marks HEAD as "grafted".
	          # This breaks remote tracking, thereby making it impossible to detect whether a commit is
	          # contained in upstream main.
	          fetch-depth: 0
	      - name: Run benchmarks
	        run: |
	          # Unique job name, exported to later steps so that the job can be cancelled by name.
	          SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)"
	          echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> $GITHUB_ENV

	          # WANDB_API_KEY is only forwarded for the main branch of the upstream repository.
	          SLURM_ENV_VARS="NVIDIA_DRIVER_CAPABILITIES=all,BASH_ENV=/root/.bashrc,HF_TOKEN,GS_ENABLE_NDARRAY=${GS_ENABLE_NDARRAY}"
	          if [[ "${{ github.repository }}" == 'Genesis-Embodied-AI/Genesis' && "${{ github.ref }}" == 'refs/heads/main' ]] ; then
	            SLURM_ENV_VARS="${SLURM_ENV_VARS},WANDB_API_KEY"
	          fi

	          # NOTE: the mount entries below must stay unindented, because they continue the
	          # `--container-mounts=` value across lines. The heredoc terminator `EOF` must
	          # stay unindented as well.
	          srun \
	            --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \
	            --container-mounts=\
	          "${HOME}/.venv":/root/.venv,\
	          /mnt/data/artifacts:/mnt/data/artifacts,\
	          "${{ github.workspace }}":/root/workspace \
	            --no-container-mount-home --container-workdir=/root/workspace \
	            --export=${SLURM_ENV_VARS} \
	            --partition=hpc-mid --nodes=1 --gpus=8 --exclusive --time="${TIMEOUT_MINUTES}" \
	            --job-name=${SLURM_JOB_NAME} \
	            bash -e -s << 'EOF'
	          # sudo apt update
	          # sudo apt install -y tmate
	          # tmate -S /tmp/tmate.sock new-session -d
	          # tmate -S /tmp/tmate.sock wait tmate-ready
	          # tmate -S /tmp/tmate.sock display -p '#{tmate_ssh}'

	          source /root/.venv/bin/activate
	          pip install --no-input ".[dev,render]"

	          pytest --print -x -m "benchmarks" ./tests
	          cat speed_test*.txt > "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt"

	          # tmate -S /tmp/tmate.sock wait tmate-exit
	          EOF
	      # Always runs, so that the Slurm job is cancelled even if the previous step failed.
	      - name: Kill srun job systematically
	        if: always()
	        run: |
	          if [ -n "${SLURM_JOB_NAME}" ] ; then
	            scancel --user=${USER} --name="${SLURM_JOB_NAME}"
	          fi
	      - name: Display benchmark stats
	        run: |
	          cat "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt"
	      - name: Upload benchmark stats as artifact
	        uses: actions/upload-artifact@v4
	        with:
	          name: speed-test-${{ matrix.GS_ENABLE_NDARRAY }}
	          path: "/mnt/data/artifacts/speed_test_${{ env.SLURM_JOB_NAME }}.txt"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Cleanup unit test. #2

Workflow file

Cleanup unit test. #2

Uh oh!

Workflow file for this run