
Commit 77814f1

ISEEKYAN and wuxibin89 authored
[env] feat: update docker file building schema, from VLLM base images (volcengine#3937)
### What does this PR do?

Use vLLM/SGLang images as the base for building stable images. We would also like to build a nightly env image with the latest versions of all main components, including CUDA/cuDNN/vLLM/SGLang/TransformerEngine/Megatron.

### Test

`docker/Dockerfile.stable.vllm011` is tested with `examples/grpo_trainer/run_qwen3_vl-30b-megatron.sh`. The SGLang image is not tested yet. (An illustrative sketch of the base-image approach follows the file summary below.)

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)

---------

Co-authored-by: wuxibin <[email protected]>
1 parent 703a078 commit 77814f1

26 files changed: +128 −57 lines
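The workflow hunks below only switch CI image tags and trim install steps; the new `docker/Dockerfile.stable.vllm011` itself is not part of this excerpt. As a rough illustration of the base-image approach described above, a minimal sketch might look like the following. The base tag, extra packages, and paths are assumptions for illustration, not the actual contents of the file added by this PR.

```dockerfile
# Hypothetical sketch of a "stable" verl image built on top of a vLLM base image.
# The base tag, package choices, and paths are illustrative assumptions, not the
# contents of docker/Dockerfile.stable.vllm011 from this PR.
FROM vllm/vllm-openai:v0.11.0

# Layer training-side components on top of the inference base; versions would be
# pinned to match the CUDA/PyTorch stack shipped with the base image.
RUN pip install --no-cache-dir "transformer_engine[pytorch]" megatron-core

# Install verl with the extras used by the CI workflows in this diff.
COPY . /workspace/verl
RUN pip install --no-cache-dir -e "/workspace/verl[test,geo]"
```

A stable image would then be built with something like `docker build -f docker/Dockerfile.stable.vllm011 -t verlai/verl:vllm011.dev7 .` and exercised with the example script mentioned in the Test section.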

.github/workflows/.deprecate/e2e_eval_aime24.yml

Lines changed: 1 addition & 1 deletion

@@ -88,7 +88,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

 jobs:

.github/workflows/cpu_unit_tests.yml

Lines changed: 4 additions & 3 deletions

@@ -68,16 +68,17 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
+      TORCH_COMPILE_DISABLE: 1
+      TORCHINDUCTOR_DISABLE: 1
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
+      image: verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip install -e .[test,prime,geo]
-          pip install --upgrade "ray>=2.40.0" pillow
+          pip install -e .[test,geo]
       - name: Download datasets
         run: |
           huggingface-cli download verl-team/gsm8k-v0.4.1 --repo-type dataset --local-dir ~/verl-data/gsm8k

.github/workflows/e2e_dapo.yml

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

 jobs:

.github/workflows/e2e_fully_async_policy.yml

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
   TRANSFORMERS_VERSION: "4.56.2"

.github/workflows/e2e_genrm_remote.yml

Lines changed: 1 addition & 1 deletion

@@ -78,7 +78,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

 jobs:

.github/workflows/e2e_one_step_off_policy.yml

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
   TRANSFORMERS_VERSION: "4.56.2"

.github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml

Lines changed: 1 addition & 1 deletion

@@ -227,7 +227,7 @@ jobs:
           ray stop --force
           TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
           MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
-          MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
+          MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
           ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
           ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
           ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \

.github/workflows/e2e_ppo_trainer_megatron_vllm.yml

Lines changed: 1 addition & 1 deletion

@@ -85,7 +85,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
   TRANSFORMERS_VERSION: "4.56.2"

.github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml

Lines changed: 6 additions & 6 deletions

@@ -85,7 +85,7 @@ permissions:
   contents: read

 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
   TRANSFORMERS_VERSION: "4.56.2"

@@ -164,8 +164,8 @@ jobs:
         run: |
           ray stop --force
           ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
-          PPO_MAX_TOKEN_LEN=512 FWD_MAX_TOKEN_LEN=512 \
-          MAX_PROMPT_LENGTH=256 MAX_RESPONSE_LENGTH=256 \
+          PPO_MAX_TOKEN_LEN=1024 FWD_MAX_TOKEN_LEN=1024 \
+          MAX_PROMPT_LENGTH=512 MAX_RESPONSE_LENGTH=512 \
           MODEL_ID=Qwen/Qwen1.5-MoE-A2.7B-Chat USE_MBRIDGE=True \
           COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=4 COMMON_ETP=1 INFER_TP=8 \
           USE_DIST_CKPT=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh

@@ -374,7 +374,7 @@ jobs:
           ray stop --force
           TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
           MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
-          MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
+          MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
           ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
           SP_SIZE=2 \
           bash tests/special_e2e/ppo_trainer/run_function_reward.sh

@@ -384,7 +384,7 @@ jobs:
           ray stop --force
           TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
           MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
-          MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
+          MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
           ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
           SP_SIZE=2 \
           bash tests/special_e2e/ppo_trainer/run_function_reward.sh

@@ -393,7 +393,7 @@ jobs:
           ray stop --force
           TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
           MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
-          MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
+          MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
           ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
           SP_SIZE=2 \
           LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \

.github/workflows/model.yml

Lines changed: 5 additions & 9 deletions

@@ -64,7 +64,7 @@ concurrency:


 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:vllm011.dev7"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

 jobs:

@@ -99,7 +99,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository and upgrade to latest transformers(4.54.0)/flash_attn, transformers 4.55.0 has strange behavior with model backward
         run: |
-          pip3 install --no-deps -e .[test]
+          pip3 install -e .[test]
           pip3 install --upgrade transformers
       - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
         run: |

@@ -147,8 +147,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository and upgrade to latest transformers/flash_attn
         run: |
-          pip3 install --no-deps -e .[test]
-          pip3 install --upgrade transformers
+          pip3 install -e .[test]
       - name: Running FSDP2 rmpad model tests on 8 L20 GPUs + latest flash_attn
         run: |
           STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py

@@ -169,8 +168,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip3 install --no-deps -e .[test]
-          pip install --upgrade "huggingface_hub[cli]"
+          pip3 install -e .[test]
       # - name: Download model config files
       #   run: |
       #     hf download Qwen/Qwen2.5-7B config.json --local-dir $HOME/configs/Qwen/Qwen2.5-7B

@@ -199,9 +197,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip3 install --no-deps -e .[test]
-          pip3 install --upgrade tensordict transformers
-          pip install --upgrade "huggingface_hub[cli]"
+          pip3 install -e .[test]
       - name: Download model config files
         run: |
           hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct
