Mirror of https://github.com/volcengine/verl.git (synced 2025-10-20 13:43:50 +08:00)
[ci] feat: upgrade sglang to 0.5.2 (#3613)
### What does this PR do?

Upgrades sglang to 0.5.2, addressing https://github.com/volcengine/verl/pull/3530#issuecomment-3332840437.
@@ -77,7 +77,7 @@ jobs:
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -110,7 +110,7 @@ jobs:
       HF_ENDPOINT: "https://hf-mirror.com"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -75,7 +75,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:
.github/workflows/.deprecate/e2e_spin.yml (vendored, 2 changed lines)
@@ -53,7 +53,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 # Cancel jobs on the same ref if a new one is triggered
.github/workflows/.deprecate/e2e_sppo.yml (vendored, 2 changed lines)
@@ -56,7 +56,7 @@ concurrency:
   cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
   TRANSFORMERS_VERSION: "4.56.2"
 
.github/workflows/checkpoint_converter.yml (vendored, 4 changed lines)
@@ -81,7 +81,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -116,7 +116,7 @@ jobs:
       HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
       HF_ENDPOINT: "https://hf-mirror.com"
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -86,7 +86,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:
@@ -86,7 +86,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:
@@ -217,7 +217,6 @@ jobs:
       - name: Install the current repository
         run: |
           pip3 install -e .[test,geo,gpu,sglang] --no-deps
-          pip install "transformers[hf_xet]==4.54.0"
       # Geo3k
       - name: Prepare GEO3K dataset
         run: |
.github/workflows/e2e_sft.yml (vendored, 2 changed lines)
@@ -70,7 +70,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:
.github/workflows/gpu_unit_tests.yml (vendored, 2 changed lines)
@@ -80,7 +80,7 @@ jobs:
       NO_PROXY: "localhost,127.0.0.1"
       HF_HUB_ENABLE_HF_TRANSFER: 1
     container:
-      image: verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2
+      image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
       options: --gpus all --shm-size=10g
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
.github/workflows/sgl.yml (vendored, 2 changed lines)
@@ -77,7 +77,7 @@ permissions:
   contents: read
 
 env:
-  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
+  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
   DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
 
 jobs:
@@ -0,0 +1,4 @@
+FROM verlai/verl:base-verl0.6-cu128-cudnn9.8-torch2.8.0-fa2.7.4
+
+RUN pip install --no-cache-dir "sglang[all]==0.5.2"
+RUN pip install --no-cache-dir "torch-memory-saver==0.0.9rc1"
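As a quick sanity check (not part of this commit), one could confirm the new pins inside the resulting app image. The tag below is the one the updated workflows reference; whether this Dockerfile is what produces that exact tag is an assumption for illustration.

# Hedged sketch: verify the sglang and torch-memory-saver pins in the app image.
# The image tag is assumed to match the CI workflows above; substitute your local tag.
docker run --rm verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2 \
    pip show sglang torch-memory-saver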
docker/verl0.6-cu128-torch2.8.0-fa2.7.4/Dockerfile.base (new file, 108 lines)
@@ -0,0 +1,108 @@
+# Start from the NVIDIA official image (ubuntu-24.04 + cuda-12.8 + python-3.12)
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-03.html
+FROM nvcr.io/nvidia/pytorch:25.03-py3
+
+# Define environments
+ENV MAX_JOBS=32
+ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
+ENV DEBIAN_FRONTEND=noninteractive
+ENV NODE_OPTIONS=""
+ENV PIP_ROOT_USER_ACTION=ignore
+ENV HF_HUB_ENABLE_HF_TRANSFER="1"
+ENV PIP_CONSTRAINT=""
+
+ARG PIP_INDEX=https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+
+# Change pip source
+RUN pip config set global.index-url "${PIP_INDEX}" && \
+    pip config set global.extra-index-url "${PIP_INDEX}" && \
+    pip config set global.no-cache-dir "true" && \
+    python -m pip install --upgrade pip
+
+# Install systemctl
+RUN apt-get update && \
+    apt-get install -y -o Dpkg::Options::="--force-confdef" systemd && \
+    apt-get clean
+
+# Install libxml2
+RUN apt-get update && \
+    apt-get install -y libxml2 aria2 && \
+    apt-get clean
+
+# Uninstall nv-pytorch fork
+RUN pip uninstall -y torch torchvision torchaudio \
+    pytorch-quantization pytorch-triton torch-tensorrt \
+    transformer_engine flash_attn apex megatron-core \
+    xgboost opencv grpcio
+
+# Fix packages
+RUN pip install --no-cache-dir tensordict torchdata "transformers[hf_xet]==4.55.4" accelerate datasets peft hf-transfer \
+    "numpy<2.0.0" "pyarrow>=19.0.1" pandas \
+    ray[default] codetiming hydra-core pylatexenc qwen-vl-utils wandb dill pybind11 liger-kernel mathruler blobfile xgrammar \
+    pytest py-spy pre-commit ruff
+
+# Fix cv2
+RUN rm -rf /usr/local/lib/python3.11/dist-packages/cv2
+
+# Install torch
+RUN pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu128
+
+# Install flash-attn
+RUN pip install --no-cache-dir --no-build-isolation flash_attn==2.7.4.post1
+
+# Install DeepEP
+# the dependency of IBGDA
+RUN ln -s /usr/lib/x86_64-linux-gnu/libmlx5.so.1 /usr/lib/x86_64-linux-gnu/libmlx5.so
+
+# Clone and build deepep and deepep-nvshmem
+RUN git clone -b v2.3.1 https://github.com/NVIDIA/gdrcopy.git && \
+    git clone https://github.com/deepseek-ai/DeepEP.git && \
+    cd DeepEP && git checkout a84a248
+
+# Prepare nvshmem
+RUN wget https://developer.nvidia.com/downloads/assets/secure/nvshmem/nvshmem_src_3.2.5-1.txz && \
+    tar -xvf nvshmem_src_3.2.5-1.txz && mv nvshmem_src deepep-nvshmem && \
+    cd deepep-nvshmem && git apply ../DeepEP/third-party/nvshmem.patch
+
+## Build deepep-nvshmem
+RUN apt-get install -y ninja-build cmake
+
+ENV CUDA_HOME=/usr/local/cuda
+### Set MPI environment variables. Having errors when not set.
+ENV CPATH=/usr/local/mpi/include:$CPATH
+ENV LD_LIBRARY_PATH=/usr/local/mpi/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=/usr/local/x86_64-linux-gnu:$LD_LIBRARY_PATH
+ENV GDRCOPY_HOME=/workspace/gdrcopy
+ENV GDRCOPY_INCLUDE=/workspace/gdrcopy/include
+
+RUN cd deepep-nvshmem && \
+    NVSHMEM_SHMEM_SUPPORT=0 \
+    NVSHMEM_UCX_SUPPORT=0 \
+    NVSHMEM_USE_NCCL=0 \
+    NVSHMEM_MPI_SUPPORT=0 \
+    NVSHMEM_IBGDA_SUPPORT=1 \
+    NVSHMEM_PMIX_SUPPORT=0 \
+    NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
+    NVSHMEM_USE_GDRCOPY=1 \
+    cmake -G Ninja -S . -B build/ -DCMAKE_INSTALL_PREFIX=/workspace/deepep-nvshmem/install && cmake --build build/ --target install
+
+ENV NVSHMEM_DIR=/workspace/deepep-nvshmem/install
+ENV LD_LIBRARY_PATH=$NVSHMEM_DIR/lib:$LD_LIBRARY_PATH
+ENV PATH=$NVSHMEM_DIR/bin:$PATH
+
+## Build deepep
+RUN cd DeepEP && \
+    python setup.py install
+
+# Install Apex
+RUN pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" git+https://github.com/NVIDIA/apex.git
+
+# Install TransformerEngine
+RUN export NVTE_FRAMEWORK=pytorch && pip3 install --no-deps --no-cache-dir --no-build-isolation git+https://github.com/NVIDIA/TransformerEngine.git@v2.2.1
+
+# Install Megatron-LM
+RUN git clone -b core_v0.13.0 https://github.com/NVIDIA/Megatron-LM.git && \
+    cd Megatron-LM && pip3 install --no-deps -e .
+
+# Install mbridge
+RUN pip3 install --no-cache-dir git+https://github.com/ISEEKYAN/mbridge.git
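For local reproduction (again, not part of the commit), building the base image could look roughly like the sketch below. The Dockerfile path comes from this diff; the output tag is borrowed from the FROM line of the app-layer Dockerfile above and is an assumption about how the image is tagged.

# Hedged sketch: build the new base image from the repository root.
# The -t tag mirrors the app layer's FROM line; adjust if your naming differs.
docker build \
    -f docker/verl0.6-cu128-torch2.8.0-fa2.7.4/Dockerfile.base \
    -t verlai/verl:base-verl0.6-cu128-cudnn9.8-torch2.8.0-fa2.7.4 \
    .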
@@ -17,5 +17,5 @@ torchdata
 torchvision
 transformers
 wandb
-sglang[all]==0.4.10.post2
+sglang[all]==0.5.2
 huggingface_hub
setup.py (4 changed lines)
@@ -52,8 +52,8 @@ MATH_REQUIRES = ["math-verify"]  # Add math-verify as an optional dependency
 VLLM_REQUIRES = ["tensordict>=0.8.0,<=0.10.0,!=0.9.0", "vllm>=0.7.3,<=0.9.1"]
 SGLANG_REQUIRES = [
     "tensordict>=0.8.0,<=0.10.0,!=0.9.0",
-    "sglang[srt,openai]==0.4.10.post2",
-    "torch==2.7.1",
+    "sglang[srt,openai]==0.5.2",
+    "torch==2.8.0",
 ]
 TRL_REQUIRES = ["trl<=0.9.6"]
 MCORE_REQUIRES = ["mbridge"]
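With the updated extras, an editable install that exercises the new pins might look like this minimal sketch, assuming a local checkout; the sglang extra is the same one used by the CI install step above.

# Hedged sketch: install verl with the sglang extra, which now resolves
# sglang[srt,openai]==0.5.2 and torch==2.8.0, then print the resolved versions.
pip install -e .[sglang]
python -c "from importlib.metadata import version; print(version('sglang'), version('torch'))"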
@@ -159,7 +159,7 @@ class SGLangHttpServer:
                 scheduler_info=self.scheduler_info,
             )
         )
-
+        app.is_single_tokenizer_mode = True
         self._server_port, self._server_task = await run_unvicorn(app, server_args)
 
     async def wake_up(self):