[docker] feat: update Dockerfile.rocm7 (#3781)

### What does this PR do?

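Parameterize `docker/Dockerfile.rocm7`: the verl repository URL and branch are now build arguments (`VERL_REPO`, `VERL_BRANCH`).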

### Test

DOCKER_BUILDKIT=1 docker build --no-cache -f docker/Dockerfile.rocm7 \
    --build-arg VERL_BRANCH=v0.6.x -t verl-0.6.x_rocm7.0 .
This commit is contained in:
vickytsang
2025-10-15 20:02:43 -07:00
committed by GitHub
parent 061535208c
commit e81e7db725

View File

@ -1,7 +1,7 @@
# default base image # default base image
ARG REMOTE_VLLM="1" ARG REMOTE_VLLM="1"
ARG COMMON_WORKDIR=/app ARG COMMON_WORKDIR=/app
ARG BASE_IMAGE=rocm/vllm-dev:base_rocm7_0930_rc1_20250916_tuned_20250917 ARG BASE_IMAGE=rocm/vllm-dev:base
FROM ${BASE_IMAGE} AS base FROM ${BASE_IMAGE} AS base
@ -104,7 +104,7 @@ ARG COMMON_WORKDIR
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV RAY_EXPERIMENTAL_NOSET_HIP_VISIBLE_DEVICES=1
ENV TOKENIZERS_PARALLELISM=false ENV TOKENIZERS_PARALLELISM=false
# ENV that can improve safe tensor loading, and end-to-end time # ENV that can improve safe tensor loading, and end-to-end time
@ -115,6 +115,8 @@ ENV HIP_FORCE_DEV_KERNARG=1
# ----------------------- # -----------------------
# Install verl # Install verl
ARG VERL_REPO=https://github.com/volcengine/verl.git
ARG VERL_BRANCH=main
RUN pip install "tensordict==0.6.2" --no-deps && \ RUN pip install "tensordict==0.6.2" --no-deps && \
pip install accelerate \ pip install accelerate \
codetiming \ codetiming \
@ -133,8 +135,9 @@ RUN pip install "tensordict==0.6.2" --no-deps && \
pybind11 pybind11
WORKDIR /workspace/ WORKDIR /workspace/
RUN git clone https://github.com/volcengine/verl.git && \ RUN git clone ${VERL_REPO} && \
cd verl && \ cd verl && \
git checkout ${VERL_BRANCH} && \
pip install -e . pip install -e .
CMD ["/bin/bash"] CMD ["/bin/bash"]