mirror of
https://github.com/volcengine/verl.git
synced 2025-10-20 21:53:50 +08:00
[Done] - Update the Dockerfile and Apptainer file to support the SGLang engine - Add the third-party [torch_memory_saver](https://github.com/ExtremeViscent/torch_memory_saver) package to the Dockerfile for the ROCm version
57 lines
1.5 KiB
Plaintext
57 lines
1.5 KiB
Plaintext
# Apptainer definition header: build the container from a Docker registry image.
# Header entries use the `Key: value` form; the Dockerfile-style `FROM image`
# spelling is not a header keyword, so the base image is declared with `From:`.
Bootstrap: docker

# Alternative base image (disabled) - supports training: fsdp; inference: vllm
# From: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4

# Active base image - supports training: fsdp; inference: vllm, sglang
From: lmsysorg/sglang:v0.4.5-rocm630

%environment
    # Runtime environment for the container: AMD GPU architectures to target
    # and the HIP platform define passed to the C/C++/HIP compilers.
    PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    CFLAGS="-D__HIP_PLATFORM_AMD__"
    CXXFLAGS="-D__HIP_PLATFORM_AMD__"
    export PYTORCH_ROCM_ARCH HIPCC_COMPILE_FLAGS_APPEND CFLAGS CXXFLAGS

%post
    # Build-time provisioning: rebuild vllm from source, install the Python
    # dependencies, then install verl (editable) and torch_memory_saver.

    # Abort the build on the first failing command.
    set -e

    # Variables declared in %environment are only applied at container
    # runtime, so they are repeated here to make them visible to the source
    # builds below. Keep these values in sync with the %environment section.
    export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
    export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
    export CFLAGS="-D__HIP_PLATFORM_AMD__"
    export CXXFLAGS="-D__HIP_PLATFORM_AMD__"

    # Create source directory
    mkdir -p /opt/src

    # Uninstall the vllm shipped in the base image and rebuild v0.6.3 from source
    pip uninstall -y vllm
    cd /opt/src
    git clone -b v0.6.3 https://github.com/vllm-project/vllm.git
    cd vllm
    MAX_JOBS=$(nproc) python3 setup.py install
    # Leave the checkout before deleting it so the working directory stays valid
    cd /opt
    rm -rf /opt/src/vllm

    # Install dependencies (--no-cache-dir keeps pip's download cache out of the image)
    pip install --no-cache-dir "tensordict<0.6" --no-deps
    pip install --no-cache-dir \
        accelerate \
        codetiming \
        datasets \
        dill \
        hydra-core \
        liger-kernel \
        numpy \
        orjson \
        pandas \
        peft \
        "pyarrow>=15.0.0" \
        pybind11 \
        pylatexenc \
        "ray[data,train,tune,serve]" \
        torchdata \
        transformers \
        wandb

    # Clone and install verl from GitHub
    cd /opt
    git clone https://github.com/volcengine/verl.git
    cd verl
    # Uncomment to use a specific version
    # git checkout v0.3.0.post0
    # Editable install; --no-deps because the dependencies were installed above
    pip install --no-cache-dir -e . --no-deps

    # Install torch_memory_saver
    pip install --no-cache-dir git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps