# Build command:
# docker buildx build --platform linux/x86_64 -t "verlai/verl:ngc-th2.4.0-cu124-vllm0.6.3-ray2.4-te1.7-v0.0.6" -f docker/Dockerfile.ngc.vllm . --builder cloud-verlai-verl-builder --progress=plain --push
FROM nvcr.io/nvidia/pytorch:24.05-py3

# Uninstall the nv-pytorch fork and the packages built against it, so the
# pinned upstream wheels installed below are the only copies present.
RUN pip3 uninstall -y \
        pytorch-quantization \
        pytorch-triton \
        torch \
        torch-tensorrt \
        torchvision \
        xgboost transformer_engine flash_attn \
        apex megatron-core

# Install the pinned torch stack from the PyTorch cu124 wheel index.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir \
        torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 \
        --index-url https://download.pytorch.org/whl/cu124

# =============== Megatron dependencies (optional) =================
# Install apex with its C++ and CUDA extensions; set MAX_JOBS to avoid OOMs.
RUN MAX_JOBS=4 pip3 install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \
        --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" \
        git+https://github.com/NVIDIA/apex
# =============== End of Megatron dependencies (optional) =================

# Core Python dependencies.
RUN pip3 install --no-cache-dir \
        accelerate \
        codetiming \
        datasets \
        dill \
        hydra-core \
        numpy \
        'pandas' \
        'peft' \
        'pyarrow>=15.0.0' \
        'pybind11' \
        'pylatexenc' \
        'ray>=2.10' \
        'tensordict<0.6' \
        'transformers' \
        'vllm==0.6.3.post1' \
        'wandb' \
        'tensorboard'

# Full dependencies (testing, linting, profiling and extra kernels).
RUN pip3 install --no-cache-dir pytest pre-commit py-spy pyext liger-kernel

# =============== Megatron dependencies (optional) =================
# Install Transformer Engine, which requires FA 2.5.8. flash-attn is built in
# its own step so the layer can be reused from the docker cache.
RUN MAX_JOBS=4 NINJA_FLAGS="-j4" pip3 install flash-attn==2.5.8 --no-cache-dir --no-build-isolation
RUN MAX_JOBS=1 NINJA_FLAGS="-j1" TE_BUILD_WITH_NINJA=0 pip3 install --no-cache-dir \
        git+https://github.com/eric-haibin-lin/TransformerEngine.git@v1.7.0
# =============== End of Megatron dependencies (optional) =================