[ROCm][Build] Clean up the ROCm build (#19040)

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
2025-06-02 23:47:47 -04:00
parent 8655f47f37
commit 9e6f61e8c3
4 changed files with 2 additions and 23 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -182,9 +182,6 @@ include(FetchContent)
 file(MAKE_DIRECTORY ${FETCHCONTENT_BASE_DIR}) # Ensure the directory exists
 message(STATUS "FetchContent base directory: ${FETCHCONTENT_BASE_DIR}")
 #
 # Set rocm version dev int.
 #
 if(VLLM_GPU_LANG STREQUAL "HIP")
  #
  # Overriding the default -O set up by cmake, adding ggdb3 for the most verbose debug info
@ -192,7 +189,6 @@ if(VLLM_GPU_LANG STREQUAL "HIP")
  set(CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG "${CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG} -O0 -ggdb3")
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb3")
  #
  # Certain HIP functions are marked as [[nodiscard]], yet vllm ignores the result which generates
  # a lot of warnings that always mask real issues. Suppressing until this is properly addressed.
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@ -1,7 +1,5 @@
 # default base image
 ARG REMOTE_VLLM="0"
 ARG USE_CYTHON="0"
 ARG BUILD_RPD="1"
 ARG COMMON_WORKDIR=/app
 ARG BASE_IMAGE=rocm/vllm-dev:base
@ -36,12 +34,10 @@ FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm
 # -----------------------
 # vLLM build stages
 FROM fetch_vllm AS build_vllm
 ARG USE_CYTHON
 # Build vLLM
 RUN cd vllm \
    && python3 -m pip install -r requirements/rocm.txt \
    && python3 setup.py clean --all  \
    && if [ ${USE_CYTHON} -eq "1" ]; then python3 tests/build_cython.py build_ext --inplace; fi \
    && python3 setup.py bdist_wheel --dist-dir=dist
 FROM scratch AS export_vllm
 ARG COMMON_WORKDIR
@ -90,13 +86,6 @@ RUN case "$(which python3)" in \
        *) ;; esac
 RUN python3 -m pip install --upgrade huggingface-hub[cli]
 ARG BUILD_RPD
 RUN if [ ${BUILD_RPD} -eq "1" ]; then \
    git clone -b nvtx_enabled https://github.com/ROCm/rocmProfileData.git \
    && cd rocmProfileData/rpd_tracer \
    && pip install -r requirements.txt && cd ../ \
    && make && make install \
    && cd hipMarker && python3 setup.py install ; fi
 # Install vLLM
 RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
@ -117,12 +106,6 @@ ENV TOKENIZERS_PARALLELISM=false
 # ENV that can improve safe tensor loading, and end-to-end time
 ENV SAFETENSORS_FAST_GPU=1
 # User-friendly environment setting for multi-processing to avoid below RuntimeError.
 # RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with multiprocessing,
 # you must use the 'spawn' start method 
 # See https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
 ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
 # Performance environment variable.
 ENV HIP_FORCE_DEV_KERNARG=1
--- a/docs/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/getting_started/installation/gpu/rocm.inc.md
@ -179,8 +179,6 @@ It is important that the user kicks off the docker build using buildkit. Either
 It provides flexibility to customize the build of docker image using the following arguments:
 - `BASE_IMAGE`: specifies the base image used when running `docker build`. The default value `rocm/vllm-dev:base` is an image published and maintained by AMD. It is being built using <gh-file:docker/Dockerfile.rocm_base>
 - `USE_CYTHON`: An option to run cython compilation on a subset of python files upon docker build
 - `BUILD_RPD`: Include RocmProfileData profiling tool in the image
 - `ARG_PYTORCH_ROCM_ARCH`: Allows overriding the gfx architecture values from the base docker image
 Their values can be passed in when running `docker build` with `--build-arg` options.
--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@ -12,5 +12,7 @@ ray>=2.10.0,<2.45.0
 peft
 pytest-asyncio
 tensorizer>=2.9.0
 setuptools-scm>=8
 setuptools>=77.0.3,<80.0.0
 runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0