[CI] add kvcache-connector dependency definition and add into CI build (#18193)
Signed-off-by: Peter Pan <Peter.Pan@daocloud.io>
This commit is contained in:
@ -52,7 +52,7 @@ steps:
|
|||||||
queue: cpu_queue_postmerge
|
queue: cpu_queue_postmerge
|
||||||
commands:
|
commands:
|
||||||
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
|
||||||
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
|
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.8.1 --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT --target vllm-openai --progress plain -f docker/Dockerfile ."
|
||||||
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
|
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
|
||||||
|
|
||||||
- label: "Annotate release workflow"
|
- label: "Annotate release workflow"
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
|
||||||
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
|
# The vLLM Dockerfile is used to construct vLLM image that can be directly used
|
||||||
# to run the OpenAI compatible server.
|
# to run the OpenAI compatible server.
|
||||||
|
|
||||||
@ -62,12 +63,16 @@ ARG PYTORCH_CUDA_NIGHTLY_INDEX_BASE_URL=https://download.pytorch.org/whl/nightly
|
|||||||
ARG PIP_KEYRING_PROVIDER=disabled
|
ARG PIP_KEYRING_PROVIDER=disabled
|
||||||
ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
|
ARG UV_KEYRING_PROVIDER=${PIP_KEYRING_PROVIDER}
|
||||||
|
|
||||||
|
# Flag enables build-in KV-connector dependency libs into docker images
|
||||||
|
ARG INSTALL_KV_CONNECTORS=false
|
||||||
|
|
||||||
#################### BASE BUILD IMAGE ####################
|
#################### BASE BUILD IMAGE ####################
|
||||||
# prepare basic build environment
|
# prepare basic build environment
|
||||||
FROM ${BUILD_BASE_IMAGE} AS base
|
FROM ${BUILD_BASE_IMAGE} AS base
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION
|
||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
|
ARG INSTALL_KV_CONNECTORS=false
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
ARG DEADSNAKES_MIRROR_URL
|
ARG DEADSNAKES_MIRROR_URL
|
||||||
@ -276,6 +281,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
FROM ${FINAL_BASE_IMAGE} AS vllm-base
|
FROM ${FINAL_BASE_IMAGE} AS vllm-base
|
||||||
ARG CUDA_VERSION
|
ARG CUDA_VERSION
|
||||||
ARG PYTHON_VERSION
|
ARG PYTHON_VERSION
|
||||||
|
ARG INSTALL_KV_CONNECTORS=false
|
||||||
WORKDIR /vllm-workspace
|
WORKDIR /vllm-workspace
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
@ -485,6 +491,7 @@ RUN mv mkdocs.yaml test_docs/
|
|||||||
# base openai image with additional requirements, for any subsequent openai-style images
|
# base openai image with additional requirements, for any subsequent openai-style images
|
||||||
FROM vllm-base AS vllm-openai-base
|
FROM vllm-base AS vllm-openai-base
|
||||||
ARG TARGETPLATFORM
|
ARG TARGETPLATFORM
|
||||||
|
ARG INSTALL_KV_CONNECTORS=false
|
||||||
|
|
||||||
ARG PIP_INDEX_URL UV_INDEX_URL
|
ARG PIP_INDEX_URL UV_INDEX_URL
|
||||||
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
||||||
@ -493,8 +500,13 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
|
|||||||
# Reference: https://github.com/astral-sh/uv/pull/1694
|
# Reference: https://github.com/astral-sh/uv/pull/1694
|
||||||
ENV UV_HTTP_TIMEOUT=500
|
ENV UV_HTTP_TIMEOUT=500
|
||||||
|
|
||||||
|
COPY requirements/kv_connectors.txt requirements/kv_connectors.txt
|
||||||
|
|
||||||
# install additional dependencies for openai api server
|
# install additional dependencies for openai api server
|
||||||
RUN --mount=type=cache,target=/root/.cache/uv \
|
RUN --mount=type=cache,target=/root/.cache/uv \
|
||||||
|
if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then \
|
||||||
|
uv pip install --system -r requirements/kv_connectors.txt; \
|
||||||
|
fi; \
|
||||||
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
|
||||||
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
|
uv pip install --system accelerate hf_transfer 'modelscope!=1.15.0' 'bitsandbytes>=0.42.0' 'timm==0.9.10' boto3 runai-model-streamer runai-model-streamer[s3]; \
|
||||||
else \
|
else \
|
||||||
|
1
requirements/kv_connectors.txt
Normal file
1
requirements/kv_connectors.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
lmcache
|
Reference in New Issue
Block a user