Fix compilation

This commit is contained in:
Alexander Matveev
2025-02-06 20:49:29 +00:00
parent 5fb9dbe6f6
commit 70b4e46e70
3 changed files with 42 additions and 13 deletions

View File

@ -5,7 +5,7 @@ requests >= 2.26.0
tqdm
blake3
py-cpuinfo
transformers >= 4.45.2 # Required for Llama 3.2 and Qwen2-VL.
transformers >= 4.48.2 # Required for Bamba model and Transformers backend.
tokenizers >= 0.19.1 # Required for Llama 3.
protobuf # Required by LlamaTokenizer.
fastapi >= 0.107.0, < 0.113.0; python_version < '3.9'
@ -34,6 +34,6 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=74.1.1; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL.
compressed-tensors == 0.8.1 # required for compressed-tensors
compressed-tensors == 0.9.1 # Required for compressed-tensors quantization support.
depyf==0.18.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py

View File

@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt
# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt
#
absl-py==2.1.0
# via rouge-score
@ -106,9 +106,17 @@ dnspython==2.7.0
docutils==0.16
# via awscli
einops==0.8.0
# via -r requirements-test.in
# via
# -r requirements-test.in
# encodec
# vector-quantize-pytorch
# vocos
einx==0.3.0
# via vector-quantize-pytorch
email-validator==2.2.0
# via pydantic
encodec==0.1.1
# via vocos
evaluate==0.4.3
# via lm-eval
fastparquet==2024.11.0
@ -125,6 +133,8 @@ filelock==3.16.1
# triton
fonttools==4.54.1
# via matplotlib
frozendict==2.4.6
# via einx
frozenlist==1.5.0
# via
# aiohttp
@ -159,6 +169,7 @@ huggingface-hub==0.26.2
# timm
# tokenizers
# transformers
# vocos
idna==3.10
# via
# anyio
@ -261,6 +272,8 @@ numpy==1.26.4
# cupy-cuda12x
# datasets
# decord
# einx
# encodec
# evaluate
# fastparquet
# genai-perf
@ -283,6 +296,7 @@ numpy==1.26.4
# torchvision
# transformers
# tritonclient
# vocos
nvidia-cublas-cu12==12.4.5.8
# via
# nvidia-cudnn-cu12
@ -455,6 +469,7 @@ pyyaml==6.0.2
# responses
# timm
# transformers
# vocos
ray[adag]==2.40.0
# via -r requirements-test.in
redis==5.2.0
@ -517,6 +532,7 @@ scipy==1.13.1
# scikit-learn
# sentence-transformers
# statsmodels
# vocos
sentence-transformers==3.2.1
# via -r requirements-test.in
sentencepiece==0.2.0
@ -540,7 +556,9 @@ sqlitedict==2.1.0
statsmodels==0.14.4
# via genai-perf
sympy==1.13.1
# via torch
# via
# einx
# torch
tabledata==1.3.3
# via pytablewriter
tabulate==0.9.0
@ -568,12 +586,21 @@ torch==2.5.1
# -r requirements-test.in
# accelerate
# bitsandbytes
# encodec
# lm-eval
# peft
# sentence-transformers
# tensorizer
# timm
# torchaudio
# torchvision
# vector-quantize-pytorch
# vocos
torchaudio==2.5.1
# via
# -r requirements-test.in
# encodec
# vocos
torchvision==0.20.1
# via timm
tqdm==4.66.6
@ -584,13 +611,15 @@ tqdm==4.66.6
# lm-eval
# nltk
# peft
# pqdm
# sentence-transformers
# tqdm-multiprocess
# transformers
tqdm-multiprocess==0.0.11
# via lm-eval
transformers==4.47.0
transformers==4.48.2
# via
# -r requirements-test.in
# genai-perf
# lm-eval
# peft
@ -615,6 +644,7 @@ typing-extensions==4.12.2
# huggingface-hub
# librosa
# mistral-common
# pqdm
# pydantic
# pydantic-core
# torch
@ -626,6 +656,10 @@ urllib3==2.2.3
# requests
# responses
# tritonclient
vector-quantize-pytorch==1.21.2
# via -r requirements-test.in
vocos==0.1.0
# via -r requirements-test.in
word2number==1.1
# via lm-eval
xxhash==3.5.0
@ -638,4 +672,4 @@ zstandard==0.23.0
# via lm-eval
# The following packages are considered to be unsafe in a requirements file:
# setuptools
# setuptools

View File

@ -766,11 +766,6 @@ class TPUModelRunner(ModelRunnerBase):
logger.info(" -- Compilation for decode done in %.2f [secs].",
end - start)
def _initialize_kv_cache(self):
kv_cache_spec = self.get_kv_cache_spec()
kv_cache_config = get_kv_cache_config(vllm_config, kv_cache_spec,
availble_gpu_memory)
def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None:
"""
Initialize KV cache based on `kv_cache_config`.