Update PyTorch to 2.8.0 (#20358)

Signed-off-by: Huy Do <huydhn@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Author: Huy Do
Date: 2025-08-29 03:57:35 -07:00
Committed by: GitHub
Parent: 69f46359dd
Commit: 67c14906aa

12 changed files with 50 additions and 45 deletions

View File

@@ -462,8 +462,8 @@ steps:
 - tests/quantization
 commands:
 # temporary install here since we need nightly, will move to requirements/test.in
-# after torchao 0.12 release
-- pip install --pre torchao --index-url https://download.pytorch.org/whl/nightly/cu126
+# after torchao 0.12 release, and pin a working version of torchao nightly here
+- pip install --pre torchao==0.13.0.dev20250814 --index-url https://download.pytorch.org/whl/nightly/cu128
 - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization
 - label: LM Eval Small Models # 53min
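
Since the torchao pin above is a dated nightly wheel, a quick post-install check can catch a drifted resolution early. A minimal sketch in Python, assuming only the versions pinned in this diff:

# Sanity-check sketch: confirm the pinned torchao nightly resolved
# together with torch 2.8.0. Expected strings come from the pins above.
from importlib.metadata import version

torch_version = version("torch")
torchao_version = version("torchao")
assert torch_version.startswith("2.8.0"), f"unexpected torch {torch_version}"
assert torchao_version == "0.13.0.dev20250814", f"unexpected torchao {torchao_version}"
print(f"torch {torch_version} / torchao {torchao_version} OK")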

View File

@@ -45,8 +45,8 @@ set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1
 # requirements.txt files and should be kept consistent. The ROCm torch
 # versions are derived from docker/Dockerfile.rocm
 #
-set(TORCH_SUPPORTED_VERSION_CUDA "2.7.1")
-set(TORCH_SUPPORTED_VERSION_ROCM "2.7.0")
+set(TORCH_SUPPORTED_VERSION_CUDA "2.8.0")
+set(TORCH_SUPPORTED_VERSION_ROCM "2.8.0")
 #
 # Try to find python package with an executable that exactly matches

View File

@@ -6,7 +6,7 @@ requires = [
 "packaging>=24.2",
 "setuptools>=77.0.3,<80.0.0",
 "setuptools-scm>=8.0",
-"torch == 2.7.1",
+"torch == 2.8.0",
 "wheel",
 "jinja2",
 ]

View File

@@ -4,7 +4,8 @@ ninja
 packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 setuptools-scm>=8
-torch==2.7.1
+torch==2.8.0
 wheel
 jinja2>=3.1.6
 regex
 build

View File

@@ -9,17 +9,16 @@ packaging>=24.2
 setuptools>=77.0.3,<80.0.0
 --extra-index-url https://download.pytorch.org/whl/cpu
 torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218
-torch==2.7.0; platform_system == "Darwin"
-torch==2.7.0; platform_machine == "ppc64le"
-torch==2.6.0; platform_machine == "aarch64" # for arm64 CPUs, torch 2.7.0 has a issue: https://github.com/vllm-project/vllm/issues/17960
+torch==2.8.0; platform_system == "Darwin"
+torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64"
 # required for the image processor of minicpm-o-2_6, this must be updated alongside torch
 torchaudio; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchaudio==2.7.0; platform_machine == "ppc64le"
+torchaudio==2.8.0; platform_machine == "ppc64le"
 # required for the image processor of phi3v, this must be updated alongside torch
 torchvision; platform_machine != "ppc64le" and platform_machine != "s390x"
-torchvision==0.22.0; platform_machine == "ppc64le"
+torchvision==0.23.0; platform_machine == "ppc64le"
 datasets # for benchmark scripts
 # Intel Extension for PyTorch, only for x86_64 CPUs
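
The platform pins above rely on PEP 508 environment markers: pip evaluates the "; ..." suffix of each requirement against the installing machine. A small sketch using the packaging library (already a build dependency in this diff) shows how those markers resolve on a given host:

# Sketch: evaluate the environment markers used in the cpu requirements
# on the current machine.
from packaging.markers import Marker

pins = {
    "torch==2.8.0 (macOS)": 'platform_system == "Darwin"',
    "torch==2.8.0 (ppc64le/aarch64)":
        'platform_machine == "ppc64le" or platform_machine == "aarch64"',
    "torch==2.6.0+cpu (x86_64)": 'platform_machine == "x86_64"',
}
for pin, marker in pins.items():
    print(f"{pin}: applies here = {Marker(marker).evaluate()}")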

View File

@@ -6,9 +6,9 @@ numba == 0.61.2; python_version > '3.9'
 # Dependencies for NVIDIA GPUs
 ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
-torch==2.7.1
-torchaudio==2.7.1
+torch==2.8.0
+torchaudio==2.8.0
 # These must be updated alongside torch
-torchvision==0.22.1 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
-# https://github.com/facebookresearch/xformers/releases/tag/v0.0.31
-xformers==0.0.31; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.7
+torchvision==0.23.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+# https://github.com/facebookresearch/xformers/releases/tag/v0.0.32.post1
+xformers==0.0.32.post1; platform_system == 'Linux' and platform_machine == 'x86_64' # Requires PyTorch >= 2.8
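
Because torchvision and torchaudio wheels are compiled against a specific torch, the pins above must move in lockstep, and xformers 0.0.32.post1 likewise requires PyTorch >= 2.8. A quick import-time check, assuming the pins from this file:

# Sketch: confirm the torch family landed in lockstep after this bump.
import torch
import torchaudio
import torchvision

print(torch.__version__)        # expect 2.8.0 (+cu128 on CUDA wheels)
print(torchaudio.__version__)   # expect 2.8.0
print(torchvision.__version__)  # expect 0.23.0
# A mismatched pair usually surfaces here as undefined-symbol or
# missing-operator errors rather than importing cleanly.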

View File

@@ -1,10 +1,10 @@
 # Common dependencies
 -r common.txt
---extra-index-url https://download.pytorch.org/whl/rocm6.2.4
-torch==2.7.0
-torchvision==0.22.0
-torchaudio==2.7.0
+--extra-index-url https://download.pytorch.org/whl/rocm6.3
+torch==2.8.0
+torchvision==0.23.0
+torchaudio==2.8.0
 triton==3.3.0
 cmake>=3.26.1,<4

View File

@@ -22,9 +22,9 @@ sentence-transformers # required for embedding tests
 soundfile # required for audio tests
 jiwer # required for audio tests
 timm >=1.0.17 # required for internvl and gemma3n-mm test
-torch==2.7.1
-torchaudio==2.7.1
-torchvision==0.22.1
+torch==2.8.0
+torchaudio==2.8.0
+torchvision==0.23.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test

View File

@@ -541,42 +541,42 @@ numpy==1.26.4
     #   tritonclient
     #   vocos
     #   xarray
-nvidia-cublas-cu12==12.8.3.14
+nvidia-cublas-cu12==12.8.4.1
     # via
     #   nvidia-cudnn-cu12
     #   nvidia-cusolver-cu12
     #   torch
-nvidia-cuda-cupti-cu12==12.8.57
+nvidia-cuda-cupti-cu12==12.8.90
     # via torch
-nvidia-cuda-nvrtc-cu12==12.8.61
+nvidia-cuda-nvrtc-cu12==12.8.93
     # via torch
-nvidia-cuda-runtime-cu12==12.8.57
+nvidia-cuda-runtime-cu12==12.8.90
     # via torch
-nvidia-cudnn-cu12==9.7.1.26
+nvidia-cudnn-cu12==9.10.2.21
     # via torch
-nvidia-cufft-cu12==11.3.3.41
+nvidia-cufft-cu12==11.3.3.83
     # via torch
-nvidia-cufile-cu12==1.13.0.11
+nvidia-cufile-cu12==1.13.1.3
     # via torch
-nvidia-curand-cu12==10.3.9.55
+nvidia-curand-cu12==10.3.9.90
     # via torch
-nvidia-cusolver-cu12==11.7.2.55
+nvidia-cusolver-cu12==11.7.3.90
    # via torch
-nvidia-cusparse-cu12==12.5.7.53
+nvidia-cusparse-cu12==12.5.8.93
     # via
     #   nvidia-cusolver-cu12
     #   torch
-nvidia-cusparselt-cu12==0.6.3
+nvidia-cusparselt-cu12==0.7.1
     # via torch
-nvidia-nccl-cu12==2.26.2
+nvidia-nccl-cu12==2.27.3
     # via torch
-nvidia-nvjitlink-cu12==12.8.61
+nvidia-nvjitlink-cu12==12.8.93
     # via
     #   nvidia-cufft-cu12
     #   nvidia-cusolver-cu12
     #   nvidia-cusparse-cu12
     #   torch
-nvidia-nvtx-cu12==12.8.55
+nvidia-nvtx-cu12==12.8.90
     # via torch
 omegaconf==2.3.0
     # via
@@ -1069,7 +1069,7 @@ tomli==2.2.1
     # via schemathesis
 tomli-w==1.2.0
     # via schemathesis
-torch==2.7.1+cu128
+torch==2.8.0+cu128
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -1098,7 +1098,7 @@ torch==2.7.1+cu128
     #   torchvision
     #   vector-quantize-pytorch
     #   vocos
-torchaudio==2.7.1+cu128
+torchaudio==2.8.0+cu128
     # via
     #   -r requirements/test.in
     #   encodec
@@ -1111,7 +1111,7 @@ torchmetrics==1.7.4
     #   pytorch-lightning
     #   terratorch
     #   torchgeo
-torchvision==0.22.1+cu128
+torchvision==0.23.0+cu128
     # via
     #   -r requirements/test.in
     #   lightly
@@ -1152,7 +1152,7 @@ transformers==4.55.2
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
-triton==3.3.1
+triton==3.4.0
     # via torch
 tritonclient==2.51.0
     # via

View File

@@ -292,7 +292,7 @@ SP_TEST_MODELS = [
     # TODO support other models
     # [LANGUAGE GENERATION]
     "meta-llama/Llama-3.2-1B-Instruct",
-    "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8"
+    "RedHatAI/Meta-Llama-3.1-8B-Instruct-FP8",
 ]

View File

@@ -87,6 +87,9 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
+    # https://github.com/NVIDIA/nccl/issues/1790, set a lower value for
+    # gpu_memory_utilization here because NCCL >= 2.26.3 seems to use
+    # more GPU memory causing vLLM to OOM
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
                    enable_lora=True,
@@ -95,7 +98,8 @@ def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
                    tensor_parallel_size=4,
                    trust_remote_code=True,
                    fully_sharded_loras=True,
-                   enable_chunked_prefill=True)
+                   enable_chunked_prefill=True,
+                   gpu_memory_utilization=0.85)
     output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
     for i in range(len(EXPECTED_LORA_OUTPUT)):
         assert output1[i] == EXPECTED_LORA_OUTPUT[i]
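
For reference, the NCCL build bundled with a torch wheel can be read at runtime, which makes it easy to tell whether the headroom workaround above applies on a given machine. A small sketch; the 2.26.3 threshold is the one cited in the test comment:

# Sketch: report the NCCL version bundled with this torch build.
import torch

if torch.cuda.is_available():
    nccl = torch.cuda.nccl.version()  # e.g. (2, 27, 3)
    print("NCCL", ".".join(map(str, nccl)))
    if nccl >= (2, 26, 3):
        # Per the comment above, these NCCL builds hold more GPU memory,
        # so leave vLLM extra headroom (e.g. gpu_memory_utilization=0.85).
        print("consider lowering gpu_memory_utilization")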

View File

@@ -789,6 +789,7 @@ def get_kernel_options(query, block_m, block_n,
     device_props = torch.cuda.get_device_properties()
     max_shared_memory = device_props.shared_memory_per_block_optin
     if max_shared_memory < 144 * 1024:
-        kernel_options["BLOCK_M"] = 32
-        kernel_options["BLOCK_N"] = 32
+        kernel_options["BLOCK_M"] = kernel_options["BLOCK_M"] // 2
+        kernel_options["BLOCK_N"] = kernel_options["BLOCK_N"] // 2
     return kernel_options
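
The change above stops forcing BLOCK_M/BLOCK_N to 32 on GPUs with less than 144 KiB of opt-in shared memory and instead halves whatever defaults were already chosen, so a 128x64 tiling shrinks to 64x32 rather than collapsing to 32x32. A standalone sketch of that pattern; only the threshold and device attribute come from the diff, the surrounding function shape is an illustrative assumption:

# Sketch: shrink attention tile sizes on GPUs with smaller opt-in shared
# memory, preserving the aspect ratio of the default tiling.
import torch

def shrink_block_sizes(kernel_options: dict) -> dict:
    props = torch.cuda.get_device_properties()
    if props.shared_memory_per_block_optin < 144 * 1024:
        # Halve both dimensions instead of pinning them to 32, so the
        # M:N ratio chosen upstream is preserved.
        kernel_options["BLOCK_M"] //= 2
        kernel_options["BLOCK_N"] //= 2
    return kernel_options

# e.g. shrink_block_sizes({"BLOCK_M": 128, "BLOCK_N": 64}) -> 64x32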