Mirror of https://github.com/vllm-project/vllm.git (synced 2025-10-20 14:53:52 +08:00)
[Misc] Add dummy maverick test to CI (#21324)
Signed-off-by: Ming Yang <minos.future@gmail.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
@@ -718,6 +718,7 @@ steps:
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
+  - pytest -v -s models/multimodal/generation/test_maverick.py
 
 - label: Plugin Tests (2 GPUs) # 40min
   mirror_hardwares: [amdexperimental]
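
The line added here registers the new Maverick test with the 2-GPU CI job (this hunk appears to come from vLLM's Buildkite pipeline definition, likely .buildkite/test-pipeline.yaml). The test itself is guarded by the multi_gpu_test helper imported in the next hunk. As a rough illustration of what such a gate typically does, here is a minimal sketch assuming pytest and torch are available; the real helper lives in the repo's tests/utils.py and may differ:

import pytest
import torch


def multi_gpu_test(num_gpus: int):
    # Hypothetical reimplementation for illustration only: skip the
    # decorated test unless at least `num_gpus` CUDA devices are visible.
    return pytest.mark.skipif(
        torch.cuda.device_count() < num_gpus,
        reason=f"requires at least {num_gpus} GPUs",
    )
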
@@ -23,6 +23,8 @@ from transformers import (AutoConfig, AutoProcessor, AutoTokenizer,
 
 from vllm import LLM, SamplingParams
 
+from ....utils import multi_gpu_test
+
 # Sample prompts for testing
 PROMPTS: list[str] = [
     "Hello, my name is",
@@ -541,6 +543,7 @@ def run_reduced_model(model_path: str,
     print("-" * 40)
 
 
+@multi_gpu_test(num_gpus=2)
 @pytest.mark.parametrize(
     "original_model_name,text_layers,num_experts,vision_layers,",
     [("meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", 4, 4, 2)])
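
The two hunks above edit the test module itself (the pytest path points at tests/models/multimodal/generation/test_maverick.py): they import multi_gpu_test and stack it on top of the existing parametrization, so the dummy Maverick case only runs on 2-GPU workers. A sketch of how the decorated test function might continue follows; the function name and the build_reduced_config helper are assumptions for illustration, and only the decorators and the parameter tuple are taken from the diff:

def test_dummy_maverick(original_model_name: str, text_layers: int,
                        num_experts: int, vision_layers: int,
                        tmp_path) -> None:
    # Assumed flow: shrink the original checkpoint's config down to a few
    # layers/experts so the "dummy" model fits in CI, then smoke-test
    # generation across both GPUs with tensor parallelism.
    model_path = build_reduced_config(  # hypothetical helper
        original_model_name, tmp_path, text_layers=text_layers,
        num_experts=num_experts, vision_layers=vision_layers)
    llm = LLM(model=model_path, tensor_parallel_size=2)
    outputs = llm.generate(PROMPTS, SamplingParams(max_tokens=8))
    assert all(out.outputs[0].text for out in outputs)
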