mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[CI] Disable flash_attn backend for spec decode (#5286)
This commit is contained in:
@ -45,7 +45,7 @@ steps:
|
||||
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
|
||||
- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py
|
||||
- TEST_DIST_MODEL=meta-llama/Llama-2-7b-hf DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_chunked_prefill_distributed.py
|
||||
- pytest -v -s spec_decode/e2e/test_integration_dist.py
|
||||
- pytest -v -s spec_decode/e2e/test_integration_dist.py
|
||||
|
||||
- label: Distributed Tests (Multiple Groups)
|
||||
#mirror_hardwares: [amd]
|
||||
@ -124,7 +124,10 @@ steps:
|
||||
|
||||
- label: Speculative decoding tests
|
||||
#mirror_hardwares: [amd]
|
||||
command: pytest -v -s spec_decode
|
||||
commands:
|
||||
# See https://github.com/vllm-project/vllm/issues/5152
|
||||
- export VLLM_ATTENTION_BACKEND=XFORMERS
|
||||
- pytest -v -s spec_decode
|
||||
|
||||
- label: LoRA Test %N
|
||||
#mirror_hardwares: [amd]
|
||||
|
Reference in New Issue
Block a user