mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Enabling multi-group kernel tests. (#17115)
Signed-off-by: Alexei V. Ivanov <alexei.ivanov@amd.com>
This commit is contained in:
committed by
GitHub
parent
06ffc7e1d3
commit
608968b7c5
@ -75,37 +75,51 @@ HF_MOUNT="/root/.cache/huggingface"
|
||||
commands=$@
|
||||
echo "Commands:$commands"
|
||||
#ignore certain kernels tests
|
||||
if [[ $commands == *" kernels "* ]]; then
|
||||
if [[ $commands == *" kernels/core"* ]]; then
|
||||
commands="${commands} \
|
||||
--ignore=kernels/test_attention_selector.py \
|
||||
--ignore=kernels/test_blocksparse_attention.py \
|
||||
--ignore=kernels/test_causal_conv1d.py \
|
||||
--ignore=kernels/test_cutlass.py \
|
||||
--ignore=kernels/test_encoder_decoder_attn.py \
|
||||
--ignore=kernels/test_flash_attn.py \
|
||||
--ignore=kernels/test_flashinfer.py \
|
||||
--ignore=kernels/test_int8_quant.py \
|
||||
--ignore=kernels/test_machete_gemm.py \
|
||||
--ignore=kernels/test_mamba_ssm.py \
|
||||
--ignore=kernels/test_marlin_gemm.py \
|
||||
--ignore=kernels/test_moe.py \
|
||||
--ignore=kernels/test_prefix_prefill.py \
|
||||
--ignore=kernels/test_rand.py \
|
||||
--ignore=kernels/test_sampler.py \
|
||||
--ignore=kernels/test_cascade_flash_attn.py \
|
||||
--ignore=kernels/test_mamba_mixer2.py \
|
||||
--ignore=kernels/test_aqlm.py \
|
||||
--ignore=kernels/test_machete_mm.py \
|
||||
--ignore=kernels/test_mha_attn.py \
|
||||
--ignore=kernels/test_block_fp8.py \
|
||||
--ignore=kernels/test_cutlass_moe.py \
|
||||
--ignore=kernels/test_mamba_ssm_ssd.py \
|
||||
--ignore=kernels/test_attention.py \
|
||||
--ignore=kernels/test_block_int8.py \
|
||||
--ignore=kernels/test_fused_quant_layernorm.py \
|
||||
--ignore=kernels/test_int8_kernel.py \
|
||||
--ignore=kernels/test_triton_moe_ptpc_fp8.py \
|
||||
--ignore=kernels/test_permute_cols.py"
|
||||
--ignore=kernels/core/test_fused_quant_layernorm.py \
|
||||
--ignore=kernels/core/test_permute_cols.py"
|
||||
fi
|
||||
|
||||
if [[ $commands == *" kernels/attention"* ]]; then
|
||||
commands="${commands} \
|
||||
--ignore=kernels/attention/stest_attention_selector.py \
|
||||
--ignore=kernels/attention/test_blocksparse_attention.py \
|
||||
--ignore=kernels/attention/test_encoder_decoder_attn.py \
|
||||
--ignore=kernels/attention/test_attention_selector.py \
|
||||
--ignore=kernels/attention/test_flash_attn.py \
|
||||
--ignore=kernels/attention/test_flashinfer.py \
|
||||
--ignore=kernels/attention/test_prefix_prefill.py \
|
||||
--ignore=kernels/attention/test_cascade_flash_attn.py \
|
||||
--ignore=kernels/attention/test_mha_attn.py \
|
||||
--ignore=kernels/attention/test_lightning_attn.py \
|
||||
--ignore=kernels/attention/test_attention.py"
|
||||
fi
|
||||
|
||||
if [[ $commands == *" kernels/quantization"* ]]; then
|
||||
commands="${commands} \
|
||||
--ignore=kernels/quantization/test_int8_quant.py \
|
||||
--ignore=kernels/quantization/test_aqlm.py \
|
||||
--ignore=kernels/quantization/test_machete_mm.py \
|
||||
--ignore=kernels/quantization/test_block_fp8.py \
|
||||
--ignore=kernels/quantization/test_block_int8.py \
|
||||
--ignore=kernels/quantization/test_marlin_gemm.py \
|
||||
--ignore=kernels/quantization/test_cutlass_scaled_mm.py \
|
||||
--ignore=kernels/quantization/test_int8_kernel.py"
|
||||
fi
|
||||
|
||||
if [[ $commands == *" kernels/mamba"* ]]; then
|
||||
commands="${commands} \
|
||||
--ignore=kernels/mamba/test_mamba_mixer2.py \
|
||||
--ignore=kernels/mamba/test_causal_conv1d.py \
|
||||
--ignore=kernels/mamba/test_mamba_ssm_ssd.py"
|
||||
fi
|
||||
|
||||
if [[ $commands == *" kernels/moe"* ]]; then
|
||||
commands="${commands} \
|
||||
--ignore=kernels/moe/test_moe.py \
|
||||
--ignore=kernels/moe/test_cutlass_moe.py \
|
||||
--ignore=kernels/moe/test_triton_moe_ptpc_fp8.py"
|
||||
fi
|
||||
|
||||
#ignore certain Entrypoints/openai tests
|
||||
|
@ -319,6 +319,7 @@ steps:
|
||||
- pytest -v -s compile/test_full_graph.py
|
||||
|
||||
- label: Kernels Core Operation Test
|
||||
mirror_hardwares: [amd]
|
||||
source_file_dependencies:
|
||||
- csrc/
|
||||
- tests/kernels/core
|
||||
@ -326,6 +327,7 @@ steps:
|
||||
- pytest -v -s kernels/core
|
||||
|
||||
- label: Kernels Attention Test %N
|
||||
mirror_hardwares: [amd]
|
||||
source_file_dependencies:
|
||||
- csrc/attention/
|
||||
- vllm/attention
|
||||
@ -336,6 +338,7 @@ steps:
|
||||
parallelism: 2
|
||||
|
||||
- label: Kernels Quantization Test %N
|
||||
mirror_hardwares: [amd]
|
||||
source_file_dependencies:
|
||||
- csrc/quantization/
|
||||
- vllm/model_executor/layers/quantization
|
||||
@ -345,6 +348,7 @@ steps:
|
||||
parallelism: 2
|
||||
|
||||
- label: Kernels MoE Test
|
||||
#mirror_hardwares: [amd]
|
||||
source_file_dependencies:
|
||||
- csrc/moe/
|
||||
- tests/kernels/moe
|
||||
@ -353,6 +357,7 @@ steps:
|
||||
- pytest -v -s kernels/moe
|
||||
|
||||
- label: Kernels Mamba Test
|
||||
#mirror_hardwares: [amd]
|
||||
source_file_dependencies:
|
||||
- csrc/mamba/
|
||||
- tests/kernels/mamba
|
||||
|
Reference in New Issue
Block a user