[KERNEL] Sampler. CUDA kernel for applying repetition penalty (#18437)

This commit is contained in:
Vadim Gimpelson
2025-06-04 08:13:01 +04:00
committed by GitHub
parent 1409ef9134
commit 5d6d1adf15
7 changed files with 218 additions and 9 deletions

View File

@@ -242,6 +242,7 @@ set(VLLM_EXT_SRC
"csrc/activation_kernels.cu"
"csrc/layernorm_kernels.cu"
"csrc/layernorm_quant_kernels.cu"
"csrc/sampler.cu"
"csrc/cuda_view.cu"
"csrc/quantization/gptq/q_gemm.cu"
"csrc/quantization/compressed_tensors/int8_quant_kernels.cu"