mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix] Fix "an illegal memory access was encountered" error in the Marlin kernel with act_order (#18245)
This commit is contained in:
@@ -1767,17 +1767,20 @@ __global__ void Marlin(
|
||||
|
||||
if constexpr (has_act_order) {
|
||||
slice_k_start += tb_k * stages;
|
||||
slice_k_start_shared_fetch += tb_k * stages;
|
||||
int first_group_id = g_idx[slice_k_start];
|
||||
int last_g_idx = slice_k_start + stages * tb_k * 2;
|
||||
if (last_g_idx >= prob_k) {
|
||||
last_g_idx = prob_k - 1;
|
||||
}
|
||||
int last_group_id = g_idx[last_g_idx];
|
||||
if (last_group_id >= sh_first_group_id + sh_num_groups) {
|
||||
fetch_act_order_scales_to_shared(false, first_group_id,
|
||||
last_group_id);
|
||||
__syncthreads();
|
||||
|
||||
if (slice_k_start < prob_k) {
|
||||
slice_k_start_shared_fetch += tb_k * stages;
|
||||
int first_group_id = g_idx[slice_k_start];
|
||||
int last_g_idx = slice_k_start + stages * tb_k * 2;
|
||||
if (last_g_idx >= prob_k) {
|
||||
last_g_idx = prob_k - 1;
|
||||
}
|
||||
int last_group_id = g_idx[last_g_idx];
|
||||
if (last_group_id >= sh_first_group_id + sh_num_groups) {
|
||||
fetch_act_order_scales_to_shared(false, first_group_id,
|
||||
last_group_id);
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (slice_iters == 0) {
|
||||
|
@@ -1588,16 +1588,20 @@ __global__ void Marlin(
|
||||
|
||||
if constexpr (has_act_order) {
|
||||
slice_k_start += tb_k * stages;
|
||||
slice_k_start_shared_fetch += tb_k * stages;
|
||||
int first_group_id = g_idx[slice_k_start];
|
||||
int last_g_idx = slice_k_start + stages * tb_k * 2;
|
||||
if (last_g_idx >= prob_k) {
|
||||
last_g_idx = prob_k - 1;
|
||||
}
|
||||
int last_group_id = g_idx[last_g_idx];
|
||||
if (last_group_id >= sh_first_group_id + sh_num_groups) {
|
||||
fetch_act_order_scales_to_shared(false, first_group_id, last_group_id);
|
||||
__syncthreads();
|
||||
|
||||
if (slice_k_start < prob_k) {
|
||||
slice_k_start_shared_fetch += tb_k * stages;
|
||||
int first_group_id = g_idx[slice_k_start];
|
||||
int last_g_idx = slice_k_start + stages * tb_k * 2;
|
||||
if (last_g_idx >= prob_k) {
|
||||
last_g_idx = prob_k - 1;
|
||||
}
|
||||
int last_group_id = g_idx[last_g_idx];
|
||||
if (last_group_id >= sh_first_group_id + sh_num_groups) {
|
||||
fetch_act_order_scales_to_shared(false, first_group_id,
|
||||
last_group_id);
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -2,7 +2,7 @@ gptq_marlin, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
|
||||
gptq_marlin, TheBloke/Llama-2-7B-GPTQ, main
|
||||
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, main
|
||||
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit--1g-actorder_True
|
||||
#gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
|
||||
gptq_marlin, TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ, gptq-8bit-32g-actorder_True
|
||||
gptq_marlin, TechxGenus/gemma-1.1-2b-it-GPTQ, main
|
||||
gptq, robertgshaw2/zephyr-7b-beta-channelwise-gptq, main
|
||||
gptq, TheBloke/Llama-2-7B-GPTQ, main
|
||||
|
Reference in New Issue
Block a user