[Bugfix] Fix illegal memory access in FP8 MoE kernel (#6382)

Commit: 75f64d8b94
Parent: 21b2dcedab
Author: Cody Yu
Date:   2024-07-12 14:33:33 -07:00
Committed by: GitHub


@@ -492,12 +492,14 @@ def fused_experts(hidden_states: torch.Tensor,
         if tokens_in_chunk == 0:
             break

-        if tokens_in_chunk < CHUNK_SIZE:
-            # will only happen in the last chunk
+        if tokens_in_chunk < CHUNK_SIZE and chunk > 0:
+            # Adjust the intermediate cache size and config for the last
+            # chunk. Note that in most cases we only have one chunk
+            # so the cache size and config are already set correctly and
+            # do not need to be adjusted.
             intermediate_cache1 = intermediate_cache1[:tokens_in_chunk]
             intermediate_cache2 = intermediate_cache2[:tokens_in_chunk]
             intermediate_cache3 = intermediate_cache3[:tokens_in_chunk]
-            # reload config to get better performance on the last chunk
             config = get_config_func(tokens_in_chunk)

         curr_topk_ids = topk_ids[begin_chunk_idx:end_chunk_idx]
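
For context, a minimal sketch of the chunking pattern the fix targets. The names CHUNK_SIZE, tokens_in_chunk, and get_config_func mirror the diff above; the single cache tensor, the chunked_forward wrapper, and the copy_ stand-in for the expert computation are illustrative assumptions, not vLLM's actual implementation:

    import torch

    CHUNK_SIZE = 64 * 1024  # per-iteration token budget (illustrative value)

    def chunked_forward(hidden_states: torch.Tensor) -> torch.Tensor:
        # The cache is allocated once, sized for min(num_tokens, CHUNK_SIZE)
        # rows. When the whole batch fits in a single partial chunk
        # (chunk == 0), the cache already has exactly tokens_in_chunk rows,
        # so re-slicing it and re-picking the kernel config there is
        # unnecessary -- which is what the added `chunk > 0` guard encodes.
        num_tokens, hidden = hidden_states.shape
        cache = torch.empty(min(num_tokens, CHUNK_SIZE), hidden,
                            dtype=hidden_states.dtype,
                            device=hidden_states.device)
        out = torch.empty_like(hidden_states)

        for chunk in range(num_tokens // CHUNK_SIZE + 1):
            begin_chunk_idx = chunk * CHUNK_SIZE
            end_chunk_idx = min(begin_chunk_idx + CHUNK_SIZE, num_tokens)
            tokens_in_chunk = end_chunk_idx - begin_chunk_idx
            if tokens_in_chunk == 0:
                break
            if tokens_in_chunk < CHUNK_SIZE and chunk > 0:
                # Trailing partial chunk: shrink the preallocated cache
                # (and, in the real code, reload the kernel config) to
                # match the smaller batch.
                cache = cache[:tokens_in_chunk]
            # Stand-in for the fused expert computation.
            cache.copy_(hidden_states[begin_chunk_idx:end_chunk_idx])
            out[begin_chunk_idx:end_chunk_idx] = cache
        return out

Before the fix, the chunk == 0 path also re-sliced the caches and reloaded the config even though both were already sized for the batch; per the commit title, that redundant adjustment apparently interacted badly with the FP8 MoE kernel, and restricting it to genuinely trailing chunks avoids the illegal memory access.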