fix some typos (#24616)

Signed-off-by: co63oc <co63oc@users.noreply.github.com>
co63oc
2025-09-12 01:48:46 +08:00
committed by GitHub
parent c1eda615ba
commit e26fef8397
5 changed files with 6 additions and 6 deletions

View File

@@ -23,7 +23,7 @@ class TestSetting:
fullgraph: bool
-# we cannot afford testing the full Catesian product
+# we cannot afford testing the full Cartesian product
# of all models and all levels
@pytest.mark.parametrize(
"test_setting",

View File

@@ -345,7 +345,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens):
# in the mamba2 ssd kernels, by comparing concatenation (in the sequence
# dimension) of chunked results with the full sequence result.
# It is different from test_mamba_chunk_scan_cont_batch by:
-# 1. Not using the naive torch implementaion (ssd_minimal_discrete) to get
+# 1. Not using the naive torch implementation (ssd_minimal_discrete) to get
# reference outputs. Instead, it compares chunked kernel outputs to full
# sequence kernel outputs. This is the most straightforward way to
# assert chunked prefill correctness.
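
The comment above describes asserting chunked-prefill correctness by concatenating chunked kernel outputs along the sequence dimension and comparing them with the full-sequence outputs. A minimal sketch of that comparison pattern, using a plain cumulative sum as a stand-in for the actual mamba2 ssd kernels (function names and shapes are illustrative assumptions):

import torch

def full_scan(x: torch.Tensor) -> torch.Tensor:
    # Stand-in for the full-sequence kernel: cumulative sum over the
    # sequence dimension.
    return torch.cumsum(x, dim=0)

def chunked_scan(x: torch.Tensor, chunk_size: int) -> torch.Tensor:
    # Process the sequence chunk by chunk, carrying the running state
    # (here just the last cumulative value) across chunk boundaries.
    outputs, state = [], torch.zeros(x.shape[1:])
    for chunk in torch.split(x, chunk_size, dim=0):
        out = torch.cumsum(chunk, dim=0) + state
        state = out[-1]
        outputs.append(out)
    # Concatenate the per-chunk results in the sequence dimension.
    return torch.cat(outputs, dim=0)

x = torch.randn(37, 4)
torch.testing.assert_close(chunked_scan(x, chunk_size=8), full_scan(x))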

View File

@@ -179,7 +179,7 @@ def chunk_local_cumsum_vector(
def grid(meta):
return (triton.cdiv(meta['S'], meta['BS']), NT, B * H)
-# keep cummulative normalizer in fp32
+# keep cumulative normalizer in fp32
# this kernel is equivalent to
# g = g.view(B, H, NT, BT, -1).cumsum(-2).view(B, H, T, -1)
chunk_local_cumsum_vector_kernel[grid](g_org,
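
The comment above states that the Triton kernel is equivalent to a chunk-local cumulative sum obtained by reshaping the sequence into NT chunks of size BT and running cumsum over the chunk dimension, with the normalizer kept in fp32. A hedged reference sketch of that equivalence (the (B, H, T, S) layout and dtype handling are assumptions, not the kernel's exact interface):

import torch

def chunk_local_cumsum_ref(g: torch.Tensor, BT: int) -> torch.Tensor:
    # Split the sequence of length T into NT chunks of size BT and take a
    # cumulative sum within each chunk only.
    B, H, T, S = g.shape
    assert T % BT == 0, "sketch assumes T is a multiple of the chunk size"
    NT = T // BT
    # Accumulate in fp32 to keep the cumulative normalizer numerically stable.
    g32 = g.float()
    return g32.view(B, H, NT, BT, S).cumsum(-2).view(B, H, T, S)

g = torch.randn(2, 3, 16, 8, dtype=torch.bfloat16)
out = chunk_local_cumsum_ref(g, BT=4)
print(out.shape, out.dtype)  # torch.Size([2, 3, 16, 8]) torch.float32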

View File

@@ -1322,7 +1322,7 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
k_scale: torch.Tensor,
dcp_world_size: int,
):
-assert k_scale is None, "DCP not support sacled kvcache now."
+assert k_scale is None, "DCP not support scaled kvcache now."
assert attn_metadata.prefill is not None
prefill_metadata = attn_metadata.prefill
assert prefill_metadata.chunked_context is not None

View File

@@ -112,9 +112,9 @@ class BlockTable:
# tokens.
virtual_block_offsets = positions % virtual_block_size
mask = virtual_block_offsets % self.dcp_world_size == self.dcp_rank
-# Calcuate local block_offsets
+# Calculate local block_offsets
block_offsets = virtual_block_offsets // self.dcp_world_size
-# Calcuate slot_mapping
+# Calculate slot_mapping
slot_mapping = block_numbers * self.block_size + block_offsets
# Write final slots, use -1 for not-local
self.slot_mapping_np[:req_indices.shape[0]] = np.where(
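
The hunk above computes slot mappings under decode context parallelism (DCP): token positions inside a virtual block are striped round-robin across DCP ranks, each rank divides the offset back down to its local block offset, and non-local tokens get slot -1. A toy NumPy sketch of that arithmetic (the identity block table and the concrete sizes are assumptions for illustration):

import numpy as np

block_size = 4
dcp_world_size = 2
dcp_rank = 0
# One local block on each rank covers block_size * dcp_world_size tokens.
virtual_block_size = block_size * dcp_world_size

positions = np.arange(12)                         # token positions of one request
block_numbers = positions // virtual_block_size   # pretend the block table is the identity
virtual_block_offsets = positions % virtual_block_size

# A token is local to this rank if its offset lands on this rank in the
# round-robin striping across dcp_world_size ranks.
mask = virtual_block_offsets % dcp_world_size == dcp_rank
# Calculate local block_offsets
block_offsets = virtual_block_offsets // dcp_world_size
# Calculate slot_mapping
slot_mapping = block_numbers * block_size + block_offsets
# Write final slots, use -1 for not-local
print(np.where(mask, slot_mapping, -1))
# [ 0 -1  1 -1  2 -1  3 -1  4 -1  5 -1]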