[ROCm][Kernel] Using the correct warp_size value

2025-10-20 14:53:52 +08:00 · 2025-02-05 22:15:08 -05:00
parent 75404d041b
commit 5b19b93082
1 changed file with 2 additions and 2 deletions
--- a/csrc/moe/moe_align_sum_kernels.cu
+++ b/csrc/moe/moe_align_sum_kernels.cu
@@ -207,8 +207,8 @@ __global__ void sgl_moe_align_block_size_kernel(
  __shared__ int32_t shared_counts[32][8];
  __shared__ int32_t local_offsets[256];

-  const int warp_id = threadIdx.x / WARP_SIZE;
-  const int lane_id = threadIdx.x % WARP_SIZE;
+  const int warp_id = threadIdx.x / 32;
+  const int lane_id = threadIdx.x % 32;
  const int experts_per_warp = 8;
  const int my_expert_start = warp_id * experts_per_warp;