diff --git a/vllm/model_executor/layers/fused_moe/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
index 978c532236..a1f87ba92a 100644
--- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
@@ -571,34 +571,42 @@ def _valid_cutlass_block_scaled_grouped_gemm(
 
     _, K, N = w2.size()
     if not _valid_cutlass_block_scaled_grouped_gemm_shape(N, K):
-        logger.debug(
-            "CutlassBlockScaledGroupedGemm disabled: unalinged problem size.")
+        logger.debug_once(
+            "CutlassBlockScaledGroupedGemm disabled: unaligned problem size. "
+            "N: %s, K: %s",
+            N,
+            K,
+        )
         return False
 
     if (w1.dtype != torch.float8_e4m3fn or w2.dtype != torch.float8_e4m3fn):
-        logger.debug(
-            "CutlassBlockScaledGroupedGemm disabled: invalid weight dtype(s).")
+        logger.debug_once(
+            "CutlassBlockScaledGroupedGemm disabled: invalid weight dtype(s). "
+            "w1.dtype: %s, w2.dtype: %s",
+            w1.dtype,
+            w2.dtype,
+        )
         return False
 
     if expert_map is not None:
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: expert_parallel is"
             " not supported.")
         return False
 
     if activation != "silu":
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: only activation silu is"
             " supported.")
         return False
 
     if apply_router_weight_on_input:
-        logger.debug("CutlassBlockScaledGroupedGemm disabled:"
-                     " apply_router_weight_on_input is not supported.")
+        logger.debug_once("CutlassBlockScaledGroupedGemm disabled:"
+                          " apply_router_weight_on_input is not supported.")
         return False
 
     if inplace:
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: inplace is not supported."
         )
         return False
diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
index bb462938a3..f0c4ca5e52 100644
--- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
+++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
@@ -50,17 +50,33 @@ def _valid_deep_gemm(hidden_states: torch.Tensor, w1: torch.Tensor,
     M = hidden_states.size(0)
     _, K, N = w2.size()
     if not _valid_deep_gemm_shape(M, N, K):
-        logger.debug("DeepGemm disabled: unaligned problem size.")
+        logger.debug_once(
+            "DeepGemm disabled: unaligned problem size. M: %s, N: %s, K: %s",
+            M,
+            N,
+            K,
+        )
         return False
 
     if (w1.dtype != torch.float8_e4m3fn or w2.dtype != torch.float8_e4m3fn):
-        logger.debug("DeepGemm disabled: invalid weight dtype(s).")
+        logger.debug_once(
+            "DeepGemm disabled: invalid weight dtype(s). "
+            "w1.dtype: %s, w2.dtype: %s",
+            w1.dtype,
+            w2.dtype,
+        )
         return False
 
     if (not hidden_states.is_contiguous() or not w1.is_contiguous()
             or not w2.is_contiguous()):
-        logger.debug(
-            "DeepGemm disabled: weights or activations not contiguous.")
+        logger.debug_once(
+            "DeepGemm disabled: weights or activations not contiguous. "
+            "hidden_states.is_contiguous(): %s, w1.is_contiguous(): %s, "
+            "w2.is_contiguous(): %s",
+            hidden_states.is_contiguous(),
+            w1.is_contiguous(),
+            w2.is_contiguous(),
+        )
         return False
 
     return True
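
Both hunks replace per-call logger.debug with logger.debug_once and pass the offending shapes, dtypes, and contiguity flags as lazy %-style arguments, so each disable reason is reported with its details once rather than on every forward pass. The following is a minimal, self-contained sketch of the "log once" idea; it is not vLLM's actual debug_once implementation, and deduplicating on the fully formatted message is an assumption made only for illustration.

# Illustrative sketch only (not vLLM's implementation): a helper that logs a
# debug message the first time it is seen and suppresses later repeats.
# Keying the cache on the fully formatted message is an assumption for this demo.
import logging

_seen: set[str] = set()


def debug_once(logger: logging.Logger, msg: str, *args: object) -> None:
    """Log `msg % args` at DEBUG level only once per distinct formatted text."""
    formatted = msg % args if args else msg
    if formatted in _seen:
        return
    _seen.add(formatted)
    # Defer formatting to the logging framework, matching the %-style calls above.
    logger.debug(msg, *args)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger("example")
    # Repeated identical reasons are emitted only once.
    for _ in range(3):
        debug_once(log,
                   "DeepGemm disabled: unaligned problem size. M: %s, N: %s, K: %s",
                   7, 2048, 4096)

Note that keying on the formatted message still emits one line per distinct shape or dtype combination; keying on the format string alone would collapse those into a single line regardless of the arguments.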