Mirror of https://github.com/vllm-project/vllm.git, synced 2025-10-20 23:03:52 +08:00
[Log] Debugging Log with more Information (#20770)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
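The change replaces per-call logger.debug with logger.debug_once in the kernel-validity checks and adds the concrete values that caused each check to fail (problem sizes, weight dtypes, contiguity flags), so a disabled fast path can be diagnosed from a single, non-repeating log line. The sketch below only illustrates the once-per-message idea, assuming debug_once deduplicates on the formatted text; it is not the actual helper in vllm/logger.py.

    import logging

    logger = logging.getLogger("sketch")
    _seen_messages: set[str] = set()

    def debug_once(msg: str, *args) -> None:
        """Log a debug message only the first time this exact text is seen."""
        # Dedupe on the fully formatted string so an identical complaint
        # emitted on every forward pass appears in the log exactly once.
        key = msg % args if args else msg
        if key in _seen_messages:
            return
        _seen_messages.add(key)
        logger.debug(msg, *args)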
@@ -571,34 +571,42 @@ def _valid_cutlass_block_scaled_grouped_gemm(
     _, K, N = w2.size()
     if not _valid_cutlass_block_scaled_grouped_gemm_shape(N, K):
-        logger.debug(
-            "CutlassBlockScaledGroupedGemm disabled: unalinged problem size.")
+        logger.debug_once(
+            "CutlassBlockScaledGroupedGemm disabled: unaligned problem size. "
+            "N: %s, K: %s",
+            N,
+            K,
+        )
         return False

     if (w1.dtype != torch.float8_e4m3fn or w2.dtype != torch.float8_e4m3fn):
-        logger.debug(
-            "CutlassBlockScaledGroupedGemm disabled: invalid weight dtype(s).")
+        logger.debug_once(
+            "CutlassBlockScaledGroupedGemm disabled: invalid weight dtype(s). "
+            "w1.dtype: %s, w2.dtype: %s",
+            w1.dtype,
+            w2.dtype,
+        )
         return False

     if expert_map is not None:
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: expert_parallel is"
             " not supported.")
         return False

     if activation != "silu":
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: only activation silu is"
             " supported.")
         return False

     if apply_router_weight_on_input:
-        logger.debug("CutlassBlockScaledGroupedGemm disabled:"
-                     " apply_router_weight_on_input is not supported.")
+        logger.debug_once("CutlassBlockScaledGroupedGemm disabled:"
+                          " apply_router_weight_on_input is not supported.")
         return False

     if inplace:
-        logger.debug(
+        logger.debug_once(
             "CutlassBlockScaledGroupedGemm disabled: inplace is not supported."
         )
         return False
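Note that the new calls keep printf-style placeholders and pass N, K, and the dtypes as separate arguments rather than pre-formatting the message. With the standard logging API the interpolation only runs if the record is actually emitted, so the extra detail costs essentially nothing when debug logging is off. A small, generic illustration (plain Python logging, not vLLM-specific; the values are made up):

    import logging

    logging.basicConfig(level=logging.INFO)  # DEBUG records are filtered out
    log = logging.getLogger("lazy-format")

    N, K = 7168, 511  # hypothetical, unaligned problem size

    # Lazy: the %s arguments are only substituted if DEBUG is enabled.
    log.debug(
        "CutlassBlockScaledGroupedGemm disabled: unaligned problem size. "
        "N: %s, K: %s", N, K)

    # Eager: an f-string always pays the formatting cost, even though the
    # record is dropped by the level filter above.
    log.debug(f"unaligned problem size. N: {N}, K: {K}")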
@@ -50,17 +50,33 @@ def _valid_deep_gemm(hidden_states: torch.Tensor, w1: torch.Tensor,
     M = hidden_states.size(0)
     _, K, N = w2.size()
     if not _valid_deep_gemm_shape(M, N, K):
-        logger.debug("DeepGemm disabled: unaligned problem size.")
+        logger.debug_once(
+            "DeepGemm disabled: unaligned problem size. M: %s, N: %s, K: %s",
+            M,
+            N,
+            K,
+        )
         return False

     if (w1.dtype != torch.float8_e4m3fn or w2.dtype != torch.float8_e4m3fn):
-        logger.debug("DeepGemm disabled: invalid weight dtype(s).")
+        logger.debug_once(
+            "DeepGemm disabled: invalid weight dtype(s). "
+            "w1.dtype: %s, w2.dtype: %s",
+            w1.dtype,
+            w2.dtype,
+        )
         return False

     if (not hidden_states.is_contiguous() or not w1.is_contiguous()
             or not w2.is_contiguous()):
-        logger.debug(
-            "DeepGemm disabled: weights or activations not contiguous.")
+        logger.debug_once(
+            "DeepGemm disabled: weights or activations not contiguous. "
+            "hidden_states.is_contiguous(): %s, w1.is_contiguous(): %s, "
+            "w2.is_contiguous(): %s",
+            hidden_states.is_contiguous(),
+            w1.is_contiguous(),
+            w2.is_contiguous(),
+        )
         return False

     return True
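To see these messages from vLLM itself, debug-level logging has to be enabled (typically via the VLLM_LOGGING_LEVEL=DEBUG environment variable). The snippet below is a self-contained imitation of the dtype check from the second hunk, showing the once-per-message behaviour end to end; valid_weight_dtypes and _debug_once are illustrative stand-ins, not the real functions from the file being patched.

    import logging
    import torch  # requires a torch build with float8_e4m3fn (>= 2.1)

    logging.basicConfig(level=logging.DEBUG)
    log = logging.getLogger("deep-gemm-check")
    _logged: set[str] = set()

    def _debug_once(msg: str, *args) -> None:
        # Same once-per-message idea as the sketch above.
        key = msg % args
        if key not in _logged:
            _logged.add(key)
            log.debug(msg, *args)

    def valid_weight_dtypes(w1: torch.Tensor, w2: torch.Tensor) -> bool:
        """Toy version of the dtype check in _valid_deep_gemm."""
        if w1.dtype != torch.float8_e4m3fn or w2.dtype != torch.float8_e4m3fn:
            _debug_once(
                "DeepGemm disabled: invalid weight dtype(s). "
                "w1.dtype: %s, w2.dtype: %s", w1.dtype, w2.dtype)
            return False
        return True

    w1 = torch.empty(8, 16, dtype=torch.float16)
    w2 = torch.empty(8, 16, dtype=torch.float16)
    valid_weight_dtypes(w1, w2)  # logs once, with the offending dtypes
    valid_weight_dtypes(w1, w2)  # silent: the message was already seen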