mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix] Fix packed_factor missing attribute error (#23902)
Signed-off-by: Kyuyeun Kim <kyuyeunk@google.com>
This commit is contained in:
@ -722,8 +722,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
|
||||
# If quantized, we need to adjust the offset and size to account
|
||||
# for the packing.
|
||||
if packed_dim == output_dim:
|
||||
shard_size = shard_size // param.pack_factor
|
||||
shard_offset = shard_offset // param.pack_factor
|
||||
shard_size = shard_size // param.packed_factor
|
||||
shard_offset = shard_offset // param.packed_factor
|
||||
# Special case for Marlin.
|
||||
shard_size, shard_offset = adjust_marlin_shard(
|
||||
param, shard_size, shard_offset)
|
||||
@ -756,8 +756,8 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
|
||||
# for the packing.
|
||||
packed_dim = getattr(param, "packed_dim", None)
|
||||
if packed_dim == output_dim:
|
||||
shard_size = shard_size // param.pack_factor
|
||||
shard_offset = shard_offset // param.pack_factor
|
||||
shard_size = shard_size // param.packed_factor
|
||||
shard_offset = shard_offset // param.packed_factor
|
||||
# Special case for Marlin.
|
||||
shard_size, shard_offset = adjust_marlin_shard(
|
||||
param, shard_size, shard_offset)
|
||||
@ -1107,8 +1107,8 @@ class QKVParallelLinear(ColumnParallelLinear):
|
||||
# If quantized, we need to adjust the offset and size to account
|
||||
# for the packing.
|
||||
if packed_dim == output_dim:
|
||||
shard_size = shard_size // param.pack_factor
|
||||
shard_offset = shard_offset // param.pack_factor
|
||||
shard_size = shard_size // param.packed_factor
|
||||
shard_offset = shard_offset // param.packed_factor
|
||||
|
||||
# Special case for Marlin.
|
||||
shard_size, shard_offset = adjust_marlin_shard(
|
||||
@ -1155,8 +1155,8 @@ class QKVParallelLinear(ColumnParallelLinear):
|
||||
# for the packing.
|
||||
packed_dim = getattr(param, "packed_dim", None)
|
||||
if packed_dim == output_dim:
|
||||
shard_size = shard_size // param.pack_factor
|
||||
shard_offset = shard_offset // param.pack_factor
|
||||
shard_size = shard_size // param.packed_factor
|
||||
shard_offset = shard_offset // param.packed_factor
|
||||
|
||||
# Special case for Marlin.
|
||||
shard_size, shard_offset = adjust_marlin_shard(
|
||||
|
Reference in New Issue
Block a user