mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
Add defence for offload_states and reload_states w/o optimizer (#7211)
When the optimizer is not specified, the optimizer will be of type
`DeepSpeedZeRoOffload` instead of `DeepSpeedZeroOptimizer_Stage3` (e.g.
for ZeRO-3 pure inference), and `DeepSpeedZeRoOffload` does not
implement the methods `reload_states` and `offload_states`.
56005d2b25/deepspeed/runtime/engine.py (L1684-L1707)
```log
File "deepspeed/runtime/engine.py", line 3904, in offload_states
self.optimizer.offload_states(include=include, device=device, pin_memory=pin_memory, non_blocking=non_blocking)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'DeepSpeedZeRoOffload' object has no attribute 'offload_states'
```
In addition, https://github.com/deepspeedai/DeepSpeed/pull/6855 seems to
have forgotten to remove the check `assert not self.zero_offload_param()`, as
suggested by
https://github.com/deepspeedai/DeepSpeed/issues/6833#issuecomment-2537295310.
`self.zero_offload_param()` returns None when offload_param is not given, and the newly added
assertions already cover these cases, so this PR also removes this
old check.
Signed-off-by: Hollow Man <hollowman@opensuse.org>
This commit is contained in:
@ -3894,7 +3894,9 @@ class DeepSpeedEngine(Module):
|
||||
param_offload_config = self.zero_offload_param()
|
||||
assert param_offload_config is None or param_offload_config.device == OffloadDeviceEnum.none, "Moving states across devices is not supported for offloaded parameters."
|
||||
|
||||
assert not self.zero_offload_param(), "Moving states across devices is not supported for offloaded parameters."
|
||||
assert not isinstance(
|
||||
self.optimizer,
|
||||
DeepSpeedZeRoOffload), "Moving states across devices is not supported without an optimizer."
|
||||
|
||||
if device == OffloadDeviceEnum.none:
|
||||
logger.warning("No device specified for offloading states.")
|
||||
@ -3913,4 +3915,9 @@ class DeepSpeedEngine(Module):
|
||||
"""
|
||||
assert self.zero_optimization_stage(
|
||||
) == ZeroStageEnum.weights, "Moving buffers back is supported only for ZeRO stage 3."
|
||||
|
||||
assert not isinstance(
|
||||
self.optimizer,
|
||||
DeepSpeedZeRoOffload), "Moving states across devices is not supported without an optimizer."
|
||||
|
||||
self.optimizer.reload_states(non_blocking=non_blocking)
|
||||
|
Reference in New Issue
Block a user