Compare commits

...

4 Commits

Author SHA1 Message Date
8d0a3eeaf7 Release: v0.13.2 2022-10-17 11:09:16 -04:00
2a810a0ebd [Device map] nn.Parameter don't have children (#747)
* [Device map] nn.Parameter don't have children

* Update src/accelerate/utils/modeling.py

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
2022-10-17 11:07:54 -04:00
0f3828a4a0 v0.13.1: Release 2022-10-07 12:28:35 -04:00
2ef7973baf Fix num_processes is not defined (#746)
* Fix num_processes is not defined

* Also reorganize questions

Co-authored-by: Sylvain Gugger <Sylvain.gugger@gmail.com>
2022-10-07 12:27:49 -04:00
4 changed files with 10 additions and 9 deletions

View File

@@ -32,7 +32,7 @@ extras["sagemaker"] = [
 setup(
     name="accelerate",
-    version="0.13.0",
+    version="0.13.2",
     description="Accelerate",
     long_description=open("README.md", "r", encoding="utf-8").read(),
     long_description_content_type="text/markdown",

View File

@@ -2,7 +2,7 @@
 # There's no way to ignore "F401 '...' imported but unused" warnings in this
 # module, but to preserve other warnings. So, don't check this module at all.
-__version__ = "0.13.0"
+__version__ = "0.13.2"
 from .accelerator import Accelerator
 from .big_modeling import cpu_offload, disk_offload, dispatch_model, init_empty_weights, load_checkpoint_and_dispatch
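The two version-string edits above keep the wheel metadata in setup.py and the runtime accelerate.__version__ in sync for the 0.13.2 patch release. A quick sanity check, assuming the released 0.13.2 wheel is what's installed:

    import accelerate

    # Both the package metadata and the module-level string should report the new patch version.
    print(accelerate.__version__)  # expected: "0.13.2"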

View File

@@ -35,6 +35,7 @@ def get_cluster_input():
     machine_rank = 0
     num_machines = 1
     num_processes = 1
+    gpu_ids = None
     main_process_ip = None
     main_process_port = None
@@ -294,12 +295,6 @@ def get_cluster_input():
             default=1,
             error_message="Please enter an integer.",
         )
-    if distributed_type in [DistributedType.MULTI_GPU, DistributedType.NO] and not use_cpu:
-        gpu_ids = _ask_field(
-            "What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]:",
-            default="all",
-        )
     elif distributed_type in [DistributedType.FSDP, DistributedType.DEEPSPEED]:
         num_processes = _ask_field(
             "How many GPU(s) should be used for distributed training? [1]:",
@@ -310,6 +305,12 @@ def get_cluster_input():
     else:
         num_processes = 1
+    if distributed_type in [DistributedType.MULTI_GPU, DistributedType.NO] and not use_cpu:
+        gpu_ids = _ask_field(
+            "What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]:",
+            default="all",
+        )
     if distributed_type != DistributedType.TPU:
         if distributed_type == DistributedType.DEEPSPEED and use_deepspeed_config:
             mixed_precision = "no"
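Taken together, the three hunks above implement the fix from #746: gpu_ids now gets a default up front, and the GPU-id question is asked in its own if block after the if/elif/else chain that sets num_processes, rather than being wedged into that chain where it could short-circuit the branches that assign num_processes. A rough standalone sketch of the resulting flow, using made-up names, plain strings instead of the DistributedType enum, and input() in place of accelerate's _ask_field helper:

    def ask_cluster_questions(distributed_type, use_cpu):
        # Defaults first, mirroring `num_processes = 1` / `gpu_ids = None` in the diff,
        # so every path through the questionnaire leaves both names defined.
        num_processes = 1
        gpu_ids = None

        # One chain decides num_processes...
        if distributed_type in ("MULTI_GPU", "FSDP", "DEEPSPEED"):
            num_processes = int(input("How many GPU(s) should be used for distributed training? [1]: ") or 1)

        # ...and the GPU-id question lives in its own `if` afterwards, so it can no
        # longer shadow the num_processes branches.
        if distributed_type in ("MULTI_GPU", "NO") and not use_cpu:
            gpu_ids = input("What GPU(s) (by id) should be used for training? [all]: ") or "all"

        return num_processes, gpu_ids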

View File

@@ -258,7 +258,7 @@ def get_max_layer_size(
     modules_to_treat = modules.copy()
     while len(modules_to_treat) > 0:
         module_name, module = modules_to_treat.pop(0)
-        modules_children = list(module.named_children())
+        modules_children = list(module.named_children()) if isinstance(module, torch.nn.Module) else []
         if len(modules_children) == 0 or module.__class__.__name__ in no_split_module_classes:
             # No splitting this one so we compare to the max_size
             size = module_sizes[module_name]
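The modeling.py guard is the fix from #747 ("nn.Parameter don't have children"): while building a device map, the traversal can apparently pop an object that is an nn.Parameter rather than a submodule, and nn.Parameter is a Tensor subclass with no named_children() method, so the old unconditional call would raise AttributeError. A small self-contained illustration of that failure mode and the guard (TinyModel is invented for the example):

    import torch
    import torch.nn as nn

    class TinyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(4, 4)               # an nn.Module child
            self.scale = nn.Parameter(torch.ones(4))    # a bare parameter, not a module

    model = TinyModel()

    # Only submodules show up in named_children(); parameters do not.
    print([name for name, _ in model.named_children()])    # ['linear']

    # A traversal that also queues parameters would crash on .named_children();
    # the isinstance check in the diff treats them as leaves instead.
    for obj in (model.linear, model.scale):
        children = list(obj.named_children()) if isinstance(obj, nn.Module) else []
        print(type(obj).__name__, children)   # Linear [] / Parameter []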