diff --git a/.gitignore b/.gitignore
index da99824a..ff24394d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -138,4 +138,7 @@ dmypy.json
 .DS_Store
 
 # More test things
-wandb
\ No newline at end of file
+wandb
+
+# ruff
+.ruff_cache
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d0e142b1..d3edd1c4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -152,7 +152,7 @@ Follow these steps to start contributing:
    $ make test
    ```
 
-   `accelerate` relies on `black` and `isort` to format its source code
+   `accelerate` relies on `black` and `ruff` to format its source code
    consistently. After you make changes, apply automatic style corrections and code verifications
    that can't be automated in one go with:
 
@@ -165,7 +165,7 @@ Follow these steps to start contributing:
    $ make style
    ```
 
-   `accelerate` also uses `flake8` and a few custom scripts to check for coding mistakes. Quality
+   `accelerate` also uses a few custom scripts to check for coding mistakes. Quality
    control runs in CI, however you can also run the same checks with:
 
    ```bash
diff --git a/Makefile b/Makefile
index 57ecab5d..13fc81c2 100644
--- a/Makefile
+++ b/Makefile
@@ -13,14 +13,13 @@ extra_quality_checks:
 # this target runs checks on all files
 quality:
 	black --check $(check_dirs)
-	isort --check-only $(check_dirs)
-	flake8 $(check_dirs)
+	ruff $(check_dirs)
 	doc-builder style src/accelerate docs/source --max_len 119 --check_only
 
 # Format source code automatically and check is there are any problems left that need manual fixing
 style:
 	black $(check_dirs)
-	isort $(check_dirs)
+	ruff $(check_dirs) --fix
 	doc-builder style src/accelerate docs/source --max_len 119
 
 # Run tests for the library
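Most of the churn in the file diffs that follow is mechanical: ruff's import-sorting rules (the `I` set) take over from `isort`, and with `accelerate` declared first-party, `accelerate` imports move into their own block after the third-party ones. A minimal sketch of the layout the new configuration enforces — the module and names here are hypothetical, purely illustrative:

```python
# Hypothetical example.py showing the import order `ruff --fix` produces
# with `known-first-party = ["accelerate"]` (configured in pyproject.toml below):
import os  # 1) standard library first

import torch  # 2) third-party block, alphabetized
from transformers import AutoTokenizer

from accelerate import Accelerator  # 3) first-party block last


# `lines-after-imports = 2` asks for two blank lines after the import block.
print(os.name, torch.__name__, AutoTokenizer.__name__, Accelerator.__name__)
```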
diff --git a/benchmarks/big_model_inference.py b/benchmarks/big_model_inference.py
index cb832d12..d7874e62 100644
--- a/benchmarks/big_model_inference.py
+++ b/benchmarks/big_model_inference.py
@@ -16,12 +16,12 @@ import argparse
 import time
 
 import torch
-
 import transformers
-from accelerate.utils import compute_module_sizes
 from measures_util import end_measure, log_measures, start_measure
 from transformers import AutoConfig, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
 
+from accelerate.utils import compute_module_sizes
+
 
 DEFAULT_MODELS = {
     "gpt-j-6b": {"is_causal": True, "model": "sgugger/sharded-gpt-j-6B", "tokenizer": "EleutherAI/gpt-j-6B"},
diff --git a/benchmarks/measures_util.py b/benchmarks/measures_util.py
index b6ac76b5..3c316cfd 100644
--- a/benchmarks/measures_util.py
+++ b/benchmarks/measures_util.py
@@ -2,9 +2,8 @@ import gc
 import threading
 import time
 
-import torch
-
 import psutil
+import torch
 
 
 class PeakCPUMemory:
diff --git a/docs/source/usage_guides/megatron_lm.mdx b/docs/source/usage_guides/megatron_lm.mdx
index 23b024f7..056dde60 100644
--- a/docs/source/usage_guides/megatron_lm.mdx
+++ b/docs/source/usage_guides/megatron_lm.mdx
@@ -290,6 +290,7 @@ You will implement the `accelerate.utils.AbstractTrainStep` or inherit from thei
 ```python
 from accelerate.utils import MegatronLMDummyScheduler, GPTTrainStep, avg_losses_across_data_parallel_group
 
+
 # Custom loss function for the Megatron model
 class GPTTrainStepWithCustomLoss(GPTTrainStep):
     def __init__(self, megatron_args, **kwargs):
diff --git a/examples/by_feature/automatic_gradient_accumulation.py b/examples/by_feature/automatic_gradient_accumulation.py
index 56fbeda8..64e387fa 100644
--- a/examples/by_feature/automatic_gradient_accumulation.py
+++ b/examples/by_feature/automatic_gradient_accumulation.py
@@ -14,16 +14,16 @@
 import argparse
 import os
 
-import torch
-from torch.optim import AdamW
-from torch.utils.data import DataLoader
-
 # New Code #
 import evaluate
+import torch
+from datasets import load_dataset
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
+
 from accelerate import Accelerator, DistributedType
 from accelerate.utils import find_executable_batch_size
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 
 ########################################################################
diff --git a/examples/by_feature/checkpointing.py b/examples/by_feature/checkpointing.py
index a26eb870..c8e3fa56 100644
--- a/examples/by_feature/checkpointing.py
+++ b/examples/by_feature/checkpointing.py
@@ -15,15 +15,15 @@
 import argparse
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate,
diff --git a/examples/by_feature/cross_validation.py b/examples/by_feature/cross_validation.py
index a9edbe1a..adba82d7 100644
--- a/examples/by_feature/cross_validation.py
+++ b/examples/by_feature/cross_validation.py
@@ -15,20 +15,20 @@
 import argparse
 from typing import List
 
+import evaluate
 import numpy as np
 import torch
-from torch.optim import AdamW
-from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
 from datasets import DatasetDict, load_dataset
 
 # New Code #
 # We'll be using StratifiedKFold for this example
 from sklearn.model_selection import StratifiedKFold
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate,
diff --git a/examples/by_feature/deepspeed_with_config_support.py b/examples/by_feature/deepspeed_with_config_support.py
index 6cfee9dd..c33ff741 100755
--- a/examples/by_feature/deepspeed_with_config_support.py
+++ b/examples/by_feature/deepspeed_with_config_support.py
@@ -31,16 +31,12 @@ import random
 from itertools import chain
 from pathlib import Path
 
-import torch
-from torch.utils.data import DataLoader
-
 import datasets
+import torch
 import transformers
-from accelerate import Accelerator, DistributedType
-from accelerate.logging import get_logger
-from accelerate.utils import DummyOptim, DummyScheduler, set_seed
 from datasets import load_dataset
 from huggingface_hub import Repository
+from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import (
     CONFIG_MAPPING,
@@ -55,6 +51,10 @@ from transformers import (
 from transformers.utils import get_full_repo_name
 from transformers.utils.versions import require_version
 
+from accelerate import Accelerator, DistributedType
+from accelerate.logging import get_logger
+from accelerate.utils import DummyOptim, DummyScheduler, set_seed
+
 
 logger = get_logger(__name__)
 
diff --git a/examples/by_feature/fsdp_with_peak_mem_tracking.py b/examples/by_feature/fsdp_with_peak_mem_tracking.py
index c8cf5e2b..344612bf 100644
--- a/examples/by_feature/fsdp_with_peak_mem_tracking.py
+++ b/examples/by_feature/fsdp_with_peak_mem_tracking.py
@@ -16,14 +16,14 @@ import argparse
 import gc
 import os
 
-import torch
-from torch.utils.data import DataLoader
-
 import evaluate
-from accelerate import Accelerator, DistributedType
+import torch
 from datasets import load_dataset
+from torch.utils.data import DataLoader
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/examples/by_feature/gradient_accumulation.py b/examples/by_feature/gradient_accumulation.py
index ddadb434..07fe6b6f 100644
--- a/examples/by_feature/gradient_accumulation.py
+++ b/examples/by_feature/gradient_accumulation.py
@@ -15,15 +15,15 @@
 import argparse
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/examples/by_feature/megatron_lm_gpt_pretraining.py b/examples/by_feature/megatron_lm_gpt_pretraining.py
index 3fcf1020..3c048b26 100644
--- a/examples/by_feature/megatron_lm_gpt_pretraining.py
+++ b/examples/by_feature/megatron_lm_gpt_pretraining.py
@@ -31,16 +31,12 @@ import random
 from itertools import chain
 from pathlib import Path
 
-import torch
-from torch.utils.data import DataLoader
-
 import datasets
+import torch
 import transformers
-from accelerate import Accelerator, DistributedType
-from accelerate.logging import get_logger
-from accelerate.utils import MegatronLMDummyScheduler, set_seed
 from datasets import load_dataset
 from huggingface_hub import Repository
+from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from transformers import (
     CONFIG_MAPPING,
@@ -55,6 +51,10 @@ from transformers import (
 from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry
 from transformers.utils.versions import require_version
 
+from accelerate import Accelerator, DistributedType
+from accelerate.logging import get_logger
+from accelerate.utils import MegatronLMDummyScheduler, set_seed
+
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
 check_min_version("4.23.0.dev0")
diff --git a/examples/by_feature/memory.py b/examples/by_feature/memory.py
index be82320a..fa4b5db3 100644
--- a/examples/by_feature/memory.py
+++ b/examples/by_feature/memory.py
@@ -14,16 +14,16 @@
 import argparse
 import os
 
-import torch
-from torch.optim import AdamW
-from torch.utils.data import DataLoader
-
 # New Code #
 import evaluate
+import torch
+from datasets import load_dataset
+from torch.optim import AdamW
+from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
+
 from accelerate import Accelerator, DistributedType
 from accelerate.utils import find_executable_batch_size
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 
 ########################################################################
diff --git a/examples/by_feature/multi_process_metrics.py b/examples/by_feature/multi_process_metrics.py
index 5121ddfa..f5623f99 100644
--- a/examples/by_feature/multi_process_metrics.py
+++ b/examples/by_feature/multi_process_metrics.py
@@ -15,15 +15,15 @@
 import argparse
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate,
diff --git a/examples/by_feature/tracking.py b/examples/by_feature/tracking.py
index 7be2a005..e9c88656 100644
--- a/examples/by_feature/tracking.py
+++ b/examples/by_feature/tracking.py
@@ -15,15 +15,15 @@
 import argparse
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate,
diff --git a/examples/complete_cv_example.py b/examples/complete_cv_example.py
index a7fe0c01..2d83fd87 100644
--- a/examples/complete_cv_example.py
+++ b/examples/complete_cv_example.py
@@ -17,15 +17,15 @@ import os
 import re
 
 import numpy as np
+import PIL
 import torch
+from timm import create_model
 from torch.optim.lr_scheduler import OneCycleLR
 from torch.utils.data import DataLoader, Dataset
-
-import PIL
-from accelerate import Accelerator
-from timm import create_model
 from torchvision.transforms import Compose, RandomResizedCrop, Resize, ToTensor
 
+from accelerate import Accelerator
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/examples/complete_nlp_example.py b/examples/complete_nlp_example.py
index e64b6ee8..a60934bc 100644
--- a/examples/complete_nlp_example.py
+++ b/examples/complete_nlp_example.py
@@ -15,15 +15,15 @@
 import argparse
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/examples/cv_example.py b/examples/cv_example.py
index bdf69eac..47b1c793 100644
--- a/examples/cv_example.py
+++ b/examples/cv_example.py
@@ -17,15 +17,15 @@ import os
 import re
 
 import numpy as np
+import PIL
 import torch
+from timm import create_model
 from torch.optim.lr_scheduler import OneCycleLR
 from torch.utils.data import DataLoader, Dataset
-
-import PIL
-from accelerate import Accelerator
-from timm import create_model
 from torchvision.transforms import Compose, RandomResizedCrop, Resize, ToTensor
 
+from accelerate import Accelerator
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/examples/nlp_example.py b/examples/nlp_example.py
index e3d504c7..c824ca26 100644
--- a/examples/nlp_example.py
+++ b/examples/nlp_example.py
@@ -14,15 +14,15 @@
 # limitations under the License.
 import argparse
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
-
-import evaluate
-from accelerate import Accelerator, DistributedType
-from datasets import load_dataset
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
+from accelerate import Accelerator, DistributedType
+
 
 ########################################################################
 # This is a fully working simple example to use Accelerate
diff --git a/pyproject.toml b/pyproject.toml
index b7465bb1..1f50f3fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,17 @@
 [tool.black]
 line-length = 119
-target-version = ['py36']
+target-version = ['py37']
+
+[tool.ruff]
+# Never enforce `E501` (line length violations).
+ignore = ["E501", "E741", "W605"]
+select = ["E", "F", "I", "W"]
+line-length = 119
+
+# Ignore import violations in all `__init__.py` files.
+[tool.ruff.per-file-ignores]
+"__init__.py" = ["E402", "F401", "F403", "F811"]
+
+[tool.ruff.isort]
+lines-after-imports = 2
+known-first-party = ["accelerate"]
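The `per-file-ignores` table above is what lets the patch drop the file-wide `# flake8: noqa` headers you will see removed from the various `__init__.py` files below: modules that exist only to re-export names would otherwise trip F401 ("imported but unused"). A sketch of the pattern being exempted — a hypothetical package, not accelerate's real layout:

```python
# Hypothetical mypkg/__init__.py that exists purely to re-export names.
# With the per-file-ignores entry for "__init__.py", these lines pass the
# F401/F403 checks without any file-wide suppression comment.
from .core import Engine
from .utils import helper
```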
diff --git a/setup.cfg b/setup.cfg
index 37cf3479..92959f1a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -4,11 +4,6 @@ ensure_newline_before_comments = True
 force_grid_wrap = 0
 include_trailing_comma = True
 known_first_party = accelerate
-known_third_party =
-    numpy
-    torch
-    torch_xla
-
 line_length = 119
 lines_after_imports = 2
 multi_line_output = 3
diff --git a/setup.py b/setup.py
index 9f6f182e..c985812f 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ from setuptools import setup
 from setuptools import find_packages
 
 extras = {}
-extras["quality"] = ["black ~= 22.0", "isort >= 5.5.4", "flake8 >= 3.8.3", "hf-doc-builder >= 0.3.0"]
+extras["quality"] = ["black ~= 23.1", "ruff >= 0.0.241", "hf-doc-builder >= 0.3.0"]
 extras["docs"] = []
 extras["test_prod"] = ["pytest", "pytest-xdist", "pytest-subtests", "parameterized"]
 extras["test_dev"] = ["datasets", "evaluate", "transformers", "scipy", "scikit-learn", "deepspeed<0.7.0", "tqdm"]
diff --git a/src/accelerate/__init__.py b/src/accelerate/__init__.py
index ab5944e5..ba3ff61c 100644
--- a/src/accelerate/__init__.py
+++ b/src/accelerate/__init__.py
@@ -1,7 +1,3 @@
-# flake8: noqa
-# There's no way to ignore "F401 '...' imported but unused" warnings in this
-# module, but to preserve other warnings. So, don't check this module at all.
-
 __version__ = "0.17.0.dev0"
 
 from .accelerator import Accelerator
diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py
index d9813e18..c8eb9952 100644
--- a/src/accelerate/accelerator.py
+++ b/src/accelerate/accelerator.py
@@ -1017,7 +1017,6 @@ class Accelerator:
         return model
 
     def _prepare_deepspeed(self, *args):
-
         deepspeed_plugin = self.state.deepspeed_plugin
 
         if deepspeed_plugin.deepspeed_config["train_micro_batch_size_per_gpu"] == "auto":
@@ -1469,7 +1468,7 @@ class Accelerator:
         >>> accelerator = Accelerator(gradient_accumulation_steps=2)
         >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
 
-        >>> for (input, target) in dataloader:
+        >>> for input, target in dataloader:
         ...     optimizer.zero_grad()
         ...     output = model(input)
         ...     loss = loss_func(output, target)
@@ -1504,7 +1503,7 @@ class Accelerator:
         >>> accelerator = Accelerator(gradient_accumulation_steps=2)
         >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
 
-        >>> for (input, target) in dataloader:
+        >>> for input, target in dataloader:
         ...     optimizer.zero_grad()
         ...     output = model(input)
         ...     loss = loss_func(output, target)
@@ -1594,7 +1593,7 @@ class Accelerator:
             else:
                 # Not at the end of the dataloader, no need to adjust the tensors
                 return tensor
-        except:
+        except Exception:
             # Dataset had no length or raised an error
             return tensor
         return tensor
@@ -2349,7 +2348,7 @@ class Accelerator:
         >>> accelerator = Accelerator()
         >>> dataloader, model, optimizer, scheduler = accelerator.prepare(dataloader, model, optimizer, scheduler)
 
-        >>> for (input, target) in accelerator.skip_first_batches(dataloader, num_batches=2):
+        >>> for input, target in accelerator.skip_first_batches(dataloader, num_batches=2):
         ...     optimizer.zero_grad()
         ...     output = model(input)
         ...     loss = loss_func(output, target)
diff --git a/src/accelerate/checkpointing.py b/src/accelerate/checkpointing.py
index 780bcc76..9e3cfdd2 100644
--- a/src/accelerate/checkpointing.py
+++ b/src/accelerate/checkpointing.py
@@ -169,7 +169,7 @@ def load_accelerator_state(
         if is_tpu_available():
             xm.set_rng_state(states["xm_seed"])
         logger.info("All random states loaded successfully")
-    except:
+    except Exception:
         logger.info("Could not load random states")
 
 
diff --git a/src/accelerate/commands/config/config_utils.py b/src/accelerate/commands/config/config_utils.py
index dbf903c2..d5b65ef4 100644
--- a/src/accelerate/commands/config/config_utils.py
+++ b/src/accelerate/commands/config/config_utils.py
@@ -48,7 +48,7 @@ def _ask_field(input_text, convert_value=None, default=None, error_message=None):
             if default is not None and len(result) == 0:
                 return default
             return convert_value(result) if convert_value is not None else result
-        except:
+        except Exception:
             if error_message is not None:
                 print(error_message)
 
diff --git a/src/accelerate/commands/launch.py b/src/accelerate/commands/launch.py
index 09934f1b..d4d5a3e7 100644
--- a/src/accelerate/commands/launch.py
+++ b/src/accelerate/commands/launch.py
@@ -25,9 +25,9 @@ from ast import literal_eval
 from pathlib import Path
 from typing import Dict, List
 
+import psutil
 import torch
 
-import psutil
 from accelerate.commands.config import default_config_file, load_config_from_file
 from accelerate.commands.config.config_args import SageMakerConfig
 from accelerate.commands.config.config_utils import DYNAMO_BACKENDS
@@ -644,7 +644,7 @@ def multi_gpu_launcher(args):
     with patch_environment(**current_env):
         try:
             distrib_run.run(args)
-        except:
+        except Exception:
             if is_rich_available() and debug:
                 console = get_console()
                 console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
@@ -770,7 +770,7 @@ def deepspeed_launcher(args):
    with patch_environment(**current_env):
         try:
             distrib_run.run(args)
-        except:
+        except Exception:
             if is_rich_available() and debug:
                 console = get_console()
                 console.print("\n[bold red]Using --debug, `torch.distributed` Stack Trace:[/bold red]")
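The launcher hunks above, like the earlier ones in `accelerator.py` and `checkpointing.py`, replace bare `except:` clauses with `except Exception:`. That satisfies ruff's E722 rule (part of the selected `E` set); a bare clause also traps `BaseException` subclasses such as `KeyboardInterrupt` and `SystemExit`, which these code paths have no reason to swallow. A small illustration with a hypothetical helper, not code from the patch:

```python
# A bare `except:` would also swallow Ctrl-C (KeyboardInterrupt);
# `except Exception:` only catches ordinary runtime errors.
def parse_int_or_default(text, default=0):
    try:
        return int(text)
    except Exception:
        return default


print(parse_int_or_default("42"), parse_int_or_default("not a number"))
```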
diff --git a/src/accelerate/commands/menu/__init__.py b/src/accelerate/commands/menu/__init__.py
index ec17fba5..a9475e1b 100644
--- a/src/accelerate/commands/menu/__init__.py
+++ b/src/accelerate/commands/menu/__init__.py
@@ -1,5 +1 @@
-# flake8: noqa
-# There's no way to ignore "F401 '...' imported but unused" warnings in this
-# module, but to preserve other warnings. So, don't check this module at all
-
 from .selection_menu import BulletMenu
diff --git a/src/accelerate/commands/tpu.py b/src/accelerate/commands/tpu.py
index 6b90770c..1293b69f 100644
--- a/src/accelerate/commands/tpu.py
+++ b/src/accelerate/commands/tpu.py
@@ -18,9 +18,10 @@ import argparse
 import os
 import subprocess
 
-from accelerate.commands.config.config_args import default_config_file, load_config_from_file
 from packaging.version import Version, parse
 
+from accelerate.commands.config.config_args import default_config_file, load_config_from_file
+
 
 _description = "Run commands across TPU VMs for initial setup before running `accelerate launch`."
 
diff --git a/src/accelerate/memory_utils.py b/src/accelerate/memory_utils.py
index eba10bb7..fa2e2c8b 100644
--- a/src/accelerate/memory_utils.py
+++ b/src/accelerate/memory_utils.py
@@ -12,11 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# flake8: noqa
-# There's no way to ignore "F401 '...' imported but unused" warnings in this
-# module, but to preserve other warnings. So, don't check this module at all
-
-
 import warnings
 
 
@@ -25,5 +20,3 @@ warnings.warn(
     "`from accelerate import find_executable_batch_size` to avoid this warning.",
     FutureWarning,
 )
-
-from .utils.memory import find_executable_batch_size
diff --git a/src/accelerate/test_utils/__init__.py b/src/accelerate/test_utils/__init__.py
index b58b932b..1c42c744 100644
--- a/src/accelerate/test_utils/__init__.py
+++ b/src/accelerate/test_utils/__init__.py
@@ -1,7 +1,3 @@
-# flake8: noqa
-# There's no way to ignore "F401 '...' imported but unused" warnings in this
-# module, but to preserve other warnings. So, don't check this module at all.
-
 from .testing import (
     are_the_same_tensors,
     execute_subprocess_async,
@@ -19,4 +15,4 @@ from .testing import (
 from .training import RegressionDataset, RegressionModel
 
 
-from .scripts import test_script, test_sync  # isort:skip
+from .scripts import test_script, test_sync  # isort: skip
diff --git a/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py b/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py
index cde602df..d8654352 100644
--- a/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py
+++ b/src/accelerate/test_utils/scripts/external_deps/test_checkpointing.py
@@ -16,15 +16,15 @@ import argparse
 import json
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
-import evaluate
 from accelerate import Accelerator, DistributedType
 from accelerate.utils.deepspeed import DummyOptim, DummyScheduler
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 
 MAX_GPU_BATCH_SIZE = 16
diff --git a/src/accelerate/test_utils/scripts/external_deps/test_metrics.py b/src/accelerate/test_utils/scripts/external_deps/test_metrics.py
index c984b369..e32137ab 100755
--- a/src/accelerate/test_utils/scripts/external_deps/test_metrics.py
+++ b/src/accelerate/test_utils/scripts/external_deps/test_metrics.py
@@ -15,17 +15,17 @@
 import math
 from copy import deepcopy
 
-import torch
-from torch.utils.data import DataLoader
-
 import datasets
 import evaluate
+import torch
 import transformers
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
 from accelerate import Accelerator
 from accelerate.test_utils import RegressionDataset, RegressionModel
 from accelerate.utils import is_tpu_available, set_seed
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 
 def get_basic_setup(accelerator, num_samples=82, batch_size=16):
@@ -84,7 +84,7 @@ def generate_predictions(model, dataloader, accelerator):
         logit, target = accelerator.gather_for_metrics((logit, target))
         logits_and_targets.append((logit, target))
     logits, targs = [], []
-    for (logit, targ) in logits_and_targets:
+    for logit, targ in logits_and_targets:
         logits.append(logit)
         targs.append(targ)
     logits, targs = torch.cat(logits), torch.cat(targs)
diff --git a/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py b/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py
index 7bb5ca3b..70bb8f84 100644
--- a/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py
+++ b/src/accelerate/test_utils/scripts/external_deps/test_peak_memory_usage.py
@@ -18,13 +18,13 @@ import json
 import os
 
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 from accelerate import Accelerator, DistributedType
 from accelerate.utils.deepspeed import DummyOptim, DummyScheduler
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 
 MAX_GPU_BATCH_SIZE = 16
diff --git a/src/accelerate/test_utils/scripts/external_deps/test_performance.py b/src/accelerate/test_utils/scripts/external_deps/test_performance.py
index 324a1854..93382026 100644
--- a/src/accelerate/test_utils/scripts/external_deps/test_performance.py
+++ b/src/accelerate/test_utils/scripts/external_deps/test_performance.py
@@ -16,15 +16,15 @@ import argparse
 import json
 import os
 
+import evaluate
 import torch
+from datasets import load_dataset
 from torch.optim import AdamW
 from torch.utils.data import DataLoader
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
-import evaluate
 from accelerate import Accelerator, DistributedType
 from accelerate.utils.deepspeed import DummyOptim, DummyScheduler
-from datasets import load_dataset
-from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed
 
 
 MAX_GPU_BATCH_SIZE = 16
diff --git a/src/accelerate/test_utils/scripts/test_distributed_data_loop.py b/src/accelerate/test_utils/scripts/test_distributed_data_loop.py
index 6576e035..850f7310 100644
--- a/src/accelerate/test_utils/scripts/test_distributed_data_loop.py
+++ b/src/accelerate/test_utils/scripts/test_distributed_data_loop.py
@@ -77,7 +77,6 @@ def verify_dataloader_batch_sizes(
 
 
 def test_default_ensures_even_batch_sizes():
-
     accelerator = create_accelerator()
 
     # without padding, we would expect a different number of batches
@@ -144,7 +143,6 @@ def test_can_join_uneven_inputs():
 
 
 def test_join_raises_warning_for_non_ddp_distributed(accelerator):
-
     with warnings.catch_warnings(record=True) as w:
         with accelerator.join_uneven_inputs([Mock()]):
             pass
diff --git a/src/accelerate/test_utils/testing.py b/src/accelerate/test_utils/testing.py
index 26968b22..66846e00 100644
--- a/src/accelerate/test_utils/testing.py
+++ b/src/accelerate/test_utils/testing.py
@@ -338,7 +338,6 @@ async def _stream_subprocess(cmd, env=None, stdin=None, timeout=None, quiet=Fals
 
 
 def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, quiet=False, echo=True) -> _RunOutput:
-
     loop = asyncio.get_event_loop()
     result = loop.run_until_complete(
         _stream_subprocess(cmd, env=env, stdin=stdin, timeout=timeout, quiet=quiet, echo=echo)
diff --git a/src/accelerate/utils/__init__.py b/src/accelerate/utils/__init__.py
index 9cfb3d88..2eaa827e 100644
--- a/src/accelerate/utils/__init__.py
+++ b/src/accelerate/utils/__init__.py
@@ -1,7 +1,3 @@
-# flake8: noqa
-# There's no way to ignore "F401 '...' imported but unused" warnings in this
-# module, but to preserve other warnings. So, don't check this module at all
-
 from .constants import MODEL_NAME, OPTIMIZER_NAME, RNG_STATE_NAME, SCALER_NAME, SCHEDULER_NAME, TORCH_LAUNCH_PARAMS
 from .dataclasses import (
     ComputeEnvironment,
diff --git a/src/accelerate/utils/deepspeed.py b/src/accelerate/utils/deepspeed.py
index 03488cc3..bafe34de 100644
--- a/src/accelerate/utils/deepspeed.py
+++ b/src/accelerate/utils/deepspeed.py
@@ -40,7 +40,6 @@ class HfDeepSpeedConfig:
     """
 
     def __init__(self, config_file_or_dict):
-
         if isinstance(config_file_or_dict, dict):
             # Don't modify user's data should they want to reuse it (e.g. in tests), because once we
             # modified it, it will not be accepted here again, since `auto` values would have been overridden
diff --git a/src/accelerate/utils/imports.py b/src/accelerate/utils/imports.py
index 0e8ada7e..44d694d3 100644
--- a/src/accelerate/utils/imports.py
+++ b/src/accelerate/utils/imports.py
@@ -20,7 +20,6 @@ from distutils.util import strtobool
 from functools import lru_cache
 
 import torch
-
 from packaging.version import parse
 
 from .environment import parse_flag_from_env
diff --git a/src/accelerate/utils/megatron_lm.py b/src/accelerate/utils/megatron_lm.py
index ab3bd8c3..4897463a 100644
--- a/src/accelerate/utils/megatron_lm.py
+++ b/src/accelerate/utils/megatron_lm.py
@@ -58,9 +58,8 @@ if is_megatron_lm_available():
         set_jit_fusion_options,
         write_args_to_tensorboard,
     )
-    from megatron.model import BertModel
+    from megatron.model import BertModel, Float16Module, GPTModel, ModelType, T5Model
     from megatron.model import DistributedDataParallel as LocalDDP
-    from megatron.model import Float16Module, GPTModel, ModelType, T5Model
     from megatron.model.classification import Classification
     from megatron.optimizer import get_megatron_optimizer
     from megatron.schedules import get_forward_backward_func
@@ -101,7 +100,6 @@ def model_provider_func(pre_process=True, post_process=True, add_encoder=True, a
             post_process=post_process,
         )
     else:
-
         model = Classification(
             num_classes=args.num_labels, num_tokentypes=2, pre_process=pre_process, post_process=post_process
         )
@@ -270,7 +268,6 @@ class MegatronLMDummyDataLoader:
 
     # Data loader only on rank 0 of each model parallel group.
     if mpu.get_tensor_model_parallel_rank() == 0:
-
        # Number of train/valid/test samples.
        if args.train_samples:
            train_samples = args.train_samples
diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py
index ffd0c7f7..9831addb 100644
--- a/tests/deepspeed/test_deepspeed.py
+++ b/tests/deepspeed/test_deepspeed.py
@@ -22,7 +22,12 @@ from copy import deepcopy
 from pathlib import Path
 
 import torch
+from parameterized import parameterized
 from torch.utils.data import DataLoader
+from transformers import AutoModel, AutoModelForCausalLM, get_scheduler
+from transformers.testing_utils import mockenv_context
+from transformers.trainer_utils import set_seed
+from transformers.utils import is_torch_bf16_available
 
 import accelerate
 from accelerate.accelerator import Accelerator
@@ -47,11 +52,6 @@ from accelerate.utils.deepspeed import (
     DummyScheduler,
 )
 from accelerate.utils.other import patch_environment
-from parameterized import parameterized
-from transformers import AutoModel, AutoModelForCausalLM, get_scheduler
-from transformers.testing_utils import mockenv_context
-from transformers.trainer_utils import set_seed
-from transformers.utils import is_torch_bf16_available
 
 
 set_seed(42)
@@ -133,7 +133,6 @@ class DeepSpeedConfigIntegration(AccelerateTestCase):
 
     @parameterized.expand(stages, name_func=parameterized_custom_name_func)
     def test_deepspeed_plugin(self, stage):
-
         # Test zero3_init_flag will be set to False when ZeRO stage != 3
         deepspeed_plugin = DeepSpeedPlugin(
             gradient_accumulation_steps=1,
diff --git a/tests/fsdp/test_fsdp.py b/tests/fsdp/test_fsdp.py
index e71a91ea..38d8d506 100644
--- a/tests/fsdp/test_fsdp.py
+++ b/tests/fsdp/test_fsdp.py
@@ -17,6 +17,9 @@ import inspect
 import os
 
 import torch
+from transformers import AutoModel
+from transformers.testing_utils import mockenv_context
+from transformers.trainer_utils import set_seed
 
 import accelerate
 from accelerate.accelerator import Accelerator
@@ -38,9 +41,6 @@ from accelerate.utils.constants import (
 )
 from accelerate.utils.dataclasses import FullyShardedDataParallelPlugin
 from accelerate.utils.other import patch_environment
-from transformers import AutoModel
-from transformers.testing_utils import mockenv_context
-from transformers.trainer_utils import set_seed
 
 
 set_seed(42)
diff --git a/tests/test_big_modeling.py b/tests/test_big_modeling.py
index d15f8a48..893ab062 100644
--- a/tests/test_big_modeling.py
+++ b/tests/test_big_modeling.py
@@ -18,6 +18,7 @@ from tempfile import TemporaryDirectory
 
 import torch
 import torch.nn as nn
+from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from accelerate.big_modeling import (
     cpu_offload,
@@ -31,7 +32,6 @@ from accelerate.big_modeling import (
 )
 from accelerate.hooks import remove_hook_from_submodules
 from accelerate.test_utils import require_cuda, require_mps, require_multi_gpu, require_torch_min_version, slow
 from accelerate.utils import offload_state_dict
-from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 class ModelForTest(nn.Module):
diff --git a/tests/test_sagemaker.py b/tests/test_sagemaker.py
index 2824493d..55cdf7cb 100644
--- a/tests/test_sagemaker.py
+++ b/tests/test_sagemaker.py
@@ -2,6 +2,7 @@ import unittest
 from dataclasses import dataclass
 
 import pytest
+
 from accelerate.commands.config.config_args import SageMakerConfig
 from accelerate.commands.launch import _convert_nargs_to_dict
 from accelerate.utils import ComputeEnvironment
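One last detail from `src/accelerate/test_utils/__init__.py` above: the trailing directive changed from `# isort:skip` to `# isort: skip`, presumably the spelling (with a space) that ruff's import sorter recognizes. The action comment pins an import in place rather than letting it be folded into the sorted block — a hypothetical sketch, not the file itself:

```python
# Hypothetical __init__.py: the action comment keeps the last import where
# it is, even though sorting would otherwise move it above `training`.
from .training import RegressionDataset

from .scripts import test_script  # isort: skip
```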