From 12a50f294d50e3d0e124511f2b6f43625f73ffce Mon Sep 17 00:00:00 2001
From: Yuanyuan Chen
Date: Fri, 17 Oct 2025 23:00:40 +0800
Subject: [PATCH] Enable FURB rules in ruff (#41395)

* Apply ruff FURB rules

Signed-off-by: Yuanyuan Chen

* Enable ruff FURB rules

Signed-off-by: Yuanyuan Chen

* More fixes

Signed-off-by: Yuanyuan Chen

* More fixes

Signed-off-by: Yuanyuan Chen

* Revert changes

Signed-off-by: Yuanyuan Chen

* More fixes

Signed-off-by: Yuanyuan Chen

---------

Signed-off-by: Yuanyuan Chen
---
 examples/legacy/seq2seq/pack_dataset.py | 4 ++--
 examples/legacy/seq2seq/rouge_cli.py | 4 ++--
 examples/legacy/seq2seq/run_distributed_eval.py | 2 +-
 examples/legacy/seq2seq/run_eval.py | 6 +++---
 examples/legacy/seq2seq/utils.py | 2 +-
 examples/legacy/token-classification/scripts/preprocess.py | 2 +-
 examples/pytorch/token-classification/run_ner.py | 3 +--
 pyproject.toml | 2 +-
 src/transformers/models/bartpho/tokenization_bartpho.py | 2 +-
 src/transformers/models/d_fine/modeling_d_fine.py | 2 +-
 .../models/deprecated/jukebox/modeling_jukebox.py | 2 +-
 .../models/efficientloftr/modeling_efficientloftr.py | 2 +-
 src/transformers/models/ibert/quant_modules.py | 2 +-
 src/transformers/models/luke/tokenization_luke.py | 2 +-
 src/transformers/models/mluke/tokenization_mluke.py | 2 +-
 src/transformers/models/rt_detr/modeling_rt_detr.py | 2 +-
 src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py | 2 +-
 src/transformers/models/swin2sr/modeling_swin2sr.py | 2 +-
 src/transformers/models/swinv2/modeling_swinv2.py | 2 +-
 src/transformers/models/vits/convert_original_checkpoint.py | 2 +-
 .../models/vivit/convert_vivit_flax_to_pytorch.py | 3 +--
 src/transformers/models/whisper/convert_openai_to_hf.py | 3 +--
 tests/models/bartpho/test_tokenization_bartpho.py | 3 +--
 tests/models/bertweet/test_tokenization_bertweet.py | 3 +--
 tests/models/phobert/test_tokenization_phobert.py | 3 +--
 utils/fetch_hub_objects_for_ci.py | 3 +--
 utils/modular_integrations.py | 2 +-
 27 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/examples/legacy/seq2seq/pack_dataset.py b/examples/legacy/seq2seq/pack_dataset.py
index 034537d1cff..d85e31a7c2c 100755
--- a/examples/legacy/seq2seq/pack_dataset.py
+++ b/examples/legacy/seq2seq/pack_dataset.py
@@ -60,8 +60,8 @@ def pack_data_dir(tok, data_dir: Path, max_tokens, save_path):
     save_path.mkdir(exist_ok=True)
     for split in ["train"]:
         src_path, tgt_path = data_dir / f"{split}.source", data_dir / f"{split}.target"
-        src_docs = [x.rstrip() for x in Path(src_path).open().readlines()]
-        tgt_docs = [x.rstrip() for x in Path(tgt_path).open().readlines()]
+        src_docs = [x.rstrip() for x in Path(src_path).open()]
+        tgt_docs = [x.rstrip() for x in Path(tgt_path).open()]
         packed_src, packed_tgt = pack_examples(tok, src_docs, tgt_docs, max_tokens)
         print(f"packed {split} split from {len(src_docs)} examples -> {len(packed_src)}.")
         Path(save_path / f"{split}.source").open("w").write("\n".join(packed_src))
diff --git a/examples/legacy/seq2seq/rouge_cli.py b/examples/legacy/seq2seq/rouge_cli.py
index cd636bbcd1c..d46544b9615 100644
--- a/examples/legacy/seq2seq/rouge_cli.py
+++ b/examples/legacy/seq2seq/rouge_cli.py
@@ -19,8 +19,8 @@ from utils import calculate_rouge, save_json
 
 def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs):
     """Kwargs will be passed to calculate_rouge"""
-    pred_lns = [x.strip() for x in open(pred_path).readlines()]
-    tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)]
+    pred_lns = [x.strip() for x in open(pred_path)]
+    tgt_lns = [x.strip() for x in open(tgt_path)][: len(pred_lns)]
     metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs)
     if save_path is not None:
         save_json(metrics, save_path, indent=None)
diff --git a/examples/legacy/seq2seq/run_distributed_eval.py b/examples/legacy/seq2seq/run_distributed_eval.py
index 795da2efe4b..53fab8af997 100755
--- a/examples/legacy/seq2seq/run_distributed_eval.py
+++ b/examples/legacy/seq2seq/run_distributed_eval.py
@@ -205,7 +205,7 @@ def run_generate():
         return
     tgt_file = Path(args.data_dir).joinpath(args.type_path + ".target")
     with open(tgt_file) as f:
-        labels = [x.rstrip() for x in f.readlines()][: len(preds)]
+        labels = [x.rstrip() for x in f][: len(preds)]
 
     # Calculate metrics, save metrics, and save _generations.txt
     calc_bleu = "translation" in args.task
diff --git a/examples/legacy/seq2seq/run_eval.py b/examples/legacy/seq2seq/run_eval.py
index f5ef4f5d165..180143046bb 100755
--- a/examples/legacy/seq2seq/run_eval.py
+++ b/examples/legacy/seq2seq/run_eval.py
@@ -130,7 +130,7 @@ def run_generate(verbose=True):
     parsed_args = parse_numeric_n_bool_cl_kwargs(rest)
     if parsed_args and verbose:
         print(f"parsed the following generate kwargs: {parsed_args}")
-    examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path).readlines()]
+    examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path)]
     if args.n_obs > 0:
         examples = examples[: args.n_obs]
     Path(args.save_path).parent.mkdir(exist_ok=True)
@@ -159,8 +159,8 @@ def run_generate(verbose=True):
 
     # Compute scores
     score_fn = calculate_bleu if "translation" in args.task else calculate_rouge
-    output_lns = [x.rstrip() for x in open(args.save_path).readlines()]
-    reference_lns = [x.rstrip() for x in open(args.reference_path).readlines()][: len(output_lns)]
+    output_lns = [x.rstrip() for x in open(args.save_path)]
+    reference_lns = [x.rstrip() for x in open(args.reference_path)][: len(output_lns)]
     scores: dict = score_fn(output_lns, reference_lns)
     scores.update(runtime_metrics)
 
diff --git a/examples/legacy/seq2seq/utils.py b/examples/legacy/seq2seq/utils.py
index 9f96e6d6617..d296c846fbb 100644
--- a/examples/legacy/seq2seq/utils.py
+++ b/examples/legacy/seq2seq/utils.py
@@ -162,7 +162,7 @@ class AbstractSeq2SeqDataset(Dataset):
 
     @staticmethod
     def get_char_lens(data_file):
-        return [len(x) for x in Path(data_file).open().readlines()]
+        return [len(x) for x in Path(data_file).open()]
 
     @cached_property
     def tgt_lens(self):
diff --git a/examples/legacy/token-classification/scripts/preprocess.py b/examples/legacy/token-classification/scripts/preprocess.py
index 40ecf2b32ac..8a4cef710db 100644
--- a/examples/legacy/token-classification/scripts/preprocess.py
+++ b/examples/legacy/token-classification/scripts/preprocess.py
@@ -31,7 +31,7 @@ with open(dataset) as f_p:
             continue
 
         if (subword_len_counter + current_subwords_len) > max_len:
-            print("")
+            print()
             print(line)
             subword_len_counter = current_subwords_len
             continue
diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py
index 60548096e3a..bf172eb2567 100755
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -616,8 +616,7 @@ def main():
         output_predictions_file = os.path.join(training_args.output_dir, "predictions.txt")
         if trainer.is_world_process_zero():
             with open(output_predictions_file, "w") as writer:
-                for prediction in true_predictions:
-                    writer.write(" ".join(prediction) + "\n")
+                writer.writelines(" ".join(prediction) + "\n" for prediction in true_predictions)
 
     kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "token-classification"}
     if data_args.dataset_name is not None:
diff --git a/pyproject.toml b/pyproject.toml
index 10ee24cf0a6..54ec1618e38 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ line-length = 119
 ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905", "UP009", "UP015", "UP031", "UP028", "UP004", "UP045", "UP007"]
 # RUF013: Checks for the use of implicit Optional
 # in type annotations when the default parameter value is None.
-select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794"]
+select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794", "FURB"]
 extend-safe-fixes = ["UP006"]
 
 # Ignore import violations in all `__init__.py` files.
diff --git a/src/transformers/models/bartpho/tokenization_bartpho.py b/src/transformers/models/bartpho/tokenization_bartpho.py
index 41a122bf913..9b16357a83e 100644
--- a/src/transformers/models/bartpho/tokenization_bartpho.py
+++ b/src/transformers/models/bartpho/tokenization_bartpho.py
@@ -140,7 +140,7 @@ class BartphoTokenizer(PreTrainedTokenizer):
             self.fairseq_tokens_to_ids[str(token)] = cnt
             cnt += 1
         with open(monolingual_vocab_file, "r", encoding="utf-8") as f:
-            for line in f.readlines():
+            for line in f:
                 token = line.strip().split()[0]
                 self.fairseq_tokens_to_ids[token] = len(self.fairseq_tokens_to_ids)
         if str(mask_token) not in self.fairseq_tokens_to_ids:
diff --git a/src/transformers/models/d_fine/modeling_d_fine.py b/src/transformers/models/d_fine/modeling_d_fine.py
index 701e37b2619..5e79b02f571 100644
--- a/src/transformers/models/d_fine/modeling_d_fine.py
+++ b/src/transformers/models/d_fine/modeling_d_fine.py
@@ -2171,7 +2171,7 @@ class DFineHybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)
 
-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()
 
         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]
diff --git a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py
index daaa4b2ee48..ac859736152 100755
--- a/src/transformers/models/deprecated/jukebox/modeling_jukebox.py
+++ b/src/transformers/models/deprecated/jukebox/modeling_jukebox.py
@@ -265,7 +265,7 @@ class JukeboxResnet1D(nn.Module):
             blocks.append(JukeboxResConv1DBlock(config, conv_width, block_depth, res_scale))
 
         if reverse_dilation:
-            blocks = blocks[::-1]
+            blocks.reverse()
         self.resnet_block = nn.ModuleList(blocks)
 
     def forward(self, hidden_states):
diff --git a/src/transformers/models/efficientloftr/modeling_efficientloftr.py b/src/transformers/models/efficientloftr/modeling_efficientloftr.py
index 07f53e9dee6..16c9eabdcd6 100644
--- a/src/transformers/models/efficientloftr/modeling_efficientloftr.py
+++ b/src/transformers/models/efficientloftr/modeling_efficientloftr.py
@@ -617,7 +617,7 @@ class EfficientLoFTRFineFusionLayer(nn.Module):
     def forward(
         self,
         coarse_features: torch.Tensor,
-        residual_features: list[torch.Tensor],
+        residual_features: list[torch.Tensor] | tuple[torch.Tensor],
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         For each image pair, compute the fine features of pixels.
diff --git a/src/transformers/models/ibert/quant_modules.py b/src/transformers/models/ibert/quant_modules.py
index 949702a5af9..06789af620e 100644
--- a/src/transformers/models/ibert/quant_modules.py
+++ b/src/transformers/models/ibert/quant_modules.py
@@ -723,7 +723,7 @@ def batch_frexp(inputs, max_bit=31):
     tmp_m = []
     for m in output_m:
         int_m_shifted = int(
-            decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP)
+            decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal(1), rounding=decimal.ROUND_HALF_UP)
         )
         tmp_m.append(int_m_shifted)
     output_m = np.array(tmp_m)
diff --git a/src/transformers/models/luke/tokenization_luke.py b/src/transformers/models/luke/tokenization_luke.py
index 4bb19bb5ee7..0e038b46b8e 100644
--- a/src/transformers/models/luke/tokenization_luke.py
+++ b/src/transformers/models/luke/tokenization_luke.py
@@ -1033,7 +1033,7 @@ class LukeTokenizer(PreTrainedTokenizer):
             if head_token_span[0] < tail_token_span[0]:
                 first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
                 first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
-                token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+                token_span_with_special_token_ids.reverse()
             else:
                 first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
                 first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)
diff --git a/src/transformers/models/mluke/tokenization_mluke.py b/src/transformers/models/mluke/tokenization_mluke.py
index d63129c7b7e..4ecf6e9ab5d 100644
--- a/src/transformers/models/mluke/tokenization_mluke.py
+++ b/src/transformers/models/mluke/tokenization_mluke.py
@@ -868,7 +868,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
             if head_token_span[0] < tail_token_span[0]:
                 first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
                 first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
-                token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+                token_span_with_special_token_ids.reverse()
             else:
                 first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
                 first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)
diff --git a/src/transformers/models/rt_detr/modeling_rt_detr.py b/src/transformers/models/rt_detr/modeling_rt_detr.py
index acead9a8705..05159b06e33 100644
--- a/src/transformers/models/rt_detr/modeling_rt_detr.py
+++ b/src/transformers/models/rt_detr/modeling_rt_detr.py
@@ -1262,7 +1262,7 @@ class RTDetrHybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)
 
-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()
 
         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]
diff --git a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py
index 5b736796cfe..6f85dacad09 100644
--- a/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py
+++ b/src/transformers/models/rt_detr_v2/modeling_rt_detr_v2.py
@@ -1218,7 +1218,7 @@ class RTDetrV2HybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)
 
-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()
 
         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]
diff --git a/src/transformers/models/swin2sr/modeling_swin2sr.py b/src/transformers/models/swin2sr/modeling_swin2sr.py
index 534b2f2102b..4fb1267f47c 100644
--- a/src/transformers/models/swin2sr/modeling_swin2sr.py
+++ b/src/transformers/models/swin2sr/modeling_swin2sr.py
@@ -457,7 +457,7 @@ class Swin2SRLayer(nn.Module):
         self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
 
     def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
-        window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+        window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
         shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
         return window_size, shift_size
 
diff --git a/src/transformers/models/swinv2/modeling_swinv2.py b/src/transformers/models/swinv2/modeling_swinv2.py
index afdcf3396b4..0d87c23ffc6 100644
--- a/src/transformers/models/swinv2/modeling_swinv2.py
+++ b/src/transformers/models/swinv2/modeling_swinv2.py
@@ -626,7 +626,7 @@ class Swinv2Layer(nn.Module):
         self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
 
     def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
-        window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+        window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
         shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
         return window_size, shift_size
 
diff --git a/src/transformers/models/vits/convert_original_checkpoint.py b/src/transformers/models/vits/convert_original_checkpoint.py
index 7f122e86fa5..73552f7c2b2 100644
--- a/src/transformers/models/vits/convert_original_checkpoint.py
+++ b/src/transformers/models/vits/convert_original_checkpoint.py
@@ -329,7 +329,7 @@ def convert_checkpoint(
         phonemize = True
     else:
         # Save vocab as temporary json file
-        symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8").readlines()]
+        symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8")]
        symbol_to_id = {s: i for i, s in enumerate(symbols)}
        # MMS-TTS does not use a token, so we set to the token used to space characters
        _pad = symbols[0]
diff --git a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py
index 517f8fdb153..f18b26a4e6a 100644
--- a/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py
+++ b/src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py
@@ -36,8 +36,7 @@ def download_checkpoint(path):
 
     with open(path, "wb") as f:
         with requests.get(url, stream=True) as req:
-            for chunk in req.iter_content(chunk_size=2048):
-                f.write(chunk)
+            f.writelines(req.iter_content(chunk_size=2048))
 
 
 def get_vivit_config() -> VivitConfig:
diff --git a/src/transformers/models/whisper/convert_openai_to_hf.py b/src/transformers/models/whisper/convert_openai_to_hf.py
index 5684154717a..ca0b61c73e5 100755
--- a/src/transformers/models/whisper/convert_openai_to_hf.py
+++ b/src/transformers/models/whisper/convert_openai_to_hf.py
@@ -317,8 +317,7 @@ def convert_tiktoken_to_hf(
 
     with open(merge_file, "w", encoding="utf-8") as writer:
         writer.write("#version: 0.2\n")
-        for bpe_tokens in merges:
-            writer.write(bpe_tokens + "\n")
+        writer.writelines(bpe_tokens + "\n" for bpe_tokens in merges)
 
     hf_tokenizer = WhisperTokenizer(vocab_file, merge_file)
 
diff --git a/tests/models/bartpho/test_tokenization_bartpho.py b/tests/models/bartpho/test_tokenization_bartpho.py
index 10e3ad5623c..f78b17896cb 100644
--- a/tests/models/bartpho/test_tokenization_bartpho.py
+++ b/tests/models/bartpho/test_tokenization_bartpho.py
@@ -40,8 +40,7 @@ class BartphoTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
 
         cls.monolingual_vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["monolingual_vocab_file"])
         with open(cls.monolingual_vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
 
         tokenizer = BartphoTokenizer(SAMPLE_VOCAB, cls.monolingual_vocab_file, **cls.special_tokens_map)
         tokenizer.save_pretrained(cls.tmpdirname)
diff --git a/tests/models/bertweet/test_tokenization_bertweet.py b/tests/models/bertweet/test_tokenization_bertweet.py
index c6bffb7e2e6..9c4aefecb9c 100644
--- a/tests/models/bertweet/test_tokenization_bertweet.py
+++ b/tests/models/bertweet/test_tokenization_bertweet.py
@@ -38,8 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         cls.vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
         cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
         with open(cls.vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
         with open(cls.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))
 
diff --git a/tests/models/phobert/test_tokenization_phobert.py b/tests/models/phobert/test_tokenization_phobert.py
index e459f013533..922a9cac155 100644
--- a/tests/models/phobert/test_tokenization_phobert.py
+++ b/tests/models/phobert/test_tokenization_phobert.py
@@ -39,8 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
 
         with open(cls.vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
         with open(cls.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))
 
diff --git a/utils/fetch_hub_objects_for_ci.py b/utils/fetch_hub_objects_for_ci.py
index 55e314a3793..2cdc2d8643b 100644
--- a/utils/fetch_hub_objects_for_ci.py
+++ b/utils/fetch_hub_objects_for_ci.py
@@ -210,8 +210,7 @@ if __name__ == "__main__":
             response.raise_for_status()
 
             with open(filename, "wb") as f:
-                for chunk in response.iter_content(chunk_size=8192):
-                    f.write(chunk)
+                f.writelines(response.iter_content(chunk_size=8192))
             print(f"Successfully downloaded: {filename}")
         except requests.exceptions.RequestException as e:
             print(f"Error downloading {filename}: {e}")
diff --git a/utils/modular_integrations.py b/utils/modular_integrations.py
index e88c522557e..2c2ecc6c541 100644
--- a/utils/modular_integrations.py
+++ b/utils/modular_integrations.py
@@ -34,7 +34,7 @@ def convert_relative_import_to_absolute(
     rel_level = len(import_node.relative)
 
     # Strip file extension and split into parts
-    file_path_no_ext = file_path[:-3] if file_path.endswith(".py") else file_path
+    file_path_no_ext = file_path.removesuffix(".py")
     file_parts = file_path_no_ext.split(os.path.sep)
 
     # Ensure the file path includes the package name
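
The diff above applies the same few FURB-style rewrites across many files. The short, self-contained Python sketch below gathers those patterns in one place for reference; the file names and sample data in it are hypothetical and are not taken from the patch.

from pathlib import Path
import tempfile


def demo_furb_patterns(tmp_dir: Path) -> None:
    # Hypothetical input file, only used to exercise the idioms below.
    src = tmp_dir / "demo_module.py"
    src.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")

    # Iterate the file object directly instead of calling .readlines().
    with src.open(encoding="utf-8") as f:
        lines = [x.rstrip() for x in f]

    # Write an iterable with writelines() instead of a write() loop.
    out = tmp_dir / "demo.target"
    with out.open("w", encoding="utf-8") as writer:
        writer.writelines(line + "\n" for line in lines)

    # Reverse a list in place instead of rebinding it to a [::-1] copy.
    lines.reverse()

    # Strip a known suffix with str.removesuffix() instead of slicing.
    module_name = src.name.removesuffix(".py")

    # Use min() instead of a conditional expression when clamping values.
    window_size = [min(r, w) for r, w in zip((8, 8), (7, 12))]

    # Call print() rather than print("") for an empty line.
    print()
    print(module_name, window_size, lines)


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as d:
        demo_furb_patterns(Path(d))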