Enable FURB rules in ruff (#41395)

* Apply ruff FURB rules

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Enable ruff FURB rules

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert changes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

Author: Yuanyuan Chen
Date: 2025-10-17 23:00:40 +08:00
Committed by: GitHub
Parent: 39b6d3bf7e
Commit: 12a50f294d
27 changed files with 31 additions and 38 deletions

@@ -60,8 +60,8 @@ def pack_data_dir(tok, data_dir: Path, max_tokens, save_path):
save_path.mkdir(exist_ok=True)
for split in ["train"]:
src_path, tgt_path = data_dir / f"{split}.source", data_dir / f"{split}.target"
- src_docs = [x.rstrip() for x in Path(src_path).open().readlines()]
- tgt_docs = [x.rstrip() for x in Path(tgt_path).open().readlines()]
+ src_docs = [x.rstrip() for x in Path(src_path).open()]
+ tgt_docs = [x.rstrip() for x in Path(tgt_path).open()]
packed_src, packed_tgt = pack_examples(tok, src_docs, tgt_docs, max_tokens)
print(f"packed {split} split from {len(src_docs)} examples -> {len(packed_src)}.")
Path(save_path / f"{split}.source").open("w").write("\n".join(packed_src))

@@ -19,8 +19,8 @@ from utils import calculate_rouge, save_json
def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs):
"""Kwargs will be passed to calculate_rouge"""
- pred_lns = [x.strip() for x in open(pred_path).readlines()]
- tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)]
+ pred_lns = [x.strip() for x in open(pred_path)]
+ tgt_lns = [x.strip() for x in open(tgt_path)][: len(pred_lns)]
metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs)
if save_path is not None:
save_json(metrics, save_path, indent=None)

@@ -205,7 +205,7 @@ def run_generate():
return
tgt_file = Path(args.data_dir).joinpath(args.type_path + ".target")
with open(tgt_file) as f:
- labels = [x.rstrip() for x in f.readlines()][: len(preds)]
+ labels = [x.rstrip() for x in f][: len(preds)]
# Calculate metrics, save metrics, and save _generations.txt
calc_bleu = "translation" in args.task

@@ -130,7 +130,7 @@ def run_generate(verbose=True):
parsed_args = parse_numeric_n_bool_cl_kwargs(rest)
if parsed_args and verbose:
print(f"parsed the following generate kwargs: {parsed_args}")
examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path).readlines()]
examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path)]
if args.n_obs > 0:
examples = examples[: args.n_obs]
Path(args.save_path).parent.mkdir(exist_ok=True)
@@ -159,8 +159,8 @@ def run_generate(verbose=True):
# Compute scores
score_fn = calculate_bleu if "translation" in args.task else calculate_rouge
- output_lns = [x.rstrip() for x in open(args.save_path).readlines()]
- reference_lns = [x.rstrip() for x in open(args.reference_path).readlines()][: len(output_lns)]
+ output_lns = [x.rstrip() for x in open(args.save_path)]
+ reference_lns = [x.rstrip() for x in open(args.reference_path)][: len(output_lns)]
scores: dict = score_fn(output_lns, reference_lns)
scores.update(runtime_metrics)

@@ -162,7 +162,7 @@ class AbstractSeq2SeqDataset(Dataset):
@staticmethod
def get_char_lens(data_file):
- return [len(x) for x in Path(data_file).open().readlines()]
+ return [len(x) for x in Path(data_file).open()]
@cached_property
def tgt_lens(self):

@@ -31,7 +31,7 @@ with open(dataset) as f_p:
continue
if (subword_len_counter + current_subwords_len) > max_len:
print("")
print()
print(line)
subword_len_counter = current_subwords_len
continue

@@ -616,8 +616,7 @@ def main():
output_predictions_file = os.path.join(training_args.output_dir, "predictions.txt")
if trainer.is_world_process_zero():
with open(output_predictions_file, "w") as writer:
- for prediction in true_predictions:
-     writer.write(" ".join(prediction) + "\n")
+ writer.writelines(" ".join(prediction) + "\n" for prediction in true_predictions)
kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "token-classification"}
if data_args.dataset_name is not None:
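
Several later hunks make the same substitution: a `for` loop calling `write()` once per item becomes a single `writelines()` call fed by a generator expression. `writelines()` adds no separators, so each element still has to carry its own `\n`. A small sketch using an in-memory buffer and toy data (not the real predictions):

```python
import io

true_predictions = [["B-PER", "I-PER"], ["O", "B-LOC"]]  # toy labels, for illustration only

writer = io.StringIO()
# One writelines() call, equivalent to calling write(" ".join(prediction) + "\n") per item.
writer.writelines(" ".join(prediction) + "\n" for prediction in true_predictions)

assert writer.getvalue() == "B-PER I-PER\nO B-LOC\n"
```

The same shape covers the binary downloads further down (`f.writelines(response.iter_content(chunk_size=8192))`): `writelines` simply writes each chunk it is handed, bytes included, without inserting anything between them.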

@@ -32,7 +32,7 @@ line-length = 119
ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905", "UP009", "UP015", "UP031", "UP028", "UP004", "UP045", "UP007"]
# RUF013: Checks for the use of implicit Optional
# in type annotations when the default parameter value is None.
select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794"]
select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794", "FURB"]
extend-safe-fixes = ["UP006"]
# Ignore import violations in all `__init__.py` files.
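
With `"FURB"` added to `select`, `ruff check .` reports these diagnostics alongside the existing rule groups, and `ruff check --fix .` applies the ones ruff marks as fixable (some FURB fixes are flagged as unsafe and are only applied with `--unsafe-fixes`). The remaining hunks in this commit are what that pass, plus the manual follow-ups listed in the commit message, produced.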

@@ -140,7 +140,7 @@ class BartphoTokenizer(PreTrainedTokenizer):
self.fairseq_tokens_to_ids[str(token)] = cnt
cnt += 1
with open(monolingual_vocab_file, "r", encoding="utf-8") as f:
- for line in f.readlines():
+ for line in f:
token = line.strip().split()[0]
self.fairseq_tokens_to_ids[token] = len(self.fairseq_tokens_to_ids)
if str(mask_token) not in self.fairseq_tokens_to_ids:

@@ -2171,7 +2171,7 @@ class DFineHybridEncoder(nn.Module):
new_fpn_feature_map = fpn_block(fused_feature_map)
fpn_feature_maps.append(new_fpn_feature_map)
- fpn_feature_maps = fpn_feature_maps[::-1]
+ fpn_feature_maps.reverse()
# bottom-up PAN
pan_feature_maps = [fpn_feature_maps[0]]
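
The hybrid encoders (D-FINE here, RT-DETR and RT-DETR-v2 below) and the Jukebox resnet all get the same change: `x = x[::-1]` allocates a reversed copy and rebinds the name, whereas `x.reverse()` reverses the existing list in place. The two are interchangeable when nothing else holds a reference to the old ordering, which appears to be the case in these modules. A standalone sketch with stand-in values:

```python
fpn_feature_maps = ["p5", "p4", "p3"]  # stand-ins for the real feature-map tensors

copied = fpn_feature_maps[::-1]  # old form: builds a new list, original untouched
fpn_feature_maps.reverse()       # new form: reverses the same list object in place

assert fpn_feature_maps == copied == ["p3", "p4", "p5"]
```

The LUKE/MLuke tokenizer hunks are the related `x = reversed(x)` form, which rebinds the name to an iterator rather than a list; `.reverse()` keeps it a list, which is the safer shape if the value is indexed or iterated more than once afterwards.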

@@ -265,7 +265,7 @@ class JukeboxResnet1D(nn.Module):
blocks.append(JukeboxResConv1DBlock(config, conv_width, block_depth, res_scale))
if reverse_dilation:
- blocks = blocks[::-1]
+ blocks.reverse()
self.resnet_block = nn.ModuleList(blocks)
def forward(self, hidden_states):

@@ -617,7 +617,7 @@ class EfficientLoFTRFineFusionLayer(nn.Module):
def forward(
self,
coarse_features: torch.Tensor,
- residual_features: list[torch.Tensor],
+ residual_features: list[torch.Tensor] | tuple[torch.Tensor],
) -> tuple[torch.Tensor, torch.Tensor]:
"""
For each image pair, compute the fine features of pixels.

@@ -723,7 +723,7 @@ def batch_frexp(inputs, max_bit=31):
tmp_m = []
for m in output_m:
int_m_shifted = int(
- decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP)
+ decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal(1), rounding=decimal.ROUND_HALF_UP)
)
tmp_m.append(int_m_shifted)
output_m = np.array(tmp_m)
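
`decimal.Decimal(1)` and `decimal.Decimal("1")` construct the same value with the same exponent, so the quantize target ("round to a whole number") is unchanged; the integer literal just skips the string parsing. A quick check with a made-up mantissa:

```python
import decimal

max_bit = 31
m = 0.4375  # made-up mantissa; 0.4375 * 2**31 is exact in binary, so no rounding occurs

assert decimal.Decimal(1) == decimal.Decimal("1")

shifted = decimal.Decimal(m * (2**max_bit)).quantize(
    decimal.Decimal(1), rounding=decimal.ROUND_HALF_UP
)
assert int(shifted) == 939524096
```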

@@ -1033,7 +1033,7 @@ class LukeTokenizer(PreTrainedTokenizer):
if head_token_span[0] < tail_token_span[0]:
first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
- token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+ token_span_with_special_token_ids.reverse()
else:
first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)

@@ -868,7 +868,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
if head_token_span[0] < tail_token_span[0]:
first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
- token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+ token_span_with_special_token_ids.reverse()
else:
first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)

@@ -1262,7 +1262,7 @@ class RTDetrHybridEncoder(nn.Module):
new_fpn_feature_map = fpn_block(fused_feature_map)
fpn_feature_maps.append(new_fpn_feature_map)
- fpn_feature_maps = fpn_feature_maps[::-1]
+ fpn_feature_maps.reverse()
# bottom-up PAN
pan_feature_maps = [fpn_feature_maps[0]]

@@ -1218,7 +1218,7 @@ class RTDetrV2HybridEncoder(nn.Module):
new_fpn_feature_map = fpn_block(fused_feature_map)
fpn_feature_maps.append(new_fpn_feature_map)
- fpn_feature_maps = fpn_feature_maps[::-1]
+ fpn_feature_maps.reverse()
# bottom-up PAN
pan_feature_maps = [fpn_feature_maps[0]]

@@ -457,7 +457,7 @@ class Swin2SRLayer(nn.Module):
self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
- window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+ window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
return window_size, shift_size
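
In both Swin2SR and SwinV2 the conditional expression `r if r <= w else w` is just an element-wise minimum, so `min(r, w)` states the intent directly: the window can never be larger than the input resolution along that axis. A standalone check with made-up sizes:

```python
input_resolution = (8, 64)       # made-up values, for illustration only
target_window_size = (16, 16)

old = [r if r <= w else w for r, w in zip(input_resolution, target_window_size)]
new = [min(r, w) for r, w in zip(input_resolution, target_window_size)]

assert old == new == [8, 16]
```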

@@ -626,7 +626,7 @@ class Swinv2Layer(nn.Module):
self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
- window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+ window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
return window_size, shift_size

@@ -329,7 +329,7 @@ def convert_checkpoint(
phonemize = True
else:
# Save vocab as temporary json file
- symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8").readlines()]
+ symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8")]
symbol_to_id = {s: i for i, s in enumerate(symbols)}
# MMS-TTS does not use a <pad> token, so we set to the token used to space characters
_pad = symbols[0]

@@ -36,8 +36,7 @@ def download_checkpoint(path):
with open(path, "wb") as f:
with requests.get(url, stream=True) as req:
- for chunk in req.iter_content(chunk_size=2048):
-     f.write(chunk)
+ f.writelines(req.iter_content(chunk_size=2048))
def get_vivit_config() -> VivitConfig:

@@ -317,8 +317,7 @@ def convert_tiktoken_to_hf(
with open(merge_file, "w", encoding="utf-8") as writer:
writer.write("#version: 0.2\n")
- for bpe_tokens in merges:
-     writer.write(bpe_tokens + "\n")
+ writer.writelines(bpe_tokens + "\n" for bpe_tokens in merges)
hf_tokenizer = WhisperTokenizer(vocab_file, merge_file)

@@ -40,8 +40,7 @@ class BartphoTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
cls.monolingual_vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["monolingual_vocab_file"])
with open(cls.monolingual_vocab_file, "w", encoding="utf-8") as fp:
- for token in vocab_tokens:
-     fp.write(f"{token} {vocab_tokens[token]}\n")
+ fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
tokenizer = BartphoTokenizer(SAMPLE_VOCAB, cls.monolingual_vocab_file, **cls.special_tokens_map)
tokenizer.save_pretrained(cls.tmpdirname)

@@ -38,8 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
cls.vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
with open(cls.vocab_file, "w", encoding="utf-8") as fp:
- for token in vocab_tokens:
-     fp.write(f"{token} {vocab_tokens[token]}\n")
+ fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
with open(cls.merges_file, "w", encoding="utf-8") as fp:
fp.write("\n".join(merges))

@@ -39,8 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
with open(cls.vocab_file, "w", encoding="utf-8") as fp:
- for token in vocab_tokens:
-     fp.write(f"{token} {vocab_tokens[token]}\n")
+ fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
with open(cls.merges_file, "w", encoding="utf-8") as fp:
fp.write("\n".join(merges))

@@ -210,8 +210,7 @@ if __name__ == "__main__":
response.raise_for_status()
with open(filename, "wb") as f:
- for chunk in response.iter_content(chunk_size=8192):
-     f.write(chunk)
+ f.writelines(response.iter_content(chunk_size=8192))
print(f"Successfully downloaded: {filename}")
except requests.exceptions.RequestException as e:
print(f"Error downloading {filename}: {e}")

@@ -34,7 +34,7 @@ def convert_relative_import_to_absolute(
rel_level = len(import_node.relative)
# Strip file extension and split into parts
- file_path_no_ext = file_path[:-3] if file_path.endswith(".py") else file_path
+ file_path_no_ext = file_path.removesuffix(".py")
file_parts = file_path_no_ext.split(os.path.sep)
# Ensure the file path includes the package name
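
`str.removesuffix` (available since Python 3.9, which should be within the project's supported Python range, though that is an assumption on my part) folds both branches into one call: it strips `.py` when present and returns the string unchanged otherwise, so the manual `[:-3]` slice and the `endswith` guard both disappear. A quick sketch with hypothetical paths:

```python
for file_path in ("src/pkg/module.py", "src/pkg/module"):  # hypothetical paths
    old = file_path[:-3] if file_path.endswith(".py") else file_path
    new = file_path.removesuffix(".py")
    assert old == new == "src/pkg/module"
```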