Enable FURB rules in ruff (#41395)

* Apply ruff FURB rules

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Enable ruff FURB rules

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* Revert changes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

* More fixes

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>

---------

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
Author:       Yuanyuan Chen <cyyever@outlook.com>
Committed by: GitHub
Date:         2025-10-17 23:00:40 +08:00
Commit:       12a50f294d (parent 39b6d3bf7e)
27 changed files with 31 additions and 38 deletions

View File

@@ -60,8 +60,8 @@ def pack_data_dir(tok, data_dir: Path, max_tokens, save_path):
     save_path.mkdir(exist_ok=True)
     for split in ["train"]:
         src_path, tgt_path = data_dir / f"{split}.source", data_dir / f"{split}.target"
-        src_docs = [x.rstrip() for x in Path(src_path).open().readlines()]
-        tgt_docs = [x.rstrip() for x in Path(tgt_path).open().readlines()]
+        src_docs = [x.rstrip() for x in Path(src_path).open()]
+        tgt_docs = [x.rstrip() for x in Path(tgt_path).open()]
         packed_src, packed_tgt = pack_examples(tok, src_docs, tgt_docs, max_tokens)
         print(f"packed {split} split from {len(src_docs)} examples -> {len(packed_src)}.")
         Path(save_path / f"{split}.source").open("w").write("\n".join(packed_src))
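
Many of the hunks in this commit apply FURB129 (readlines-in-for): a text-mode file handle already iterates over its lines, so calling .readlines() first only materializes an extra list. A minimal sketch of the equivalence, using a throwaway file created just for the demonstration:

import tempfile
from pathlib import Path

# Hypothetical sample file, created only for this demonstration.
sample = Path(tempfile.mkdtemp()) / "example.source"
sample.write_text("first line\nsecond line\nthird line\n")

# Before: .readlines() builds the full list of lines up front.
with sample.open() as f:
    docs_via_readlines = [x.rstrip() for x in f.readlines()]

# After (FURB129): iterating the file object yields the same lines lazily.
with sample.open() as f:
    docs_via_iteration = [x.rstrip() for x in f]

assert docs_via_readlines == docs_via_iteration == ["first line", "second line", "third line"]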

View File

@@ -19,8 +19,8 @@ from utils import calculate_rouge, save_json
 def calculate_rouge_path(pred_path, tgt_path, save_path=None, **kwargs):
     """Kwargs will be passed to calculate_rouge"""
-    pred_lns = [x.strip() for x in open(pred_path).readlines()]
-    tgt_lns = [x.strip() for x in open(tgt_path).readlines()][: len(pred_lns)]
+    pred_lns = [x.strip() for x in open(pred_path)]
+    tgt_lns = [x.strip() for x in open(tgt_path)][: len(pred_lns)]
     metrics = calculate_rouge(pred_lns, tgt_lns, **kwargs)
     if save_path is not None:
         save_json(metrics, save_path, indent=None)

View File

@@ -205,7 +205,7 @@ def run_generate():
         return
     tgt_file = Path(args.data_dir).joinpath(args.type_path + ".target")
     with open(tgt_file) as f:
-        labels = [x.rstrip() for x in f.readlines()][: len(preds)]
+        labels = [x.rstrip() for x in f][: len(preds)]

     # Calculate metrics, save metrics, and save _generations.txt
     calc_bleu = "translation" in args.task

View File

@@ -130,7 +130,7 @@ def run_generate(verbose=True):
     parsed_args = parse_numeric_n_bool_cl_kwargs(rest)
     if parsed_args and verbose:
         print(f"parsed the following generate kwargs: {parsed_args}")
-    examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path).readlines()]
+    examples = [" " + x.rstrip() if "t5" in args.model_name else x.rstrip() for x in open(args.input_path)]
     if args.n_obs > 0:
         examples = examples[: args.n_obs]
     Path(args.save_path).parent.mkdir(exist_ok=True)
@@ -159,8 +159,8 @@ def run_generate(verbose=True):
     # Compute scores
     score_fn = calculate_bleu if "translation" in args.task else calculate_rouge
-    output_lns = [x.rstrip() for x in open(args.save_path).readlines()]
-    reference_lns = [x.rstrip() for x in open(args.reference_path).readlines()][: len(output_lns)]
+    output_lns = [x.rstrip() for x in open(args.save_path)]
+    reference_lns = [x.rstrip() for x in open(args.reference_path)][: len(output_lns)]
     scores: dict = score_fn(output_lns, reference_lns)
     scores.update(runtime_metrics)

View File

@@ -162,7 +162,7 @@ class AbstractSeq2SeqDataset(Dataset):
     @staticmethod
     def get_char_lens(data_file):
-        return [len(x) for x in Path(data_file).open().readlines()]
+        return [len(x) for x in Path(data_file).open()]

     @cached_property
     def tgt_lens(self):

View File

@@ -31,7 +31,7 @@ with open(dataset) as f_p:
             continue

         if (subword_len_counter + current_subwords_len) > max_len:
-            print("")
+            print()
             print(line)
             subword_len_counter = current_subwords_len
             continue
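
This hunk is FURB105 (print-empty-string): print("") and print() both emit exactly one newline, so the empty-string argument is redundant. A quick standard-library check of that claim:

import io
from contextlib import redirect_stdout

def captured(call) -> str:
    # Capture whatever the call writes to stdout.
    buffer = io.StringIO()
    with redirect_stdout(buffer):
        call()
    return buffer.getvalue()

# Both forms write a single newline and nothing else.
assert captured(lambda: print("")) == captured(lambda: print()) == "\n"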

View File

@@ -616,8 +616,7 @@ def main():
         output_predictions_file = os.path.join(training_args.output_dir, "predictions.txt")
         if trainer.is_world_process_zero():
             with open(output_predictions_file, "w") as writer:
-                for prediction in true_predictions:
-                    writer.write(" ".join(prediction) + "\n")
+                writer.writelines(" ".join(prediction) + "\n" for prediction in true_predictions)

     kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "token-classification"}
     if data_args.dataset_name is not None:
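
This and several later hunks apply FURB122 (for-loop-writes): a write() call per iteration collapses into a single writelines() call. writelines() does not append newlines itself, so the "\n" has to stay inside the generator expression. A self-contained sketch with made-up predictions:

import tempfile
from pathlib import Path

# Hypothetical token-classification output, for illustration only.
true_predictions = [["B-PER", "I-PER", "O"], ["O", "B-LOC"]]
output_file = Path(tempfile.mkdtemp()) / "predictions.txt"

# Before: one write() call per prediction.
with open(output_file, "w") as writer:
    for prediction in true_predictions:
        writer.write(" ".join(prediction) + "\n")
expected = output_file.read_text()

# After (FURB122): writelines() consumes the generator lazily and writes the same text.
with open(output_file, "w") as writer:
    writer.writelines(" ".join(prediction) + "\n" for prediction in true_predictions)

assert output_file.read_text() == expected == "B-PER I-PER O\nO B-LOC\n"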

View File

@@ -32,7 +32,7 @@ line-length = 119
 ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905", "UP009", "UP015", "UP031", "UP028", "UP004", "UP045", "UP007"]
 # RUF013: Checks for the use of implicit Optional
 # in type annotations when the default parameter value is None.
-select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794"]
+select = ["C", "E", "F", "I", "W", "RUF013", "PERF102", "PLC1802", "PLC0208", "SIM", "UP", "PIE794", "FURB"]
 extend-safe-fixes = ["UP006"]
 # Ignore import violations in all `__init__.py` files.
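
Adding "FURB" to select enables the whole refurb-inspired rule family. To reproduce the lint locally, the same selection can be requested on the command line; a small sketch using subprocess, where "src/transformers" is a placeholder for whichever path you want to check:

import subprocess

# Run ruff with only the FURB rules selected; the path is a placeholder.
result = subprocess.run(
    ["ruff", "check", "--select", "FURB", "src/transformers"],
    capture_output=True,
    text=True,
)
print(result.stdout or "No FURB violations found.")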

View File

@@ -140,7 +140,7 @@ class BartphoTokenizer(PreTrainedTokenizer):
                 self.fairseq_tokens_to_ids[str(token)] = cnt
                 cnt += 1
         with open(monolingual_vocab_file, "r", encoding="utf-8") as f:
-            for line in f.readlines():
+            for line in f:
                 token = line.strip().split()[0]
                 self.fairseq_tokens_to_ids[token] = len(self.fairseq_tokens_to_ids)
         if str(mask_token) not in self.fairseq_tokens_to_ids:

View File

@@ -2171,7 +2171,7 @@ class DFineHybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)

-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()

         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]
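
The FPN/PAN hunks here and below apply FURB187 (list-reverse-copy): fpn_feature_maps = fpn_feature_maps[::-1] builds a reversed copy and rebinds the name, while .reverse() flips the existing list in place with no extra allocation. For a locally built list the observable result is identical; a sketch with plain integers standing in for feature maps:

# Plain ints stand in for feature-map tensors to keep the sketch dependency-free.
maps_sliced = [1, 2, 3]
maps_inplace = [1, 2, 3]

# Before: slicing allocates a brand-new reversed list.
maps_sliced = maps_sliced[::-1]

# After (FURB187): reverse the same list in place.
maps_inplace.reverse()

assert maps_sliced == maps_inplace == [3, 2, 1]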

View File

@@ -265,7 +265,7 @@ class JukeboxResnet1D(nn.Module):
             blocks.append(JukeboxResConv1DBlock(config, conv_width, block_depth, res_scale))

         if reverse_dilation:
-            blocks = blocks[::-1]
+            blocks.reverse()
         self.resnet_block = nn.ModuleList(blocks)

     def forward(self, hidden_states):

View File

@@ -617,7 +617,7 @@ class EfficientLoFTRFineFusionLayer(nn.Module):
     def forward(
         self,
         coarse_features: torch.Tensor,
-        residual_features: list[torch.Tensor],
+        residual_features: list[torch.Tensor] | tuple[torch.Tensor],
     ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         For each image pair, compute the fine features of pixels.

View File

@@ -723,7 +723,7 @@ def batch_frexp(inputs, max_bit=31):
     tmp_m = []
     for m in output_m:
         int_m_shifted = int(
-            decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP)
+            decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal(1), rounding=decimal.ROUND_HALF_UP)
         )
         tmp_m.append(int_m_shifted)
     output_m = np.array(tmp_m)
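
This one is FURB157 (verbose-decimal-constructor): Decimal("1") parses a string to build the quantization exponent, while Decimal(1) constructs the same value directly from an int, so the rounding behaviour is unchanged. A quick check:

import decimal

assert decimal.Decimal("1") == decimal.Decimal(1)

value = decimal.Decimal("2.718")
via_str = value.quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP)
via_int = value.quantize(decimal.Decimal(1), rounding=decimal.ROUND_HALF_UP)

# Both exponents round the value to the same whole-number Decimal.
assert via_str == via_int == decimal.Decimal(3)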

View File

@@ -1033,7 +1033,7 @@ class LukeTokenizer(PreTrainedTokenizer):
             if head_token_span[0] < tail_token_span[0]:
                 first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
                 first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
-                token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+                token_span_with_special_token_ids.reverse()
             else:
                 first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
                 first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)
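
Unlike the slicing hunks, the original line here rebound the name to reversed(...), which returns a one-shot iterator rather than a list; list.reverse() keeps the object a list, mutates it in place, and supports indexing and repeated iteration afterwards. A small sketch of that distinction (the span values are invented):

spans = [(0, 2), (3, 5), (6, 9)]

lazy = reversed(spans)               # a list_reverseiterator, not a list
assert not isinstance(lazy, list)
assert list(lazy) == [(6, 9), (3, 5), (0, 2)]
assert list(lazy) == []              # exhausted after a single pass

spans.reverse()                      # in place: spans stays a list, reversed
assert spans == [(6, 9), (3, 5), (0, 2)]
assert spans[0] == (6, 9)            # indexing still works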

View File

@@ -868,7 +868,7 @@ class MLukeTokenizer(PreTrainedTokenizer):
             if head_token_span[0] < tail_token_span[0]:
                 first_entity_token_spans[0] = (head_token_span[0], head_token_span[1] + 2)
                 first_entity_token_spans[1] = (tail_token_span[0] + 2, tail_token_span[1] + 4)
-                token_span_with_special_token_ids = reversed(token_span_with_special_token_ids)
+                token_span_with_special_token_ids.reverse()
             else:
                 first_entity_token_spans[0] = (head_token_span[0] + 2, head_token_span[1] + 4)
                 first_entity_token_spans[1] = (tail_token_span[0], tail_token_span[1] + 2)

View File

@@ -1262,7 +1262,7 @@ class RTDetrHybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)

-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()

         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]

View File

@@ -1218,7 +1218,7 @@ class RTDetrV2HybridEncoder(nn.Module):
             new_fpn_feature_map = fpn_block(fused_feature_map)
             fpn_feature_maps.append(new_fpn_feature_map)

-        fpn_feature_maps = fpn_feature_maps[::-1]
+        fpn_feature_maps.reverse()

         # bottom-up PAN
         pan_feature_maps = [fpn_feature_maps[0]]

View File

@@ -457,7 +457,7 @@ class Swin2SRLayer(nn.Module):
         self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)

     def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
-        window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+        window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
         shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
         return window_size, shift_size
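
The two Swin hunks are FURB136 (if-expr-min-max): the conditional expression r if r <= w else w simply selects the smaller of the two values, which min(r, w) states directly. An equivalence check over a pair of invented resolutions:

input_resolution = (8, 16)      # hypothetical values, just for the check
target_window_size = (12, 7)

# Before: a conditional expression picking the smaller element of each pair.
old = [r if r <= w else w for r, w in zip(input_resolution, target_window_size)]

# After (FURB136): min() expresses the same clamp.
new = [min(r, w) for r, w in zip(input_resolution, target_window_size)]

assert old == new == [8, 7]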

View File

@@ -626,7 +626,7 @@ class Swinv2Layer(nn.Module):
         self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)

     def _compute_window_shift(self, target_window_size, target_shift_size) -> tuple[tuple[int, int], tuple[int, int]]:
-        window_size = [r if r <= w else w for r, w in zip(self.input_resolution, target_window_size)]
+        window_size = [min(r, w) for r, w in zip(self.input_resolution, target_window_size)]
         shift_size = [0 if r <= w else s for r, w, s in zip(self.input_resolution, window_size, target_shift_size)]
         return window_size, shift_size

View File

@@ -329,7 +329,7 @@ def convert_checkpoint(
         phonemize = True
     else:
         # Save vocab as temporary json file
-        symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8").readlines()]
+        symbols = [line.replace("\n", "") for line in open(vocab_path, encoding="utf-8")]
         symbol_to_id = {s: i for i, s in enumerate(symbols)}

         # MMS-TTS does not use a <pad> token, so we set to the token used to space characters
         _pad = symbols[0]

View File

@@ -36,8 +36,7 @@ def download_checkpoint(path):
     with open(path, "wb") as f:
         with requests.get(url, stream=True) as req:
-            for chunk in req.iter_content(chunk_size=2048):
-                f.write(chunk)
+            f.writelines(req.iter_content(chunk_size=2048))


 def get_vivit_config() -> VivitConfig:
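
The same FURB122 rewrite also covers binary downloads: writelines() only requires an iterable whose items the file object accepts, so an iterator of bytes chunks can be written to a file opened in "wb" mode without a per-chunk loop. A sketch that fakes the chunk iterator instead of touching the network:

import io

def fake_iter_content(payload: bytes, chunk_size: int):
    # Stand-in for requests.Response.iter_content(chunk_size=...): yields bytes chunks.
    for start in range(0, len(payload), chunk_size):
        yield payload[start : start + chunk_size]

payload = b"checkpoint-bytes-" * 10
sink = io.BytesIO()

# writelines() happily consumes bytes chunks when the target is a binary stream.
sink.writelines(fake_iter_content(payload, chunk_size=64))

assert sink.getvalue() == payload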

View File

@@ -317,8 +317,7 @@ def convert_tiktoken_to_hf(
     with open(merge_file, "w", encoding="utf-8") as writer:
         writer.write("#version: 0.2\n")
-        for bpe_tokens in merges:
-            writer.write(bpe_tokens + "\n")
+        writer.writelines(bpe_tokens + "\n" for bpe_tokens in merges)

     hf_tokenizer = WhisperTokenizer(vocab_file, merge_file)

View File

@@ -40,8 +40,7 @@ class BartphoTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
         cls.monolingual_vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["monolingual_vocab_file"])
         with open(cls.monolingual_vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)

         tokenizer = BartphoTokenizer(SAMPLE_VOCAB, cls.monolingual_vocab_file, **cls.special_tokens_map)
         tokenizer.save_pretrained(cls.tmpdirname)

View File

@@ -38,8 +38,7 @@ class BertweetTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         cls.vocab_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["vocab_file"])
         cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
         with open(cls.vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
         with open(cls.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))

View File

@@ -39,8 +39,7 @@ class PhobertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         cls.merges_file = os.path.join(cls.tmpdirname, VOCAB_FILES_NAMES["merges_file"])
         with open(cls.vocab_file, "w", encoding="utf-8") as fp:
-            for token in vocab_tokens:
-                fp.write(f"{token} {vocab_tokens[token]}\n")
+            fp.writelines(f"{token} {vocab_tokens[token]}\n" for token in vocab_tokens)
         with open(cls.merges_file, "w", encoding="utf-8") as fp:
             fp.write("\n".join(merges))

View File

@@ -210,8 +210,7 @@ if __name__ == "__main__":
             response.raise_for_status()
             with open(filename, "wb") as f:
-                for chunk in response.iter_content(chunk_size=8192):
-                    f.write(chunk)
+                f.writelines(response.iter_content(chunk_size=8192))
             print(f"Successfully downloaded: {filename}")
         except requests.exceptions.RequestException as e:
             print(f"Error downloading {filename}: {e}")

View File

@@ -34,7 +34,7 @@ def convert_relative_import_to_absolute(
     rel_level = len(import_node.relative)

     # Strip file extension and split into parts
-    file_path_no_ext = file_path[:-3] if file_path.endswith(".py") else file_path
+    file_path_no_ext = file_path.removesuffix(".py")
     file_parts = file_path_no_ext.split(os.path.sep)

     # Ensure the file path includes the package name
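
The final hunk is FURB188 (slice-to-remove-prefix-or-suffix): since Python 3.9, str.removesuffix(".py") performs the conditional strip directly and returns the string unchanged when the suffix is absent, so the endswith() check is no longer needed. A short equivalence check over invented paths:

def strip_py_old(file_path: str) -> str:
    # Before: manual conditional slicing.
    return file_path[:-3] if file_path.endswith(".py") else file_path

def strip_py_new(file_path: str) -> str:
    # After (FURB188): removesuffix() is a no-op when ".py" is missing.
    return file_path.removesuffix(".py")

for path in ["src/transformers/utils/hub.py", "README.md", "configs/base"]:
    assert strip_py_old(path) == strip_py_new(path)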