Mirror of https://github.com/hiyouga/LLaMA-Factory.git (synced 2025-10-20 12:54:18 +08:00)
[misc] upgrade format to py39 (#7256)
This commit is contained in:
@@ -10,7 +10,7 @@ _DESCRIPTION = "BELLE multiturn chat dataset."
|
||||
|
||||
_CITATION = """\
|
||||
@article{belle2023exploring,
|
||||
title={Exploring the Impact of Instruction Data Scaling on Large Language Models: An Empirical Study on Real-World Use Cases},
|
||||
title={Exploring the Impact of Instruction Data Scaling on Large Language Models},
|
||||
author={Yunjie Ji, Yong Deng, Yan Gong, Yiping Peng, Qiang Niu, Lei Zhang, Baochang Ma, Xiangang Li},
|
||||
journal={arXiv preprint arXiv:2303.14742},
|
||||
year={2023}
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import datasets
|
||||
|
||||
@@ -50,7 +49,7 @@ class HhRlhfEn(datasets.GeneratorBasedBuilder):
|
||||
datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"filepaths": file_path["test"]}),
|
||||
]
|
||||
|
||||
def _generate_examples(self, filepaths: List[str]):
|
||||
def _generate_examples(self, filepaths: list[str]):
|
||||
key = 0
|
||||
for filepath in filepaths:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
|
@@ -1,6 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
import datasets
|
||||
|
||||
@@ -11,7 +10,7 @@ _DESCRIPTION = "UltraChat: Large-scale, Informative, and Diverse Multi-round Dia
|
||||
|
||||
_CITATION = """\
|
||||
@misc{UltraChat,
|
||||
author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and Qin, Yujia and Liu, Zhiyuan and Sun, Maosong and Zhou, Bowen},
|
||||
author = {Ding, Ning and Chen, Yulin and Xu, Bokai and Hu, Shengding and others},
|
||||
title = {UltraChat: A Large-scale Auto-generated Multi-round Dialogue Data},
|
||||
year = {2023},
|
||||
publisher = {GitHub},
|
||||
@@ -40,7 +39,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
|
||||
file_paths = [dl_manager.download(_BASE_DATA_URL.format(idx=idx)) for idx in range(10)] # multiple shards
|
||||
return [datasets.SplitGenerator(name=datasets.Split.TRAIN, gen_kwargs={"filepaths": file_paths})]
|
||||
|
||||
def _generate_examples(self, filepaths: List[str]):
|
||||
def _generate_examples(self, filepaths: list[str]):
|
||||
for filepath in filepaths:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
for row in f:
|
||||
@@ -49,7 +48,7 @@ class UltraChat(datasets.GeneratorBasedBuilder):
|
||||
except Exception:
|
||||
continue
|
||||
key: int = data["id"]
|
||||
content: List[str] = data["data"]
|
||||
content: list[str] = data["data"]
|
||||
if len(content) % 2 == 1:
|
||||
content.pop(-1)
|
||||
if len(content) < 2:
|
||||
|
Reference in New Issue
Block a user