mirror of
https://github.com/huggingface/trl.git
synced 2025-10-20 18:43:52 +08:00
Deprecate unused dataset_formatting module (#4242)
Co-authored-by: behroozazarkhalili <ermiaazarkhalili> Co-authored-by: Quentin Gallouédec <gallouedec.quentin@gmail.com>
This commit is contained in:
committed by
GitHub
parent
bcd059a384
commit
039d526d24
@ -14,6 +14,7 @@
|
||||
|
||||
from typing import Callable
|
||||
|
||||
import pytest
|
||||
from datasets import Dataset, load_dataset
|
||||
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer
|
||||
|
||||
@ -23,6 +24,7 @@ from trl.models.utils import ChatMlSpecialTokens, clone_chat_template, setup_cha
|
||||
from .testing_utils import TrlTestCase
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::DeprecationWarning")
|
||||
class TestDatasetFormatting(TrlTestCase):
|
||||
def setup_method(self):
|
||||
self.llama_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-MistralForCausalLM-0.1")
|
||||
|
@ -13,6 +13,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
import warnings
|
||||
from typing import Callable, Literal, Optional
|
||||
|
||||
import datasets
|
||||
@ -41,7 +42,17 @@ def conversations_formatting_function(
|
||||
r"""
|
||||
Return a callable that takes a "messages" dataset and returns a formatted dataset by applying the tokenizer's
|
||||
chat template to the dataset, along with the schema of the functions given in the tools list.
|
||||
|
||||
.. deprecated:: 0.24.0
|
||||
`conversations_formatting_function` is deprecated and will be removed in version 0.27.
|
||||
Please use `tokenizer.apply_chat_template()` directly instead.
|
||||
"""
|
||||
warnings.warn(
|
||||
"`conversations_formatting_function` is deprecated and will be removed in TRL 0.27. "
|
||||
"Please use `tokenizer.apply_chat_template()` directly instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def format_dataset(examples):
|
||||
if isinstance(examples[messages_field][0], list):
|
||||
@ -61,7 +72,17 @@ def instructions_formatting_function(tokenizer: AutoTokenizer):
|
||||
r"""
|
||||
Return a callable that takes an "instructions" dataset and returns a formatted dataset by applying the tokenizer's
|
||||
chat template to the dataset.
|
||||
|
||||
.. deprecated:: 0.24.0
|
||||
`instructions_formatting_function` is deprecated and will be removed in version 0.27.
|
||||
Please use `tokenizer.apply_chat_template()` directly instead.
|
||||
"""
|
||||
warnings.warn(
|
||||
"`instructions_formatting_function` is deprecated and will be removed in TRL 0.27. "
|
||||
"Please use `tokenizer.apply_chat_template()` directly instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
def format_dataset(examples):
|
||||
if isinstance(examples["prompt"], list):
|
||||
@ -99,7 +120,18 @@ def get_formatting_func_from_dataset(
|
||||
|
||||
Returns:
|
||||
Callable: Formatting function if the dataset format is supported else None
|
||||
|
||||
.. deprecated:: 0.24.0
|
||||
`get_formatting_func_from_dataset` is deprecated and will be removed in version 0.27.
|
||||
Please use `tokenizer.apply_chat_template()` directly instead.
|
||||
"""
|
||||
warnings.warn(
|
||||
"`get_formatting_func_from_dataset` is deprecated and will be removed in TRL 0.27. "
|
||||
"Please use `tokenizer.apply_chat_template()` directly instead.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
|
||||
if isinstance(dataset, Dataset):
|
||||
if "messages" in dataset.features:
|
||||
if dataset.features["messages"] == FORMAT_MAPPING["chatml"]:
|
||||
|
Reference in New Issue
Block a user