remove dummy EncodingFast (#40864)

Signed-off-by: Yuanyuan Chen <cyyever@outlook.com>
2025-10-20 17:13:56 +08:00 · 2025-09-16 20:56:11 +08:00
parent d0af4269ec
commit 96bc19bcdf
1 changed file with 5 additions and 8 deletions
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@ -98,9 +98,11 @@ def flatten(arr: list):
    return res


+if is_tokenizers_available() or TYPE_CHECKING:
+    from tokenizers import Encoding as EncodingFast
+
 if is_tokenizers_available():
    from tokenizers import AddedToken
-    from tokenizers import Encoding as EncodingFast
 else:

    @dataclass(frozen=False, eq=True)
@ -129,12 +131,6 @@ else:
        def __str__(self):
            return self.content

-    @dataclass
-    class EncodingFast:
-        """This is dummy class because without the `tokenizers` library we don't have these objects anyway"""
-
-        pass
-

 logger = logging.get_logger(__name__)

@ -238,7 +234,8 @@ class BatchEncoding(UserDict):
    ):
        super().__init__(data)

-        if isinstance(encoding, EncodingFast):
+        # A non-None encoding implies fast tokenization; check it first as EncodingFast needs `tokenizers`
+        if encoding is not None and isinstance(encoding, EncodingFast):
            encoding = [encoding]

        self._encodings = encoding