mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Bugfix] Replace custom Encoding class with BatchEncoding in MistralTokenizer (#22786)
Signed-off-by: zjy0516 <riverclouds.zhu@qq.com>
This commit is contained in:
@ -2,13 +2,13 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union, cast
|
||||
|
||||
import huggingface_hub
|
||||
import regex as re
|
||||
from huggingface_hub import HfApi, hf_hub_download
|
||||
from transformers.tokenization_utils_base import BatchEncoding
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.transformers_utils.tokenizer_base import TokenizerBase
|
||||
@ -27,11 +27,6 @@ if TYPE_CHECKING:
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Encoding:
|
||||
input_ids: Union[list[int], list[list[int]]]
|
||||
|
||||
|
||||
def maybe_serialize_tool_calls(request: "ChatCompletionRequest"):
|
||||
# SEE: https://github.com/vllm-project/vllm/pull/9951
|
||||
# Credits go to: @gcalmettes
|
||||
@ -359,7 +354,7 @@ class MistralTokenizer(TokenizerBase):
|
||||
# For str, single prompt text
|
||||
else:
|
||||
input_ids = self.encode_one(text, truncation, max_length)
|
||||
return Encoding(input_ids=input_ids)
|
||||
return BatchEncoding({"input_ids": input_ids})
|
||||
|
||||
def get_vocab(self) -> dict[str, int]:
|
||||
# NB: the dictionary form of the vocabulary collapses token ids that map
|
||||
|
Reference in New Issue
Block a user