mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Deprecation] Remove everything scheduled for removal in v0.10.0 (#20979)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@ -103,9 +103,7 @@ When tool_choice='required' is set, the model is guaranteed to generate one or m
|
||||
|
||||
vLLM supports the `tool_choice='none'` option in the chat completion API. When this option is set, the model will not generate any tool calls and will respond with regular text content only, even if tools are defined in the request.
|
||||
|
||||
By default, when `tool_choice='none'` is specified, vLLM excludes tool definitions from the prompt to optimize context usage. To include tool definitions even with `tool_choice='none'`, use the `--expand-tools-even-if-tool-choice-none` option.
|
||||
|
||||
Note: This behavior will change in v0.10.0, where tool definitions will be included by default even with `tool_choice='none'`.
|
||||
However, when `tool_choice='none'` is specified, vLLM still includes the tool definitions in the prompt.
|
||||
|
||||
## Automatic Function Calling
|
||||
|
||||
|
@ -26,7 +26,7 @@ from pydantic import (ConfigDict, SkipValidation, TypeAdapter, field_validator,
|
||||
from pydantic.dataclasses import dataclass
|
||||
from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
|
||||
from torch.distributed import ProcessGroup, ReduceOp
|
||||
from typing_extensions import Self, deprecated, runtime_checkable
|
||||
from typing_extensions import Self, runtime_checkable
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm import version
|
||||
@ -3659,18 +3659,6 @@ GuidedDecodingBackend = Literal[GuidedDecodingBackendV0,
|
||||
class DecodingConfig:
|
||||
"""Dataclass which contains the decoding strategy of the engine."""
|
||||
|
||||
@property
|
||||
@deprecated(
|
||||
"`guided_decoding_backend` is deprecated and has been renamed to "
|
||||
"`backend`. This will be removed in v0.10.0. Please use the "
|
||||
"`backend` argument instead.")
|
||||
def guided_decoding_backend(self) -> GuidedDecodingBackend:
|
||||
return self.backend
|
||||
|
||||
@guided_decoding_backend.setter
|
||||
def guided_decoding_backend(self, value: GuidedDecodingBackend):
|
||||
self.backend = value
|
||||
|
||||
backend: GuidedDecodingBackend = "auto" if envs.VLLM_USE_V1 else "xgrammar"
|
||||
"""Which engine will be used for guided decoding (JSON schema / regex etc)
|
||||
by default. With "auto", we will make opinionated choices based on request
|
||||
@ -3713,9 +3701,6 @@ class DecodingConfig:
|
||||
return hash_str
|
||||
|
||||
def __post_init__(self):
|
||||
if ":" in self.backend:
|
||||
self._extract_backend_options()
|
||||
|
||||
if envs.VLLM_USE_V1:
|
||||
valid_guided_backends = get_args(GuidedDecodingBackendV1)
|
||||
else:
|
||||
@ -3731,24 +3716,6 @@ class DecodingConfig:
|
||||
raise ValueError("disable_additional_properties is only supported "
|
||||
"for the guidance backend.")
|
||||
|
||||
@deprecated(
|
||||
"Passing guided decoding backend options inside backend in the format "
|
||||
"'backend:...' is deprecated. This will be removed in v0.10.0. Please "
|
||||
"use the dedicated arguments '--disable-fallback', "
|
||||
"'--disable-any-whitespace' and '--disable-additional-properties' "
|
||||
"instead.")
|
||||
def _extract_backend_options(self):
|
||||
"""Extract backend options from the backend string."""
|
||||
backend, options = self.backend.split(":")
|
||||
self.backend = cast(GuidedDecodingBackend, backend)
|
||||
options_set = set(options.strip().split(","))
|
||||
if "no-fallback" in options_set:
|
||||
self.disable_fallback = True
|
||||
if "disable-any-whitespace" in options_set:
|
||||
self.disable_any_whitespace = True
|
||||
if "no-additional-properties" in options_set:
|
||||
self.disable_additional_properties = True
|
||||
|
||||
|
||||
DetailedTraceModules = Literal["model", "worker", "all"]
|
||||
|
||||
|
@ -9,7 +9,6 @@ import functools
|
||||
import json
|
||||
import sys
|
||||
import threading
|
||||
import warnings
|
||||
from dataclasses import MISSING, dataclass, fields, is_dataclass
|
||||
from itertools import permutations
|
||||
from typing import (TYPE_CHECKING, Annotated, Any, Callable, Dict, List,
|
||||
@ -434,7 +433,6 @@ class EngineArgs:
|
||||
|
||||
speculative_config: Optional[Dict[str, Any]] = None
|
||||
|
||||
qlora_adapter_name_or_path: Optional[str] = None
|
||||
show_hidden_metrics_for_version: Optional[str] = \
|
||||
ObservabilityConfig.show_hidden_metrics_for_version
|
||||
otlp_traces_endpoint: Optional[str] = \
|
||||
@ -468,7 +466,6 @@ class EngineArgs:
|
||||
|
||||
additional_config: dict[str, Any] = \
|
||||
get_field(VllmConfig, "additional_config")
|
||||
enable_reasoning: Optional[bool] = None # DEPRECATED
|
||||
reasoning_parser: str = DecodingConfig.reasoning_backend
|
||||
|
||||
use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load
|
||||
@ -486,13 +483,6 @@ class EngineArgs:
|
||||
if isinstance(self.compilation_config, (int, dict)):
|
||||
self.compilation_config = CompilationConfig.from_cli(
|
||||
str(self.compilation_config))
|
||||
if self.qlora_adapter_name_or_path is not None:
|
||||
warnings.warn(
|
||||
"The `qlora_adapter_name_or_path` is deprecated "
|
||||
"and will be removed in v0.10.0. ",
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
# Setup plugins
|
||||
from vllm.plugins import load_general_plugins
|
||||
load_general_plugins()
|
||||
@ -605,14 +595,6 @@ class EngineArgs:
|
||||
**load_kwargs["ignore_patterns"])
|
||||
load_group.add_argument("--use-tqdm-on-load",
|
||||
**load_kwargs["use_tqdm_on_load"])
|
||||
load_group.add_argument(
|
||||
"--qlora-adapter-name-or-path",
|
||||
type=str,
|
||||
default=None,
|
||||
help="The `--qlora-adapter-name-or-path` has no effect, do not set"
|
||||
" it, and it will be removed in v0.10.0.",
|
||||
deprecated=True,
|
||||
)
|
||||
load_group.add_argument('--pt-load-map-location',
|
||||
**load_kwargs["pt_load_map_location"])
|
||||
|
||||
@ -633,15 +615,6 @@ class EngineArgs:
|
||||
guided_decoding_group.add_argument(
|
||||
"--guided-decoding-disable-additional-properties",
|
||||
**guided_decoding_kwargs["disable_additional_properties"])
|
||||
guided_decoding_group.add_argument(
|
||||
"--enable-reasoning",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
deprecated=True,
|
||||
help="[DEPRECATED] The `--enable-reasoning` flag is deprecated as "
|
||||
"of v0.9.0. Use `--reasoning-parser` to specify the reasoning "
|
||||
"parser backend instead. This flag (`--enable-reasoning`) will be "
|
||||
"removed in v0.10.0. When `--reasoning-parser` is specified, "
|
||||
"reasoning mode is automatically enabled.")
|
||||
guided_decoding_group.add_argument(
|
||||
"--reasoning-parser",
|
||||
# These choices are a special case because they are not static
|
||||
|
@ -1514,8 +1514,6 @@ async def init_app_state(
|
||||
chat_template_content_format=args.chat_template_content_format,
|
||||
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
||||
enable_auto_tools=args.enable_auto_tool_choice,
|
||||
expand_tools_even_if_tool_choice_none=args.
|
||||
expand_tools_even_if_tool_choice_none,
|
||||
tool_parser=args.tool_call_parser,
|
||||
reasoning_parser=args.reasoning_parser,
|
||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||
@ -1531,8 +1529,6 @@ async def init_app_state(
|
||||
chat_template_content_format=args.chat_template_content_format,
|
||||
return_tokens_as_token_ids=args.return_tokens_as_token_ids,
|
||||
enable_auto_tools=args.enable_auto_tool_choice,
|
||||
expand_tools_even_if_tool_choice_none=args.
|
||||
expand_tools_even_if_tool_choice_none,
|
||||
tool_parser=args.tool_call_parser,
|
||||
reasoning_parser=args.reasoning_parser,
|
||||
enable_prompt_tokens_details=args.enable_prompt_tokens_details,
|
||||
|
@ -182,13 +182,6 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
||||
"""If set to True, enable tracking server_load_metrics in the app state."""
|
||||
enable_force_include_usage: bool = False
|
||||
"""If set to True, including usage on every request."""
|
||||
expand_tools_even_if_tool_choice_none: bool = False
|
||||
"""Include tool definitions in prompts even when `tool_choice='none'`.
|
||||
|
||||
This is a transitional option that will be removed in v0.10.0. In
|
||||
v0.10.0, tool definitions will always be included regardless of
|
||||
`tool_choice` setting. Use this flag to test the upcoming behavior
|
||||
before the breaking change."""
|
||||
|
||||
@staticmethod
|
||||
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
|
||||
@ -225,11 +218,6 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
|
||||
valid_tool_parsers = list(ToolParserManager.tool_parsers.keys())
|
||||
frontend_kwargs["tool_call_parser"]["choices"] = valid_tool_parsers
|
||||
|
||||
# Special case for expand-tools-even-if-tool-choice-none because of
|
||||
# the deprecation field
|
||||
frontend_kwargs["expand_tools_even_if_tool_choice_none"]\
|
||||
["deprecated"] = True
|
||||
|
||||
frontend_group = parser.add_argument_group(
|
||||
title="Frontend",
|
||||
description=FrontendArgs.__doc__,
|
||||
|
@ -63,7 +63,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
return_tokens_as_token_ids: bool = False,
|
||||
reasoning_parser: str = "",
|
||||
enable_auto_tools: bool = False,
|
||||
expand_tools_even_if_tool_choice_none: bool = False,
|
||||
tool_parser: Optional[str] = None,
|
||||
enable_prompt_tokens_details: bool = False,
|
||||
enable_force_include_usage: bool = False,
|
||||
@ -112,8 +111,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
raise TypeError("Error: --enable-auto-tool-choice requires "
|
||||
f"tool_parser:'{tool_parser}' which has not "
|
||||
"been registered") from e
|
||||
self.expand_tools_even_if_tool_choice_none = (
|
||||
expand_tools_even_if_tool_choice_none)
|
||||
|
||||
self.enable_prompt_tokens_details = enable_prompt_tokens_details
|
||||
self.enable_force_include_usage = enable_force_include_usage
|
||||
@ -182,20 +179,6 @@ class OpenAIServingChat(OpenAIServing):
|
||||
|
||||
if request.tools is None:
|
||||
tool_dicts = None
|
||||
elif (request.tool_choice == "none"
|
||||
and not self.expand_tools_even_if_tool_choice_none):
|
||||
if len(request.tools) > 0:
|
||||
logger.warning_once(
|
||||
"Tools are specified but tool_choice is set to 'none' "
|
||||
"and --expand-tools-even-if-tool-choice-none is not "
|
||||
"enabled. Tool definitions will be excluded from the "
|
||||
"prompt. This behavior will change in vLLM v0.10 where "
|
||||
"tool definitions will be included by default even "
|
||||
"with tool_choice='none'. To adopt the new behavior "
|
||||
"now, use --expand-tools-even-if-tool-choice-none. "
|
||||
"To suppress this warning, either remove tools from "
|
||||
"the request or set tool_choice to a different value.")
|
||||
tool_dicts = None
|
||||
else:
|
||||
tool_dicts = [tool.model_dump() for tool in request.tools]
|
||||
|
||||
|
@ -51,7 +51,6 @@ class OpenAIServingResponses(OpenAIServing):
|
||||
return_tokens_as_token_ids: bool = False,
|
||||
reasoning_parser: str = "",
|
||||
enable_auto_tools: bool = False,
|
||||
expand_tools_even_if_tool_choice_none: bool = False,
|
||||
tool_parser: Optional[str] = None,
|
||||
enable_prompt_tokens_details: bool = False,
|
||||
enable_force_include_usage: bool = False,
|
||||
|
@ -9,7 +9,6 @@ from typing import Annotated, Any, Optional, Union
|
||||
|
||||
import msgspec
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import deprecated
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logits_process import LogitsProcessor
|
||||
@ -84,27 +83,6 @@ class GuidedDecodingParams:
|
||||
"You can only use one kind of guided decoding but multiple are "
|
||||
f"specified: {self.__dict__}")
|
||||
|
||||
if self.backend is not None and ":" in self.backend:
|
||||
self._extract_backend_options()
|
||||
|
||||
@deprecated(
|
||||
"Passing guided decoding backend options inside backend in the format "
|
||||
"'backend:...' is deprecated. This will be removed in v0.10.0. Please "
|
||||
"use the dedicated arguments '--disable-fallback', "
|
||||
"'--disable-any-whitespace' and '--disable-additional-properties' "
|
||||
"instead.")
|
||||
def _extract_backend_options(self):
|
||||
"""Extract backend options from the backend string."""
|
||||
assert isinstance(self.backend, str)
|
||||
self.backend, options = self.backend.split(":")
|
||||
options_set = set(options.strip().split(","))
|
||||
if "no-fallback" in options_set:
|
||||
self.disable_fallback = True
|
||||
if "disable-any-whitespace" in options_set:
|
||||
self.disable_any_whitespace = True
|
||||
if "no-additional-properties" in options_set:
|
||||
self.disable_additional_properties = True
|
||||
|
||||
|
||||
class RequestOutputKind(Enum):
|
||||
# Return entire output so far in every RequestOutput
|
||||
|
Reference in New Issue
Block a user