mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Move StructuredOutputsConfig
from config/__init__.py
to config/structured_outputs.py
(#25153)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@ -42,6 +42,7 @@ from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig,
|
||||
ParallelConfig)
|
||||
from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy
|
||||
from vllm.config.speculative import SpeculativeConfig
|
||||
from vllm.config.structured_outputs import StructuredOutputsConfig
|
||||
from vllm.config.utils import ConfigType, config
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import QuantizationMethods
|
||||
@ -2277,66 +2278,6 @@ def get_served_model_name(model: str,
|
||||
return served_model_name
|
||||
|
||||
|
||||
# Backend names accepted by `StructuredOutputsConfig.backend`.
StructuredOutputsBackend = Literal[
    "auto",
    "xgrammar",
    "guidance",
    "outlines",
    "lm-format-enforcer",
]
|
||||
|
||||
|
||||
@config
@dataclass
class StructuredOutputsConfig:
    """Dataclass which contains structured outputs config for the engine."""

    backend: StructuredOutputsBackend = "auto"
    """Which engine will be used for structured outputs (e.g. JSON schema,
    regex, etc) by default. With "auto", we will make opinionated choices
    based on request contents and what the backend libraries currently support,
    so the behavior is subject to change in each release."""

    disable_fallback: bool = False
    """If `True`, vLLM will not fallback to a different backend on error."""

    disable_any_whitespace: bool = False
    """If `True`, the model will not generate any whitespace during structured
    outputs. This is only supported for xgrammar and guidance backends."""

    disable_additional_properties: bool = False
    """If `True`, the `guidance` backend will not use `additionalProperties`
    in the JSON schema. This is only supported for the `guidance` backend and
    is used to better align its behaviour with `outlines` and `xgrammar`."""

    reasoning_parser: str = ""
    """Select the reasoning parser depending on the model that you're using.
    This is used to parse the reasoning content into OpenAI API format."""

    def compute_hash(self) -> str:
        """
        Return a hash over the settings that affect the computation graph.

        The hash is meant to uniquely identify every config value that
        changes the structure of the computation graph from input
        ids/embeddings to the final hidden states; anything before the
        input ids/embeddings or after the final hidden states is excluded.

        WARNING: Whenever a new field is added to this config, include it
        in the factors list if it affects the computation graph.
        """
        # None of the fields of this config affect the computation graph,
        # so the factor list is empty and the digest is a constant.
        factors: list[Any] = []
        digest = hashlib.md5(str(factors).encode(), usedforsecurity=False)
        return digest.hexdigest()

    def __post_init__(self):
        """Raise `ValueError` for options the chosen backend does not support."""
        whitespace_backends = ("xgrammar", "guidance")
        if (self.disable_any_whitespace
                and self.backend not in whitespace_backends):
            raise ValueError("disable_any_whitespace is only supported for "
                             "xgrammar and guidance backends.")

        if self.disable_additional_properties and self.backend != "guidance":
            raise ValueError("disable_additional_properties is only supported "
                             "for the guidance backend.")
|
||||
|
||||
|
||||
DetailedTraceModules = Literal["model", "worker", "all"]
|
||||
|
||||
|
||||
|
64
vllm/config/structured_outputs.py
Normal file
64
vllm/config/structured_outputs.py
Normal file
@ -0,0 +1,64 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import hashlib
|
||||
from typing import Any, Literal
|
||||
|
||||
from pydantic.dataclasses import dataclass
|
||||
|
||||
from vllm.config.utils import config
|
||||
|
||||
# Backend names accepted by `StructuredOutputsConfig.backend`.
StructuredOutputsBackend = Literal[
    "auto",
    "xgrammar",
    "guidance",
    "outlines",
    "lm-format-enforcer",
]
|
||||
|
||||
|
||||
@config
@dataclass
class StructuredOutputsConfig:
    """Dataclass which contains structured outputs config for the engine."""

    backend: StructuredOutputsBackend = "auto"
    """Which engine will be used for structured outputs (e.g. JSON schema,
    regex, etc) by default. With "auto", we will make opinionated choices
    based on request contents and what the backend libraries currently support,
    so the behavior is subject to change in each release."""
    disable_fallback: bool = False
    """If `True`, vLLM will not fallback to a different backend on error."""
    disable_any_whitespace: bool = False
    """If `True`, the model will not generate any whitespace during structured
    outputs. This is only supported for xgrammar and guidance backends."""
    disable_additional_properties: bool = False
    """If `True`, the `guidance` backend will not use `additionalProperties`
    in the JSON schema. This is only supported for the `guidance` backend and
    is used to better align its behaviour with `outlines` and `xgrammar`."""
    reasoning_parser: str = ""
    """Select the reasoning parser depending on the model that you're using.
    This is used to parse the reasoning content into OpenAI API format."""

    def compute_hash(self) -> str:
        """
        Return a hash over the settings that affect the computation graph.

        The hash is meant to uniquely identify every config value that
        changes the structure of the computation graph from input
        ids/embeddings to the final hidden states; anything before the
        input ids/embeddings or after the final hidden states is excluded.

        WARNING: Whenever a new field is added to this config, include it
        in the factors list if it affects the computation graph.
        """
        # None of the fields of this config affect the computation graph,
        # so the factor list is empty and the digest is a constant.
        factors: list[Any] = []
        digest = hashlib.md5(str(factors).encode(), usedforsecurity=False)
        return digest.hexdigest()

    def __post_init__(self):
        """Raise `ValueError` for options the chosen backend does not support."""
        whitespace_backends = ("xgrammar", "guidance")
        if (self.disable_any_whitespace
                and self.backend not in whitespace_backends):
            raise ValueError("disable_any_whitespace is only supported for "
                             "xgrammar and guidance backends.")

        if self.disable_additional_properties and self.backend != "guidance":
            raise ValueError("disable_additional_properties is only supported "
                             "for the guidance backend.")
|
Reference in New Issue
Block a user