mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
80 lines
2.5 KiB
Python
80 lines
2.5 KiB
Python
# SPDX-License-Identifier: Apache-2.0
|
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
|
|
from collections.abc import Sequence
|
|
from typing import Optional, Union
|
|
|
|
import torch
|
|
|
|
from vllm.logger import init_logger
|
|
from vllm.lora.request import LoRARequest
|
|
from vllm.pooling_params import PoolingParams
|
|
from vllm.sampling_params import BeamSearchParams, SamplingParams
|
|
|
|
# Module-level logger; RequestLogger writes its request/response records here.
logger = init_logger(__name__)
|
|
|
|
|
|
class RequestLogger:
    """Write request inputs and generated outputs to the module logger.

    ``max_log_len`` bounds how much of each prompt/output string and token-id
    sequence is included in a log record: when it is not ``None`` those values
    are sliced with ``[:max_log_len]`` before logging. ``None`` disables the
    slicing and values are logged as received.
    """

    def __init__(self, *, max_log_len: Optional[int]) -> None:
        """Store the (keyword-only) truncation limit used by the log methods."""
        self.max_log_len = max_log_len

    def log_inputs(
        self,
        request_id: str,
        prompt: Optional[str],
        prompt_token_ids: Optional[list[int]],
        prompt_embeds: Optional[torch.Tensor],
        params: Optional[Union[SamplingParams, PoolingParams,
                               BeamSearchParams]],
        lora_request: Optional[LoRARequest],
    ) -> None:
        """Emit one INFO record describing a received request.

        Args:
            request_id: Identifier included at the start of the record.
            prompt: Prompt text, sliced to ``max_log_len`` when a limit is set.
            prompt_token_ids: Prompt token ids, sliced the same way.
            prompt_embeds: Only its ``.shape`` is logged (``None`` if absent).
            params: Logged via ``%s`` formatting.
            lora_request: Logged via ``%s`` formatting.
        """
        limit = self.max_log_len

        # Slicing only rebinds the local names; the caller's objects are
        # left untouched.
        if limit is not None and prompt is not None:
            prompt = prompt[:limit]
        if limit is not None and prompt_token_ids is not None:
            prompt_token_ids = prompt_token_ids[:limit]

        # Log the embedding shape rather than the tensor contents.
        if prompt_embeds is None:
            embeds_shape = None
        else:
            embeds_shape = prompt_embeds.shape

        logger.info(
            "Received request %s: prompt: %r, "
            "params: %s, prompt_token_ids: %s, "
            "prompt_embeds shape: %s, "
            "lora_request: %s.",
            request_id,
            prompt,
            params,
            prompt_token_ids,
            embeds_shape,
            lora_request,
        )

    def log_outputs(
        self,
        request_id: str,
        outputs: str,
        output_token_ids: Optional[Sequence[int]],
        finish_reason: Optional[str] = None,
        is_streaming: bool = False,
        delta: bool = False,
    ) -> None:
        """Emit one INFO record describing a generated response.

        Args:
            request_id: Identifier included at the start of the record.
            outputs: Generated text, sliced to ``max_log_len`` when a limit
                is set.
            output_token_ids: Generated token ids; when a limit is set they
                are copied into a list and sliced, otherwise logged as given.
            finish_reason: Logged via ``%s`` formatting.
            is_streaming: When true, a streaming marker follows the request id.
            delta: Selects the " (streaming delta)" marker over
                " (streaming complete)"; only consulted when ``is_streaming``.
        """
        limit = self.max_log_len
        if limit is not None:
            if outputs is not None:
                outputs = outputs[:limit]
            if output_token_ids is not None:
                # Materialise as a list first, then keep the leading slice.
                as_list = list(output_token_ids)
                output_token_ids = as_list[:limit]

        # Suffix appended directly after the request id in the record.
        if not is_streaming:
            stream_info = ""
        elif delta:
            stream_info = " (streaming delta)"
        else:
            stream_info = " (streaming complete)"

        logger.info(
            "Generated response %s%s: output: %r, "
            "output_token_ids: %s, finish_reason: %s",
            request_id,
            stream_info,
            outputs,
            output_token_ids,
            finish_reason,
        )
|