[V1] Fix profiling.py

Signed-off-by: Alexander Matveev <alexm@neuralmagic.com>
commit ccd21e1993
parent 4d022cbc75
Author: Alexander Matveev <alexm@neuralmagic.com>
Date: 2025-04-10 21:00:25 +00:00

2 changed files with 13 additions and 3 deletions

@@ -12,7 +12,7 @@ from typing import Any, Optional, TypeAlias
 import torch
 import tqdm
 
-from vllm import LLM, SamplingParams
+from vllm import LLM, SamplingParams, envs
 from vllm.engine.arg_utils import EngineArgs
 from vllm.profiler import layerwise_profile
 from vllm.utils import FlexibleArgumentParser
@@ -261,8 +261,13 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
     decode_profs = []
     for _ in tqdm.tqdm(range(num_steps_to_profile - 1)):
-        num_running_seqs = llm.llm_engine.scheduler[
-            0].get_num_unfinished_seq_groups()
+        if envs.VLLM_USE_V1:
+            num_running_seqs = llm.llm_engine.scheduler[
+                0].get_num_unfinished_requests()
+        else:
+            num_running_seqs = llm.llm_engine.scheduler[
+                0].get_num_unfinished_seq_groups()
         with layerwise_profile(
                 num_running_seqs=num_running_seqs) as decode_prof:
             llm.llm_engine.step()
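Note: V0's scheduler counts unfinished sequence groups while V1's counts unfinished requests, which is why the loop above must branch on envs.VLLM_USE_V1. A minimal standalone sketch of the same lookup, using only names that appear in this diff:

    from vllm import LLM, envs

    def get_num_running_seqs(llm: LLM) -> int:
        # scheduler is a list on both engines; index 0 is the first entry
        # (V0: per-virtual-engine list, V1: the single wrapped scheduler).
        scheduler = llm.llm_engine.scheduler[0]
        if envs.VLLM_USE_V1:
            # V1 scheduler API: unfinished requests.
            return scheduler.get_num_unfinished_requests()
        # V0 scheduler API: unfinished sequence groups.
        return scheduler.get_num_unfinished_seq_groups()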

@@ -69,6 +69,8 @@ class LLMEngine:
         self.dp_group = None
         self.should_execute_dummy_batch = False
+        self.scheduler_config = vllm_config.scheduler_config
+
         # Tokenizer (+ ensure liveness if running in another process).
         self.tokenizer = init_tokenizer_from_configs(
             model_config=vllm_config.model_config,
@@ -98,6 +100,9 @@
         if not multiprocess_mode:
             # for v0 compatibility
             self.model_executor = self.engine_core.engine_core.model_executor  # type: ignore
+            self.scheduler = [
+                self.engine_core.engine_core.scheduler  # type: ignore
+            ]  # type: ignore
 
     @classmethod
     def from_vllm_config(
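The one-element list is deliberate: V0's LLMEngine keeps its schedulers in a list (one per virtual engine) and callers such as profiling.py index it with scheduler[0], so wrapping V1's single EngineCore scheduler the same way keeps that call site unchanged. A hedged usage sketch, assuming in-process mode is forced via VLLM_ENABLE_V1_MULTIPROCESSING (the attribute is only set when multiprocess_mode is off):

    import os

    # Keep the engine core in-process so llm_engine.scheduler is populated
    # (the hunk above only runs when multiprocess_mode is false).
    os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"

    from vllm import LLM, envs

    llm = LLM(model="facebook/opt-125m")  # illustrative model choice
    if envs.VLLM_USE_V1:
        # Same indexing the V0 callers use; on V1 it now reaches the
        # EngineCore scheduler wrapped by the change above.
        print(llm.llm_engine.scheduler[0].get_num_unfinished_requests())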