Compare commits

...

1 Commit

Author SHA1 Message Date
ccd21e1993 [V1] Fix profiling.py
Signed-off-by: Alexander Matveev <alexm@neuralmagic.com>
2025-04-11 18:36:37 +00:00
2 changed files with 13 additions and 3 deletions

View File

@@ -12,7 +12,7 @@ from typing import Any, Optional, TypeAlias
import torch import torch
import tqdm import tqdm
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams, envs
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
from vllm.profiler import layerwise_profile from vllm.profiler import layerwise_profile
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
@@ -261,8 +261,13 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
decode_profs = [] decode_profs = []
for _ in tqdm.tqdm(range(num_steps_to_profile - 1)): for _ in tqdm.tqdm(range(num_steps_to_profile - 1)):
num_running_seqs = llm.llm_engine.scheduler[ if envs.VLLM_USE_V1:
0].get_num_unfinished_seq_groups() num_running_seqs = llm.llm_engine.scheduler[
0].get_num_unfinished_requests()
else:
num_running_seqs = llm.llm_engine.scheduler[
0].get_num_unfinished_seq_groups()
with layerwise_profile( with layerwise_profile(
num_running_seqs=num_running_seqs) as decode_prof: num_running_seqs=num_running_seqs) as decode_prof:
llm.llm_engine.step() llm.llm_engine.step()

View File

@@ -69,6 +69,8 @@ class LLMEngine:
self.dp_group = None self.dp_group = None
self.should_execute_dummy_batch = False self.should_execute_dummy_batch = False
self.scheduler_config = vllm_config.scheduler_config
# Tokenizer (+ ensure liveness if running in another process). # Tokenizer (+ ensure liveness if running in another process).
self.tokenizer = init_tokenizer_from_configs( self.tokenizer = init_tokenizer_from_configs(
model_config=vllm_config.model_config, model_config=vllm_config.model_config,
@@ -98,6 +100,9 @@ class LLMEngine:
if not multiprocess_mode: if not multiprocess_mode:
# for v0 compatibility # for v0 compatibility
self.model_executor = self.engine_core.engine_core.model_executor # type: ignore self.model_executor = self.engine_core.engine_core.model_executor # type: ignore
self.scheduler = [
self.engine_core.engine_core.scheduler # type: ignore
] # type: ignore
@classmethod @classmethod
def from_vllm_config( def from_vllm_config(