Mirror of https://github.com/vllm-project/vllm.git

Compare commits: v0.11.0rc1...v1_fix_pro (1 commit)
Commit: ccd21e1993
@@ -12,7 +12,7 @@ from typing import Any, Optional, TypeAlias
 import torch
 import tqdm
 
-from vllm import LLM, SamplingParams
+from vllm import LLM, SamplingParams, envs
 from vllm.engine.arg_utils import EngineArgs
 from vllm.profiler import layerwise_profile
 from vllm.utils import FlexibleArgumentParser
@@ -261,8 +261,13 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
 
     decode_profs = []
     for _ in tqdm.tqdm(range(num_steps_to_profile - 1)):
-        num_running_seqs = llm.llm_engine.scheduler[
-            0].get_num_unfinished_seq_groups()
+        if envs.VLLM_USE_V1:
+            num_running_seqs = llm.llm_engine.scheduler[
+                0].get_num_unfinished_requests()
+        else:
+            num_running_seqs = llm.llm_engine.scheduler[
+                0].get_num_unfinished_seq_groups()
+
         with layerwise_profile(
                 num_running_seqs=num_running_seqs) as decode_prof:
             llm.llm_engine.step()
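The profiling loop above now picks the scheduler query by engine generation: vllm.envs.VLLM_USE_V1 selects get_num_unfinished_requests() on the V1 scheduler and get_num_unfinished_seq_groups() on the V0 scheduler. Below is a minimal sketch of that same gate outside the loop, using only names that appear in the diff; the model name is purely illustrative.

# Hedged sketch, not part of the commit: how the VLLM_USE_V1 gate resolves the
# scheduler query. Only the model name below is an assumption.
from vllm import LLM, envs

llm = LLM(model="facebook/opt-125m")

if envs.VLLM_USE_V1:
    # V1 engine: a single scheduler wrapped in a one-element list (see the
    # LLMEngine hunks below); it reports unfinished *requests*.
    num_running_seqs = llm.llm_engine.scheduler[0].get_num_unfinished_requests()
else:
    # V0 engine: schedulers report unfinished *sequence groups*.
    num_running_seqs = llm.llm_engine.scheduler[0].get_num_unfinished_seq_groups()

print(f"unfinished work items: {num_running_seqs}")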
@@ -69,6 +69,8 @@ class LLMEngine:
         self.dp_group = None
         self.should_execute_dummy_batch = False
 
+        self.scheduler_config = vllm_config.scheduler_config
+
         # Tokenizer (+ ensure liveness if running in another process).
         self.tokenizer = init_tokenizer_from_configs(
             model_config=vllm_config.model_config,
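The two added lines expose scheduler_config directly on the V1 LLMEngine, matching the attribute that V0-era tooling (such as the profiling example above) reads off the engine. A hedged sketch of such a caller follows; the only assumptions beyond the diff are the illustrative model name and the max_num_seqs field that vllm's SchedulerConfig carries.

# Hedged sketch: a V0-style caller that reads scheduler settings straight off the
# engine object, which the added self.scheduler_config attribute keeps working
# under V1. The model name is illustrative.
from vllm import LLM

llm = LLM(model="facebook/opt-125m")
scheduler_config = llm.llm_engine.scheduler_config
print(f"max_num_seqs = {scheduler_config.max_num_seqs}")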
@@ -98,6 +100,9 @@ class LLMEngine:
         if not multiprocess_mode:
             # for v0 compatibility
             self.model_executor = self.engine_core.engine_core.model_executor  # type: ignore
+            self.scheduler = [
+                self.engine_core.engine_core.scheduler  # type: ignore
+            ]  # type: ignore
 
     @classmethod
     def from_vllm_config(
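The three added lines wrap the single V1 scheduler in a one-element list so that V0-style call sites, which index scheduler[0] (as the profiling example does), keep working unchanged. Here is a standalone sketch of that compatibility shape using a stand-in class rather than vllm's real scheduler:

# Illustrative stand-in, not vllm code. V0 exposed a list of schedulers (one per
# virtual engine), so callers index scheduler[0]; V1 has exactly one scheduler
# object, and putting it in a one-element list preserves that indexing.
class FakeV1Scheduler:
    def get_num_unfinished_requests(self) -> int:
        return 0  # nothing queued in this toy example


# Mirrors: self.scheduler = [self.engine_core.engine_core.scheduler]
scheduler = [FakeV1Scheduler()]
assert scheduler[0].get_num_unfinished_requests() == 0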