mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
Compare commits
1 Commits
v0.9.1
...
v1_fix_pro
Author | SHA1 | Date | |
---|---|---|---|
ccd21e1993 |
@ -12,7 +12,7 @@ from typing import Any, Optional, TypeAlias
|
|||||||
import torch
|
import torch
|
||||||
import tqdm
|
import tqdm
|
||||||
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams, envs
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
from vllm.profiler import layerwise_profile
|
from vllm.profiler import layerwise_profile
|
||||||
from vllm.utils import FlexibleArgumentParser
|
from vllm.utils import FlexibleArgumentParser
|
||||||
@ -261,8 +261,13 @@ def run_profile(context: ProfileContext, csv_output: Optional[str],
|
|||||||
|
|
||||||
decode_profs = []
|
decode_profs = []
|
||||||
for _ in tqdm.tqdm(range(num_steps_to_profile - 1)):
|
for _ in tqdm.tqdm(range(num_steps_to_profile - 1)):
|
||||||
num_running_seqs = llm.llm_engine.scheduler[
|
if envs.VLLM_USE_V1:
|
||||||
0].get_num_unfinished_seq_groups()
|
num_running_seqs = llm.llm_engine.scheduler[
|
||||||
|
0].get_num_unfinished_requests()
|
||||||
|
else:
|
||||||
|
num_running_seqs = llm.llm_engine.scheduler[
|
||||||
|
0].get_num_unfinished_seq_groups()
|
||||||
|
|
||||||
with layerwise_profile(
|
with layerwise_profile(
|
||||||
num_running_seqs=num_running_seqs) as decode_prof:
|
num_running_seqs=num_running_seqs) as decode_prof:
|
||||||
llm.llm_engine.step()
|
llm.llm_engine.step()
|
||||||
|
@ -69,6 +69,8 @@ class LLMEngine:
|
|||||||
self.dp_group = None
|
self.dp_group = None
|
||||||
self.should_execute_dummy_batch = False
|
self.should_execute_dummy_batch = False
|
||||||
|
|
||||||
|
self.scheduler_config = vllm_config.scheduler_config
|
||||||
|
|
||||||
# Tokenizer (+ ensure liveness if running in another process).
|
# Tokenizer (+ ensure liveness if running in another process).
|
||||||
self.tokenizer = init_tokenizer_from_configs(
|
self.tokenizer = init_tokenizer_from_configs(
|
||||||
model_config=vllm_config.model_config,
|
model_config=vllm_config.model_config,
|
||||||
@ -98,6 +100,9 @@ class LLMEngine:
|
|||||||
if not multiprocess_mode:
|
if not multiprocess_mode:
|
||||||
# for v0 compatibility
|
# for v0 compatibility
|
||||||
self.model_executor = self.engine_core.engine_core.model_executor # type: ignore
|
self.model_executor = self.engine_core.engine_core.model_executor # type: ignore
|
||||||
|
self.scheduler = [
|
||||||
|
self.engine_core.engine_core.scheduler # type: ignore
|
||||||
|
] # type: ignore
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_vllm_config(
|
def from_vllm_config(
|
||||||
|
Reference in New Issue
Block a user