mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Misc] small update (#20462)
Signed-off-by: reidliu41 <reid201711@gmail.com>
This commit is contained in:
@ -57,7 +57,10 @@ Once you have collected your profiles with this script, you can visualize them u
|
||||
Here are most likely the dependencies you need to install:
|
||||
|
||||
```bash
|
||||
pip install tensorflow-cpu tensorboard-plugin-profile etils importlib_resources
|
||||
pip install tensorflow-cpu \
|
||||
tensorboard-plugin-profile \
|
||||
etils \
|
||||
importlib_resources
|
||||
```
|
||||
|
||||
Then you just need to point TensorBoard to the directory where you saved the profiles and visit `http://localhost:6006/` in your browser:
|
||||
|
@ -13,13 +13,15 @@ vllm serve Qwen/Qwen2.5-3B-Instruct
|
||||
To serve a reasoning model, you can use the following command:
|
||||
|
||||
```bash
|
||||
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B --reasoning-parser deepseek_r1
|
||||
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-7B \
|
||||
--reasoning-parser deepseek_r1
|
||||
```
|
||||
|
||||
If you want to run this script standalone with `uv`, you can use the following:
|
||||
|
||||
```bash
|
||||
uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/online_serving/structured_outputs structured-output
|
||||
uvx --from git+https://github.com/vllm-project/vllm#subdirectory=examples/online_serving/structured_outputs \
|
||||
structured-output
|
||||
```
|
||||
|
||||
See [feature docs](https://docs.vllm.ai/en/latest/features/structured_outputs.html) for more information.
|
||||
@ -44,7 +46,9 @@ uv run structured_outputs.py --stream
|
||||
Run certain constraints, for example `structural_tag` and `regex`, streaming:
|
||||
|
||||
```bash
|
||||
uv run structured_outputs.py --constraint structural_tag regex --stream
|
||||
uv run structured_outputs.py \
|
||||
--constraint structural_tag regex \
|
||||
--stream
|
||||
```
|
||||
|
||||
Run all constraints, with reasoning models and streaming:
|
||||
|
@ -202,7 +202,7 @@ def parse_args():
|
||||
|
||||
|
||||
|
||||
def deserialize():
|
||||
def deserialize(args, tensorizer_config):
|
||||
if args.lora_path:
|
||||
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
|
||||
llm = LLM(model=args.model,
|
||||
@ -242,7 +242,7 @@ def deserialize():
|
||||
return llm
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
s3_access_key_id = (getattr(args, 's3_access_key_id', None)
|
||||
@ -260,8 +260,6 @@ if __name__ == '__main__':
|
||||
|
||||
model_ref = args.model
|
||||
|
||||
model_name = model_ref.split("/")[1]
|
||||
|
||||
if args.command == "serialize" or args.command == "deserialize":
|
||||
keyfile = args.keyfile
|
||||
else:
|
||||
@ -309,6 +307,10 @@ if __name__ == '__main__':
|
||||
encryption_keyfile = keyfile,
|
||||
**credentials
|
||||
)
|
||||
deserialize()
|
||||
deserialize(args, tensorizer_config)
|
||||
else:
|
||||
raise ValueError("Either serialize or deserialize must be specified.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Reference in New Issue
Block a user