[Doc] Add inplace weights loading example (#19640)

Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
2025-10-20 14:53:52 +08:00 · 2025-07-17 21:12:23 -07:00
parent 89cab4d01f
commit b38baabcf9
1 changed files with 53 additions and 0 deletions
--- a/examples/offline_inference/skip_loading_weights_in_engine_init.py
+++ b/examples/offline_inference/skip_loading_weights_in_engine_init.py
@ -0,0 +1,53 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm import LLM, RequestOutput, SamplingParams
+
+# Sample prompts.
+prompts = [
+    "Hello, my name is",
+    "The president of the United States is",
+    "The capital of France is",
+    "The future of AI is",
+]
+# Create a sampling params object.
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+
+
+def print_prompts_and_outputs(outputs: list[RequestOutput]) -> None:
+    print("-" * 60)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt:    {prompt!r}")
+        print(f"Output:    {generated_text!r}")
+        print("-" * 60)
+
+
+def main():
+    # Create an LLM without loading real weights
+    llm = LLM(
+        model="Qwen/Qwen3-0.6B",
+        load_format="dummy",
+        enforce_eager=True,
+        tensor_parallel_size=4,
+    )
+    outputs = llm.generate(prompts, sampling_params)
+    print("\nOutputs do not make sense:")
+    print_prompts_and_outputs(outputs)
+
+    # Update load format from `dummy` to `auto`
+    llm.collective_rpc(
+        "update_config", args=({"load_config": {"load_format": "auto"}},)
+    )
+    # Now reload real weights inplace
+    llm.collective_rpc("reload_weights")
+
+    # Check outputs make sense
+    outputs = llm.generate(prompts, sampling_params)
+    print("\nOutputs make sense after loading real weights:")
+    print_prompts_and_outputs(outputs)
+
+
+if __name__ == "__main__":
+    main()