diff --git a/examples/disaggregated_prefill/disaggregated_prefill_offline.py b/examples/disaggregated_prefill/disaggregated_prefill_offline.py index d7dd4b88b..ea131034b 100644 --- a/examples/disaggregated_prefill/disaggregated_prefill_offline.py +++ b/examples/disaggregated_prefill/disaggregated_prefill_offline.py @@ -12,6 +12,9 @@ import os import time from multiprocessing import Event, Process +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + kv_connector_extra_config = { "prefill_device_ips": ["1.2.3.1", "1.2.3.2"], "decode_device_ips": ["1.2.3.9", "1.2.3.10"], diff --git a/examples/disaggregated_prefill/dp_proxy.py b/examples/disaggregated_prefill/dp_proxy.py index 42bf12039..415e98134 100644 --- a/examples/disaggregated_prefill/dp_proxy.py +++ b/examples/disaggregated_prefill/dp_proxy.py @@ -13,6 +13,9 @@ import msgpack # type: ignore import zmq from quart import Quart, make_response, request +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + DP_PROXY_HTTP_PORT = 10004 DP_PROXY_ZMQ_REG_PORT = 30006 DP_PROXY_ZMQ_NOTIFY_PORT = 30005 diff --git a/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py b/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py index 03192561e..5baa355a0 100644 --- a/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py +++ b/examples/disaggregated_prefill/p2p_disaggrefated_prefill_proxy.py @@ -8,6 +8,9 @@ import msgpack # type: ignore import zmq from quart import Quart, make_response, request +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + prefill_instances: dict[str, str] = {} # http_address: zmq_address decode_instances: dict[str, str] = {} # http_address: zmq_address diff --git a/examples/eplb/eplb_strategy.py b/examples/eplb/eplb_strategy.py index 9470b952b..bcccbf23c 100644 --- a/examples/eplb/eplb_strategy.py +++ b/examples/eplb/eplb_strategy.py @@ -8,6 +8,9 @@ import matplotlib.pyplot as plt # type: ignore import numpy as np import torch +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + logger = logging.getLogger("msit_logger") diff --git a/examples/offline_data_parallel.py b/examples/offline_data_parallel.py index 754dfbc7c..024ef9805 100644 --- a/examples/offline_data_parallel.py +++ b/examples/offline_data_parallel.py @@ -60,6 +60,9 @@ from time import sleep from vllm import LLM, SamplingParams from vllm.utils import get_open_port +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + def parse_args(): import argparse diff --git a/examples/offline_disaggregated_prefill_npu.py b/examples/offline_disaggregated_prefill_npu.py index 9cea63ace..84fa3fe65 100644 --- a/examples/offline_disaggregated_prefill_npu.py +++ b/examples/offline_disaggregated_prefill_npu.py @@ -21,6 +21,8 @@ import os import time from multiprocessing import Event, Process +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" def clean_up(): import gc diff --git a/examples/offline_distributed_inference_npu.py b/examples/offline_distributed_inference_npu.py index 1af962b3b..4e2e7ed37 100644 --- a/examples/offline_distributed_inference_npu.py +++ b/examples/offline_distributed_inference_npu.py @@ -17,28 +17,37 @@ # Adapted from vllm-project/vllm/examples/offline_inference/basic.py # +import os from vllm import LLM, SamplingParams -prompts = [ - "Hello, my name is", 
- "The president of the United States is", - "The capital of France is", - "The future of AI is", -] +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" -# Create a sampling params object. -sampling_params = SamplingParams(max_tokens=100, temperature=0.0) -# Create an LLM. -llm = LLM( - model="Qwen/Qwen2.5-0.5B-Instruct", - tensor_parallel_size=2, - distributed_executor_backend="mp", - trust_remote_code=True, -) +def main(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] -# Generate texts from the prompts. -outputs = llm.generate(prompts, sampling_params) -for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + # Create a sampling params object. + sampling_params = SamplingParams(max_tokens=100, temperature=0.0) + # Create an LLM. + llm = LLM( + model="Qwen/Qwen2.5-0.5B-Instruct", + tensor_parallel_size=2, + distributed_executor_backend="mp", + trust_remote_code=True, + ) + + # Generate texts from the prompts. + outputs = llm.generate(prompts, sampling_params) + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + + +if __name__ == "__main__": + main() diff --git a/examples/offline_dualbatch_overlap_npu.py b/examples/offline_dualbatch_overlap_npu.py index e721ab2aa..2cc52137c 100644 --- a/examples/offline_dualbatch_overlap_npu.py +++ b/examples/offline_dualbatch_overlap_npu.py @@ -3,6 +3,8 @@ import time from vllm import LLM, SamplingParams +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" # enable dual-batch overlap for vllm ascend os.environ["VLLM_ASCEND_ENABLE_DBO"] = "1" diff --git a/examples/offline_embed.py b/examples/offline_embed.py index 91fba3870..7707e5fb2 100644 --- a/examples/offline_embed.py +++ b/examples/offline_embed.py @@ -19,35 +19,40 @@ import os -os.environ["VLLM_USE_MODELSCOPE"] = "True" - import torch from vllm import LLM +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" def get_detailed_instruct(task_description: str, query: str) -> str: return f'Instruct: {task_description}\nQuery:{query}' -# Each query must come with a one-sentence instruction that describes the task -task = 'Given a web search query, retrieve relevant passages that answer the query' +def main(): + # Each query must come with a one-sentence instruction that describes the task + task = 'Given a web search query, retrieve relevant passages that answer the query' -queries = [ - get_detailed_instruct(task, 'What is the capital of China?'), - get_detailed_instruct(task, 'Explain gravity') -] -# No need to add instruction for retrieval documents -documents = [ - "The capital of China is Beijing.", - "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun." -] -input_texts = queries + documents + queries = [ + get_detailed_instruct(task, 'What is the capital of China?'), + get_detailed_instruct(task, 'Explain gravity') + ] + # No need to add instruction for retrieval documents + documents = [ + "The capital of China is Beijing.", + "Gravity is a force that attracts two bodies towards each other. 
It gives weight to physical objects and is responsible for the movement of planets around the sun." + ] + input_texts = queries + documents -model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed") + model = LLM(model="Qwen/Qwen3-Embedding-0.6B", task="embed") -outputs = model.embed(input_texts) -embeddings = torch.tensor([o.outputs.embedding for o in outputs]) -# Calculate the similarity scores between the first two queries and the last two documents -scores = (embeddings[:2] @ embeddings[2:].T) -print(scores.tolist()) -# [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]] + outputs = model.embed(input_texts) + embeddings = torch.tensor([o.outputs.embedding for o in outputs]) + # Calculate the similarity scores between the first two queries and the last two documents + scores = (embeddings[:2] @ embeddings[2:].T) + print(scores.tolist()) + # [[0.7620252966880798, 0.14078938961029053], [0.1358368694782257, 0.6013815999031067]] + + +if __name__ == "__main__": + main() diff --git a/examples/offline_inference_audio_language.py b/examples/offline_inference_audio_language.py index 7392283a1..03bb1cb52 100644 --- a/examples/offline_inference_audio_language.py +++ b/examples/offline_inference_audio_language.py @@ -24,9 +24,14 @@ For most models, the prompt format should follow corresponding examples on HuggingFace model repository. """ +import os + from vllm import LLM, SamplingParams from vllm.assets.audio import AudioAsset +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" + audio_assets = [AudioAsset("mary_had_lamb"), AudioAsset("winning_call")] question_per_audio_count = { 1: "What is recited in the audio?", diff --git a/examples/offline_inference_npu.py b/examples/offline_inference_npu.py index 3e88c0017..4630bd1dd 100644 --- a/examples/offline_inference_npu.py +++ b/examples/offline_inference_npu.py @@ -21,24 +21,31 @@ import os os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" from vllm import LLM, SamplingParams -prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", -] -# Create a sampling params object. -sampling_params = SamplingParams(max_tokens=100, temperature=0.0) -# Create an LLM. -llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct") +def main(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] -# Generate texts from the prompts. -outputs = llm.generate(prompts, sampling_params) -for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + # Create a sampling params object. + sampling_params = SamplingParams(max_tokens=100, temperature=0.0) + # Create an LLM. + llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct") + + # Generate texts from the prompts. 
+ outputs = llm.generate(prompts, sampling_params) + for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + + +if __name__ == "__main__": + main() diff --git a/examples/offline_inference_npu_tp2.py b/examples/offline_inference_npu_tp2.py index 9f01c3af5..05082b004 100644 --- a/examples/offline_inference_npu_tp2.py +++ b/examples/offline_inference_npu_tp2.py @@ -25,7 +25,8 @@ os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" from vllm import LLM, SamplingParams -if __name__ == "__main__": + +def main(): prompts = [ "Hello, my name is", "The president of the United States is", @@ -48,3 +49,7 @@ if __name__ == "__main__": prompt = output.prompt generated_text = output.outputs[0].text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + + +if __name__ == "__main__": + main() diff --git a/examples/offline_inference_sleep_mode_npu.py b/examples/offline_inference_sleep_mode_npu.py index 7b7d42268..5ffcff6fb 100644 --- a/examples/offline_inference_sleep_mode_npu.py +++ b/examples/offline_inference_sleep_mode_npu.py @@ -25,7 +25,7 @@ from vllm.utils import GiB_bytes os.environ["VLLM_USE_MODELSCOPE"] = "True" os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" -if __name__ == "__main__": +def main(): prompt = "How are you?" free, total = torch.npu.mem_get_info() @@ -51,3 +51,7 @@ if __name__ == "__main__": output2 = llm.generate(prompt, sampling_params) # cmp output assert output[0].outputs[0].text == output2[0].outputs[0].text + + +if __name__ == "__main__": + main() diff --git a/examples/offline_multi_step_custom_ops.py b/examples/offline_multi_step_custom_ops.py index 59c7fafcc..8aa6af4bf 100644 --- a/examples/offline_multi_step_custom_ops.py +++ b/examples/offline_multi_step_custom_ops.py @@ -17,34 +17,45 @@ # limitations under the License. # +import os + from vllm import LLM, SamplingParams -prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - "China is", -] +os.environ["VLLM_USE_MODELSCOPE"] = "True" +os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" -# Create a sampling params object. -sampling_params = SamplingParams(max_tokens=100, temperature=0.0) -# Create an LLM. -llm = LLM( - model="Qwen/Qwen2.5-0.5B", - block_size=128, - max_model_len=1024, # max length of prompt - tensor_parallel_size=1, # number of NPUs to be used - max_num_seqs=26, # max batch number - enforce_eager= - True, # Force PyTorch eager execution to debug intermediate tensors (disables graph optimizations) - trust_remote_code= - True, # If the model is a cuscd tom model not yet available in the HuggingFace transformers library - num_scheduler_steps=8, - gpu_memory_utilization=0.5) -outputs = llm.generate(prompts, sampling_params) -for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") +def main(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + "China is", + ] + + # Create a sampling params object. + sampling_params = SamplingParams(max_tokens=100, temperature=0.0) + # Create an LLM. 
+    llm = LLM(
+        model="Qwen/Qwen2.5-0.5B",
+        block_size=128,
+        max_model_len=1024,  # max length of prompt
+        tensor_parallel_size=1,  # number of NPUs to be used
+        max_num_seqs=26,  # max batch number
+        enforce_eager=
+        True,  # Force PyTorch eager execution to debug intermediate tensors (disables graph optimizations)
+        trust_remote_code=
+        True,  # If the model is a custom model not yet available in the HuggingFace transformers library
+        num_scheduler_steps=8,
+        gpu_memory_utilization=0.5)
+
+    outputs = llm.generate(prompts, sampling_params)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/prompt_embedding_inference.py b/examples/prompt_embedding_inference.py
index e375a8b4f..c95323874 100644
--- a/examples/prompt_embedding_inference.py
+++ b/examples/prompt_embedding_inference.py
@@ -1,8 +1,13 @@
+import os
+
 import torch
 from transformers import (AutoModelForCausalLM, AutoTokenizer,
                           PreTrainedTokenizer)
 
 from vllm import LLM
 
+os.environ["VLLM_USE_MODELSCOPE"] = "True"
+os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
+
 def init_tokenizer_and_llm(model_name: str):
     tokenizer = AutoTokenizer.from_pretrained(model_name)