[Bug Fix] Fix address/port already in use error for deep_ep test (#20094)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-06-26 10:33:13 -04:00
committed by GitHub
parent 1f5d178e9c
commit c894c5dc1f
2 changed files with 13 additions and 1 deletions

View File

@ -4,6 +4,7 @@ DeepEP test utilities
"""
import dataclasses
import importlib
import os
import traceback
from typing import Callable, Optional
@ -13,6 +14,8 @@ from torch.multiprocessing import (
spawn) # pyright: ignore[reportPrivateImportUsage]
from typing_extensions import Concatenate, ParamSpec
from vllm.model_executor.layers.fused_moe.utils import find_free_port
has_deep_ep = importlib.util.find_spec("deep_ep") is not None
if has_deep_ep:
from vllm.model_executor.layers.fused_moe.deepep_ht_prepare_finalize import ( # noqa: E501
@ -92,7 +95,7 @@ def parallel_launch(
world_size,
world_size,
0,
"tcp://localhost:29500",
f"tcp://{os.getenv('LOCALHOST', 'localhost')}:{find_free_port()}",
worker,
) + args,
nprocs=world_size,

View File

@ -1,5 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import socket
from contextlib import closing
from math import prod
from typing import Optional
@ -96,3 +98,10 @@ def _fp8_perm(m: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
return m.view(dtype=torch.uint8)[idx, ...].view(dtype=m.dtype)
else:
return m[idx, ...]
def find_free_port() -> int:
    """Ask the operating system for an unused TCP port and return it.

    A throwaway IPv4 stream socket is bound to port 0, which makes the OS
    pick a free port. The chosen port number is read back from the socket
    and the socket is closed before this function returns.

    Note: the port is released again on return, so nothing prevents
    another process from claiming it before the caller binds to it.

    Returns:
        The port number that the OS assigned to the temporary socket.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:  # the socket is closed when the block exits
        # Port 0 delegates the choice of port to the OS.
        probe.bind(('', 0))
        # NOTE(review): the option is applied after bind(), so it cannot
        # influence the bind above; the ordering is kept as-is on purpose.
        probe.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        _host, port = probe.getsockname()
    return port