[Misc] Revive to use loopback address for driver IP (#7091)

Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
Rui Qiao
2024-08-02 15:50:00 -07:00
committed by GitHub
parent 05308891e2
commit 22e718ff1a

View File

@ -216,6 +216,16 @@ class RayGPUExecutor(DistributedGPUExecutor):
self._run_workers("update_environment_variables",
all_args=all_args_to_update_environment_variables)
if len(node_gpus) == 1:
# in single node case, we don't need to get the IP address.
# the loopback address is sufficient
# NOTE: a node may have several IP addresses, one for each
# network interface. `get_ip()` might return any of them,
# while they might not work for communication inside the node
# if the network setup is complicated. Using the loopback address
# solves this issue, as it always works for communication inside
# the node.
driver_ip = "127.0.0.1"
distributed_init_method = get_distributed_init_method(
driver_ip, get_open_port())