mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
Nccl update to 2.25.1 for cuda 12.4-12.8 (#146073)
Should resolve: https://github.com/pytorch/pytorch/issues/144768 We use one common nccl version for cuda builds 12.4-12.8 : ``NCCL_VERSION=v2.25.1-1`` For CUDA 11.8 we use legacy ``NCCL_VERSION=v2.21.1-1`` We use pinned version of NCCL rather then submodule. Move nccl location from ``third_party/nccl/nccl`` to ``third_party/nccl`` Pull Request resolved: https://github.com/pytorch/pytorch/pull/146073 Approved by: https://github.com/Skylion007, https://github.com/malfet, https://github.com/kwen2501, https://github.com/fduwjj
This commit is contained in:
committed by
PyTorch MergeBot
parent
bd370c138a
commit
4ece056791
@ -2,12 +2,18 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import platform
|
||||
import subprocess
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
|
||||
from .setup_helpers.cmake import CMake, USE_NINJA
|
||||
from .setup_helpers.env import check_negative_env_flag, IS_64BIT, IS_WINDOWS
|
||||
|
||||
|
||||
repo_root = Path(__file__).absolute().parent.parent
|
||||
third_party_path = os.path.join(repo_root, "third_party")
|
||||
|
||||
|
||||
def _get_vc_env(vc_arch: str) -> dict[str, str]:
|
||||
try:
|
||||
from setuptools import distutils # type: ignore[import]
|
||||
@ -79,6 +85,31 @@ def _create_build_env() -> dict[str, str]:
|
||||
return my_env
|
||||
|
||||
|
||||
def read_nccl_pin() -> str:
|
||||
nccl_file = "nccl-cu12.txt"
|
||||
if os.getenv("DESIRED_CUDA", "").startswith("11") or os.getenv(
|
||||
"CUDA_VERSION", ""
|
||||
).startswith("11"):
|
||||
nccl_file = "nccl-cu11.txt"
|
||||
nccl_pin_path = os.path.join(
|
||||
repo_root, ".ci", "docker", "ci_commit_pins", nccl_file
|
||||
)
|
||||
with open(nccl_pin_path) as f:
|
||||
return f.read().strip()
|
||||
|
||||
|
||||
def checkout_nccl() -> None:
|
||||
release_tag = read_nccl_pin()
|
||||
print(f"-- Checkout nccl release tag: {release_tag}")
|
||||
nccl_basedir = os.path.join(third_party_path, "nccl")
|
||||
if not os.path.exists(nccl_basedir):
|
||||
subprocess.check_call(
|
||||
["git", "clone", "https://github.com/NVIDIA/nccl.git", "nccl"],
|
||||
cwd=third_party_path,
|
||||
)
|
||||
subprocess.check_call(["git", "checkout", release_tag], cwd=nccl_basedir)
|
||||
|
||||
|
||||
def build_pytorch(
|
||||
version: str | None,
|
||||
cmake_python_library: str | None,
|
||||
@ -88,6 +119,7 @@ def build_pytorch(
|
||||
cmake: CMake,
|
||||
) -> None:
|
||||
my_env = _create_build_env()
|
||||
checkout_nccl()
|
||||
build_test = not check_negative_env_flag("BUILD_TEST")
|
||||
cmake.generate(
|
||||
version, cmake_python_library, build_python, build_test, my_env, rerun_cmake
|
||||
|
Reference in New Issue
Block a user