Support CUDA nightly package in tools/nightly.py (#131133)

Add a new option `--cuda` to `tools/nightly.py` to pull the nightly packages with CUDA support.

```bash
# installs pytorch-nightly with cpuonly
tools/nightly.py pull

# The following are only available on Linux and Windows
# installs pytorch-nightly with latest CUDA we support
tools/nightly.py pull --cuda

# installs pytorch-nightly with CUDA 12.1
tools/nightly.py pull --cuda 12.1
```

Also add targets in `Makefile` and instructions in contribution guidelines.

```bash
# setup conda environment with pytorch-nightly
make setup-env

# setup conda environment with pytorch-nightly with CUDA support
make setup-env-cuda
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/131133
Approved by: https://github.com/ezyang
2025-10-20 21:14:14 +08:00 · 2024-07-19 08:38:29 +00:00
parent ceab3121de
commit 42a4df9447
5 changed files with 136 additions and 36 deletions
--- a/.devcontainer/scripts/install-dev-tools.sh
+++ b/.devcontainer/scripts/install-dev-tools.sh
@ -5,7 +5,7 @@ git submodule sync
 git submodule update --init --recursive

 # This takes some time
-make setup_lint
+make setup-lint

 # Add CMAKE_PREFIX_PATH to bashrc
 echo 'export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}' >> ~/.bashrc
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -11,6 +11,7 @@ aspects of contributing to PyTorch.
 <!-- toc -->

 - [Developing PyTorch](#developing-pytorch)
+  - [Setup the development environment](#setup-the-development-environment)
  - [Tips and Debugging](#tips-and-debugging)
 - [Nightly Checkout & Pull](#nightly-checkout--pull)
 - [Codebase structure](#codebase-structure)
@ -64,8 +65,24 @@ aspects of contributing to PyTorch.
 <!-- tocstop -->

 ## Developing PyTorch
+
 Follow the instructions for [installing PyTorch from source](https://github.com/pytorch/pytorch#from-source). If you get stuck when developing PyTorch on your machine, check out the [tips and debugging](#tips-and-debugging) section below for common solutions.

+### Setup the development environment
+
+First, you need to [fork the PyTorch project on GitHub](https://github.com/pytorch/pytorch/fork) and follow the instructions at [Connecting to GitHub with SSH](https://docs.github.com/en/authentication/connecting-to-github-with-ssh) to setup your SSH authentication credentials.
+
+Then clone the PyTorch project and setup the development environment:
+
+```bash
+git clone git@github.com:<USERNAME>/pytorch.git
+cd pytorch
+git remote add upstream git@github.com:pytorch/pytorch.git
+
+make setup-env  # or make setup-env-cuda for pre-built CUDA binaries
+conda activate pytorch-deps
+```
+
 ### Tips and Debugging

 * If you want to have no-op incremental rebuilds (which are fast), see [Make no-op build fast](#make-no-op-build-fast) below.
@ -175,6 +192,13 @@ the regular environment parameters (`--name` or `--prefix`):
 conda activate my-env
 ```

+To install the nightly binaries built with CUDA, you can pass in the flag `--cuda`:
+
+```bash
+./tools/nightly.py checkout -b my-nightly-branch --cuda
+conda activate pytorch-deps
+```
+
 You can also use this tool to pull the nightly commits into the current branch:

 ```bash
@ -325,7 +349,7 @@ command runs tests such as `TestNN.test_BCELoss` and
 Install all prerequisites by running

 ```bash
-make setup_lint
+make setup-lint
 ```

 You can now run the same linting steps that are used in CI locally via `make`:
--- a/Makefile
+++ b/Makefile
@ -1,6 +1,7 @@
 # This makefile does nothing but delegating the actual building to cmake.
 PYTHON = python3
-PIP = pip3
+PIP = $(PYTHON) -m pip
+NIGHTLY_TOOL_OPTS := pull

 all:
 	@mkdir -p build && cd build && cmake .. $(shell $(PYTHON) ./scripts/get_python_cmake_flags.py) && $(MAKE)
@ -22,10 +23,27 @@ linecount:
 		echo "Cloc is not available on the machine. You can install cloc with " && \
 		echo "    sudo apt-get install cloc"

-setup_lint:
+ensure-branch-clean:
+	@if [ -n "$(shell git status --porcelain)" ]; then \
+		echo "Please commit or stash all changes before running this script"; \
+		exit 1; \
+	fi
+
+setup-env: ensure-branch-clean
+	$(PYTHON) tools/nightly.py $(NIGHTLY_TOOL_OPTS)
+
+setup-env-cuda:
+	$(MAKE) setup-env PYTHON="$(PYTHON)" NIGHTLY_TOOL_OPTS="$(NIGHTLY_TOOL_OPTS) --cuda"
+
+setup_env: setup-env
+setup_env_cuda: setup-env-cuda
+
+setup-lint:
 	$(PIP) install lintrunner
 	lintrunner init

+setup_lint: setup-lint
+
 lint:
 	lintrunner

--- a/tools/linter/adapters/lintrunner_version_linter.py
+++ b/tools/linter/adapters/lintrunner_version_linter.py
@ -54,7 +54,7 @@ if __name__ == "__main__":
            name="command-failed",
            original=None,
            replacement=None,
-            description="Lintrunner is not installed, did you forget to run `make setup_lint && make lint`?",
+            description="Lintrunner is not installed, did you forget to run `make setup-lint && make lint`?",
        )
        sys.exit(0)

--- a/tools/nightly.py
+++ b/tools/nightly.py
@ -15,22 +15,29 @@ the regular environment parameters (--name or --prefix)::
    $ ./tools/nightly.py checkout -b my-nightly-branch -n my-env
    $ conda activate my-env

+To install the nightly binaries built with CUDA, you can pass in the flag --cuda::
+
+    $ ./tools/nightly.py checkout -b my-nightly-branch --cuda
+    $ conda activate pytorch-deps
+
 You can also use this tool to pull the nightly commits into the current branch as
-well. This can be done with
+well. This can be done with::

    $ ./tools/nightly.py pull -n my-env
    $ conda activate my-env

-Pulling will reinstalle the conda dependencies as well as the nightly binaries into
+Pulling will reinstall the conda dependencies as well as the nightly binaries into
 the repo directory.
 """

 from __future__ import annotations

+import argparse
 import contextlib
 import datetime
 import functools
 import glob
+import itertools
 import json
 import logging
 import os
@ -41,8 +48,8 @@ import sys
 import tempfile
 import time
 import uuid
-from argparse import ArgumentParser
 from ast import literal_eval
+from platform import system as platform_system
 from typing import Any, Callable, cast, Generator, Iterable, Iterator, Sequence, TypeVar


@ -52,7 +59,7 @@ DATETIME_FORMAT = "%Y-%m-%d_%Hh%Mm%Ss"
 SHA1_RE = re.compile("([0-9a-fA-F]{40})")
 USERNAME_PASSWORD_RE = re.compile(r":\/\/(.*?)\@")
 LOG_DIRNAME_RE = re.compile(
-    r"(\d{4}-\d\d-\d\d_\d\dh\d\dm\d\ds)_" r"[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}"
+    r"(\d{4}-\d\d-\d\d_\d\dh\d\dm\d\ds)_[0-9a-f]{8}-(?:[0-9a-f]{4}-){3}[0-9a-f]{12}"
 )
 SPECS_TO_INSTALL = ("pytorch", "mypy", "pytest", "hypothesis", "ipython", "sphinx")

@ -261,6 +268,8 @@ def _make_channel_args(

@timed("Solving conda environment")
 def conda_solve(
+    specs: Iterable[str],
+    *,
    name: str | None = None,
    prefix: str | None = None,
    channels: Iterable[str] = ("pytorch-nightly",),
@ -302,12 +311,13 @@ def conda_solve(
        channels=channels, override_channels=override_channels
    )
    cmd.extend(channel_args)
-    cmd.extend(SPECS_TO_INSTALL)
+    cmd.extend(specs)
    p = subprocess.run(cmd, capture_output=True, check=True)
    # parse solution
    solve = json.loads(p.stdout)
    link = solve["actions"]["LINK"]
    deps = []
+    pytorch, platform = "", ""
    for pkg in link:
        url = URL_FORMAT.format(**pkg)
        if pkg["name"] == "pytorch":
@ -315,6 +325,8 @@ def conda_solve(
            platform = pkg["platform"]
        else:
            deps.append(url)
+    assert pytorch, "PyTorch package not found in solve"
+    assert platform, "Platform not found in solve"
    return deps, pytorch, platform, existing_env, env_opts


@ -412,23 +424,33 @@ def pull_nightly_version(spdir: str) -> None:


 def _get_listing_linux(source_dir: str) -> list[str]:
-    listing = glob.glob(os.path.join(source_dir, "*.so"))
-    listing.extend(glob.glob(os.path.join(source_dir, "lib", "*.so")))
-    return listing
+    return list(
+        itertools.chain(
+            glob.iglob(os.path.join(source_dir, "*.so")),
+            glob.iglob(os.path.join(source_dir, "lib", "*.so")),
+            glob.iglob(os.path.join(source_dir, "lib", "*.so.*")),
+        )
+    )


 def _get_listing_osx(source_dir: str) -> list[str]:
    # oddly, these are .so files even on Mac
-    listing = glob.glob(os.path.join(source_dir, "*.so"))
-    listing.extend(glob.glob(os.path.join(source_dir, "lib", "*.dylib")))
-    return listing
+    return list(
+        itertools.chain(
+            glob.iglob(os.path.join(source_dir, "*.so")),
+            glob.iglob(os.path.join(source_dir, "lib", "*.dylib")),
+        )
+    )


 def _get_listing_win(source_dir: str) -> list[str]:
-    listing = glob.glob(os.path.join(source_dir, "*.pyd"))
-    listing.extend(glob.glob(os.path.join(source_dir, "lib", "*.lib")))
-    listing.extend(glob.glob(os.path.join(source_dir, "lib", "*.dll")))
-    return listing
+    return list(
+        itertools.chain(
+            glob.iglob(os.path.join(source_dir, "*.pyd")),
+            glob.iglob(os.path.join(source_dir, "lib", "*.lib")),
+            glob.iglob(os.path.join(source_dir, "lib", "*.dll")),
+        )
+    )


 def _glob_pyis(d: str) -> set[str]:
@ -480,6 +502,8 @@ def _move_single(
    is_dir = os.path.isdir(src)
    relpath = os.path.relpath(src, source_dir)
    trg = os.path.join(target_dir, relpath)
+    src = os.path.normpath(src)
+    trg = os.path.normpath(trg)
    _remove_existing(trg, is_dir)
    # move over new files
    if is_dir:
@ -488,8 +512,8 @@ def _move_single(
            relroot = os.path.relpath(root, src)
            for name in files:
                relname = os.path.join(relroot, name)
-                s = os.path.join(src, relname)
-                t = os.path.join(trg, relname)
+                s = os.path.normpath(os.path.join(src, relname))
+                t = os.path.normpath(os.path.join(trg, relname))
                print(f"{verb} {s} -> {t}")
                mover(s, t)
            for name in dirs:
@ -515,7 +539,9 @@ def move_nightly_files(spdir: str, platform: str) -> None:
    """Moves PyTorch files from temporary installed location to repo."""
    # get file listing
    source_dir = os.path.join(spdir, "torch")
-    target_dir = os.path.abspath("torch")
+    target_dir = os.path.abspath(
+        os.path.join(os.path.dirname(os.path.dirname(__file__)), "torch")
+    )
    listing = _get_listing(source_dir, target_dir, platform)
    # copy / link files
    if platform.startswith("win"):
@ -569,6 +595,7 @@ def write_pth(env_opts: list[str], platform: str) -> None:


 def install(
+    specs: Iterable[str],
    *,
    logger: logging.Logger,
    subcommand: str = "checkout",
@ -579,8 +606,13 @@ def install(
    override_channels: bool = False,
 ) -> None:
    """Development install of PyTorch"""
+    specs = list(specs)
    deps, pytorch, platform, existing_env, env_opts = conda_solve(
-        name=name, prefix=prefix, channels=channels, override_channels=override_channels
+        specs=specs,
+        name=name,
+        prefix=prefix,
+        channels=channels,
+        override_channels=override_channels,
    )
    if deps:
        deps_install(deps, existing_env, env_opts)
@ -602,12 +634,12 @@ def install(
    )


-def make_parser() -> ArgumentParser:
-    p = ArgumentParser("nightly")
+def make_parser() -> argparse.ArgumentParser:
+    p = argparse.ArgumentParser()
    # subcommands
    subcmd = p.add_subparsers(dest="subcmd", help="subcommand to execute")
-    co = subcmd.add_parser("checkout", help="checkout a new branch")
-    co.add_argument(
+    checkout = subcmd.add_parser("checkout", help="checkout a new branch")
+    checkout.add_argument(
        "-b",
        "--branch",
        help="Branch name to checkout",
@ -619,9 +651,9 @@ def make_parser() -> ArgumentParser:
        "pull", help="pulls the nightly commits into the current branch"
    )
    # general arguments
-    subps = [co, pull]
-    for subp in subps:
-        subp.add_argument(
+    subparsers = [checkout, pull]
+    for subparser in subparsers:
+        subparser.add_argument(
            "-n",
            "--name",
            help="Name of environment",
@ -629,7 +661,7 @@ def make_parser() -> ArgumentParser:
            default=None,
            metavar="ENVIRONMENT",
        )
-        subp.add_argument(
+        subparser.add_argument(
            "-p",
            "--prefix",
            help="Full path to environment location (i.e. prefix)",
@ -637,7 +669,7 @@ def make_parser() -> ArgumentParser:
            default=None,
            metavar="PATH",
        )
-        subp.add_argument(
+        subparser.add_argument(
            "-v",
            "--verbose",
            help="Provide debugging info",
@ -645,21 +677,36 @@ def make_parser() -> ArgumentParser:
            default=False,
            action="store_true",
        )
-        subp.add_argument(
+        subparser.add_argument(
            "--override-channels",
            help="Do not search default or .condarc channels.",
            dest="override_channels",
            default=False,
            action="store_true",
        )
-        subp.add_argument(
+        subparser.add_argument(
            "-c",
            "--channel",
-            help="Additional channel to search for packages. 'pytorch-nightly' will always be prepended to this list.",
+            help=(
+                "Additional channel to search for packages. "
+                "'pytorch-nightly' will always be prepended to this list."
+            ),
            dest="channels",
            action="append",
            metavar="CHANNEL",
        )
+        if platform_system() in {"Linux", "Windows"}:
+            subparser.add_argument(
+                "--cuda",
+                help=(
+                    "CUDA version to install "
+                    "(defaults to the latest version available on the platform)"
+                ),
+                dest="cuda",
+                nargs="?",
+                default=argparse.SUPPRESS,
+                metavar="VERSION",
+            )
    return p


@ -673,12 +720,23 @@ def main(args: Sequence[str] | None = None) -> None:
    status = status or check_branch(ns.subcmd, ns.branch)
    if status:
        sys.exit(status)
+    specs = list(SPECS_TO_INSTALL)
    channels = ["pytorch-nightly"]
+    if hasattr(ns, "cuda"):
+        if ns.cuda is not None:
+            specs.append(f"pytorch-cuda={ns.cuda}")
+        else:
+            specs.append("pytorch-cuda")
+        specs.append("pytorch-mutex=*=*cuda*")
+        channels.append("nvidia")
+    else:
+        specs.append("pytorch-mutex=*=*cpu*")
    if ns.channels:
        channels.extend(ns.channels)
    with logging_manager(debug=ns.verbose) as logger:
        LOGGER = logger
        install(
+            specs=specs,
            subcommand=ns.subcmd,
            branch=ns.branch,
            name=ns.name,