Refactor clang_tidy.py (#61119)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/61119 This change spilts the clang-tidy CI job into smaller steps and uses a refactored version of the clang_tidy.py script. The new folder structure is as follows: ``` tools/linter/clang_tidy |_ __main__py |_ requirements.txt |_ run.py |_ setup.sh ``` `__main__.py` This script will run `tools/linter/clang_tidy/setup.sh` if a `build` directory doesn't exist, mimicing what used to be done as a separate step in the CI job. After that, it will invoke `clang-tidy` with default arguments being declared in the script itself (as opposed to declaring them in lint.yml). The reasoning behind this approach is two-fold: - Make it easier to run `clang-tidy` locally using this script - De-duplicate the option passing `requirements.txt` Contains a list of additional python dependencies needed by the `clang-tidy` script. `setup.sh` If a build directory doesn't exist, this command will run the necessary codegen and build commands for running `clang-tidy` Example usage: ``` python3 tools/linter/clang_tidy --parallel ``` Notice that we don't have to put the `.py` at the end of `clang_tidy`. Test Plan: Run the following command: ``` python3 tools/linter/clang_tidy --paths torch/csrc/fx --parallel ``` Reviewed By: walterddr, janeyx99 Differential Revision: D29568582 Pulled By: 1ntEgr8 fbshipit-source-id: cd6d11c5cb8ba9f1344a87c35647a1cd8dd45b04
2025-10-20 12:54:11 +08:00 · 2021-07-06 16:00:47 -07:00
parent 81e36d02a6
commit a1ad28da10
17 changed files with 402 additions and 303 deletions
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@ -326,75 +326,26 @@ jobs:
          mkdir clang-tidy-output
          cd clang-tidy-output
          echo "$HEAD_SHA" > commit-sha.txt
-      - name: Generate build files
-        run: |
-          cd "${GITHUB_WORKSPACE}"
-          set -eux
-
-          if [ ! -d build ]; then
-            git submodule update --init --recursive
-
-            export USE_NCCL=0
-            export USE_DEPLOY=1
-            # We really only need compile_commands.json, so no need to build!
-            time python3 setup.py --cmake-only build
-
-            # Generate ATen files.
-            time python3 -m tools.codegen.gen \
-              -s aten/src/ATen \
-              -d build/aten/src/ATen
-
-            # Generate PyTorch files.
-            time python3 tools/setup_helpers/generate_code.py            \
-              --declarations-path build/aten/src/ATen/Declarations.yaml \
-              --native-functions-path aten/src/ATen/native/native_functions.yaml \
-              --nn-path aten/src
-          fi
-      - name: Run clang-tidy
+      - name: Fetch PR diff
        env:
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
        run: |
          cd "${GITHUB_WORKSPACE}"
-          set -eux
-
          wget -O pr.diff "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/$PR_NUMBER.diff"
-
-          # Run Clang-Tidy
-          # The negative filters below are to exclude files that include onnx_pb.h or
-          # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
-          # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
-          # in a follow up PR.
-          # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built.
-          # deploy/interpreter files are excluded due to using macros and other techniques
-          # that are not easily converted to accepted c++
-          python3 tools/linter/clang_tidy.py \
+      - name: Run clang-tidy
+        run: |
+          cd "${GITHUB_WORKSPACE}"
+          python3 tools/linter/clang_tidy \
+            --diff-file pr.diff \
            --parallel \
            --verbose \
-            --paths torch/csrc/ \
-            --diff-file pr.diff \
-            --include-dir /usr/lib/llvm-11/include/openmp \
-            -g"-torch/csrc/jit/passes/onnx/helper.cpp" \
-            -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \
-            -g"-torch/csrc/jit/serialization/onnx.cpp" \
-            -g"-torch/csrc/jit/serialization/export.cpp" \
-            -g"-torch/csrc/jit/serialization/import.cpp" \
-            -g"-torch/csrc/jit/serialization/import_legacy.cpp" \
-            -g"-torch/csrc/onnx/init.cpp" \
-            -g"-torch/csrc/cuda/nccl.*" \
-            -g"-torch/csrc/cuda/python_nccl.cpp" \
-            -g"-torch/csrc/autograd/FunctionsManual.cpp" \
-            -g"-torch/csrc/generic/*.cpp" \
-            -g"-torch/csrc/jit/codegen/cuda/runtime/*" \
-            -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \
-            -g"-torch/csrc/deploy/interpreter/interpreter.h" \
-            -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \
-            -g"-torch/csrc/deploy/interpreter/test_main.cpp" \
            "$@" >"${GITHUB_WORKSPACE}"/clang-tidy-output.txt
-
-
          cat "${GITHUB_WORKSPACE}"/clang-tidy-output.txt
-
+      - name: Annotate output
+        env:
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          cd "${GITHUB_WORKSPACE}"
          tools/linter/translate_annotations.py \
            --file=clang-tidy-output.txt \
            --regex='^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorDesc>.*?) \[(?P<errorCode>.*)\]' \
--- a/.gitignore
+++ b/.gitignore
@ -283,8 +283,9 @@ TAGS
 # ccls file
 .ccls-cache/

-# clang-format storage location used by apply_clang_format.py
+# clang tooling storage location
 .clang-format-bin
+.clang-tidy-bin

 # clangd background index
 .clangd/
@ -305,3 +306,6 @@ bazel-*

 # core dump files
 core.*
+
+# Generated if you use the pre-commit script for clang-tidy
+pr.diff
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -1097,7 +1097,8 @@ have more checks than older versions. In our CI, we run clang-tidy-6.0.
   uncommitted changes). Changes are picked up based on a `git diff` with the
   given revision:
  ```bash
-  python tools/linter/clang_tidy.py -d build -p torch/csrc --diff 'HEAD~1'
+  git diff HEAD~1 > pr.diff
+  python tools/linter/clang_tidy --paths torch/csrc --diff-file "pr.diff"
  ```

 Above, it is assumed you are in the PyTorch root folder. `path/to/build` should
--- a/3
+++ b/3
@ -70,6 +70,7 @@ setup_lint:
 		--job 'shellcheck' --step 'Install ShellCheck' --no-quiet; \
 	fi
 	pip install jinja2
+	pip install -r tools/linter/clang_tidy/requirements.txt

 quick_checks:
 # TODO: This is broken when 'git config submodule.recurse' is 'true' since the
@ -116,4 +117,4 @@ toc:
 lint: flake8 mypy quick_checks cmakelint shellcheck

 quicklint: CHANGED_ONLY=--changed-only
-quicklint: mypy flake8 mypy quick_checks cmakelint shellcheck
+quicklint: mypy flake8 quick_checks cmakelint shellcheck
--- a/mypy.ini
+++ b/mypy.ini
@ -37,11 +37,6 @@ files =
    test/test_type_hints.py,
    test/test_type_info.py,
    test/test_utils.py,
-    tools/linter/clang_format_utils.py,
-    tools/linter/clang_tidy.py,
-    tools/generate_torch_version.py,
-    tools/render_junit.py,
-    tools/stats

 #
 # `exclude` is a regex, not a list of paths like `files` (sigh)
--- a/tools/README.md
+++ b/tools/README.md
@ -37,7 +37,7 @@ Build system pieces:

 Developer tools which you might find useful:

-* [linter/clang_tidy.py](linter/clang_tidy.py) - Script for running clang-tidy
+* [linter/clang_tidy](linter/clang_tidy/__main__.py) - Script for running clang-tidy
  on lines of your script which you changed.
 * [extract_scripts.py](extract_scripts.py) - Extract scripts from
  `.github/workflows/*.yml` into a specified dir, on which linters such as
--- a/tools/git-pre-commit
+++ b/tools/git-pre-commit
@ -7,9 +7,10 @@ python tools/linter/flake8_hook.py
 if [ $(which clang-tidy) ]
 then
  echo "Running pre-commit clang-tidy"
-  python tools/linter/clang_tidy.py \
+  git diff HEAD > pr.diff
+  python tools/linter/clang_tidy \
    --paths torch/csrc \
-    --diff HEAD \
+    --diff-file "pr.diff" \
    -g"-torch/csrc/jit/passes/onnx/helper.cpp" \
    -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \
    -g"-torch/csrc/jit/serialization/onnx.cpp" \
--- a/tools/linter/clang_format_utils.py
+++ b/tools/linter/clang_format_utils.py
@ -1,16 +1,5 @@
-import platform
-import sys
-import stat
-import hashlib
 import os
-import urllib.request
-import urllib.error
-
-# String representing the host platform (e.g. Linux, Darwin).
-HOST_PLATFORM = platform.system()
-
-# PyTorch directory root, derived from the location of this file.
-PYTORCH_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+from install.download_bin import download, PYTORCH_ROOT  # type: ignore[import]

 # This dictionary maps each platform to the S3 object URL for its clang-format binary.
 PLATFORM_TO_CF_URL = {
@ -24,135 +13,8 @@ PLATFORM_TO_HASH = {
    "Linux": os.path.join("tools", "clang_format_hash", "linux64", "clang-format-linux64"),
 }

-# Directory and file paths for the clang-format binary.
 CLANG_FORMAT_DIR = os.path.join(PYTORCH_ROOT, ".clang-format-bin")
 CLANG_FORMAT_PATH = os.path.join(CLANG_FORMAT_DIR, "clang-format")

-def compute_file_sha256(path: str) -> str:
-    """Compute the SHA256 hash of a file and return it as a hex string."""
-    # If the file doesn't exist, return an empty string.
-    if not os.path.exists(path):
-        return ""
-
-    hash = hashlib.sha256()
-
-    # Open the file in binary mode and hash it.
-    with open(path, "rb") as f:
-        for b in f:
-            hash.update(b)
-
-    # Return the hash as a hexadecimal string.
-    return hash.hexdigest()
-
-
-def report_download_progress(chunk_number: int, chunk_size: int, file_size: int) -> None:
-    """
-    Pretty printer for file download progress.
-    """
-    if file_size != -1:
-        percent = min(1, (chunk_number * chunk_size) / file_size)
-        bar = "#" * int(64 * percent)
-        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))
-
-
-def download_clang_format(path: str) -> bool:
-    """
-    Downloads a clang-format binary appropriate for the host platform and stores it at the given location.
-    """
-    if HOST_PLATFORM not in PLATFORM_TO_CF_URL:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
-        return False
-
-    cf_url = PLATFORM_TO_CF_URL[HOST_PLATFORM]
-    filename = os.path.join(path, "clang-format")
-
-    # Try to download clang-format.
-    print("Downloading clang-format to {}".format(path))
-    try:
-        urllib.request.urlretrieve(
-            cf_url, filename, reporthook=report_download_progress if sys.stdout.isatty() else None
-        )
-    except urllib.error.URLError as e:
-        print("Error downloading {}: {}".format(filename, str(e)))
-        return False
-    finally:
-        print()
-
-    return True
-
-
 def get_and_check_clang_format(verbose: bool = False) -> bool:
-    """
-    Download a platform-appropriate clang-format binary if one doesn't already exist at the expected location and verify
-    that it is the right binary by checking its SHA256 hash against the expected hash.
-    """
-    if not os.path.exists(CLANG_FORMAT_DIR):
-        # If the directory doesn't exist, try to create it.
-        try:
-            os.mkdir(CLANG_FORMAT_DIR)
-        except OSError as e:
-            print("Unable to create directory for clang-format binary: {}".format(CLANG_FORMAT_DIR))
-            return False
-        finally:
-            if verbose:
-                print("Created directory {} for clang-format binary".format(CLANG_FORMAT_DIR))
-
-        # If the directory didn't exist, neither did the binary, so download it.
-        ok = download_clang_format(CLANG_FORMAT_DIR)
-
-        if not ok:
-            return False
-    else:
-        # If the directory exists but the binary doesn't, download it.
-        if not os.path.exists(CLANG_FORMAT_PATH):
-            ok = download_clang_format(CLANG_FORMAT_DIR)
-
-            if not ok:
-                return False
-        else:
-            if verbose:
-                print("Found pre-existing clang-format binary, skipping download")
-
-    # Now that the binary is where it should be, hash it.
-    actual_bin_hash = compute_file_sha256(CLANG_FORMAT_PATH)
-
-    # If the host platform is not in PLATFORM_TO_HASH, it is unsupported.
-    if HOST_PLATFORM not in PLATFORM_TO_HASH:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
-        return False
-
-    # This is the path to the file containing the reference hash.
-    hashpath = os.path.join(PYTORCH_ROOT, PLATFORM_TO_HASH[HOST_PLATFORM])
-
-    if not os.path.exists(hashpath):
-        print("Unable to find reference binary hash")
-        return False
-
-    # Load the reference hash and compare the actual hash to it.
-    with open(hashpath, "r") as f:
-        reference_bin_hash = f.readline().strip()
-
-        if verbose:
-            print("Reference Hash: {}".format(repr(reference_bin_hash)))
-            print("Actual Hash: {}".format(repr(actual_bin_hash)))
-
-        if reference_bin_hash != actual_bin_hash:
-            print("The downloaded binary is not what was expected!")
-
-            # Err on the side of caution and try to delete the downloaded binary.
-            try:
-                os.unlink(CLANG_FORMAT_PATH)
-                print("The binary has been deleted just to be safe")
-            except OSError as e:
-                print("Failed to delete binary: {}".format(str(e)))
-                print("Delete this binary as soon as possible and do not execute it!")
-
-            return False
-        else:
-            # Make sure the binary is executable.
-            mode = os.stat(CLANG_FORMAT_PATH).st_mode
-            mode |= stat.S_IXUSR
-            os.chmod(CLANG_FORMAT_PATH, mode)
-            print("Using clang-format located at {}".format(CLANG_FORMAT_PATH))
-
-    return True
+    return bool(download("clang-format", CLANG_FORMAT_DIR, PLATFORM_TO_CF_URL, PLATFORM_TO_HASH))
--- a/tools/linter/clang_tidy/main.py
+++ b/tools/linter/clang_tidy/main.py
@ -0,0 +1,130 @@
+import argparse
+import pathlib
+
+from run import run
+from generate_build_files import generate_build_files
+
+
+DEFAULTS = {
+    "glob": [
+        # The negative filters below are to exclude files that include onnx_pb.h or
+        # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
+        # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
+        # in a follow up PR.
+        # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built.
+        # deploy/interpreter files are excluded due to using macros and other techniquies
+        # that are not easily converted to accepted c++
+        "-torch/csrc/jit/passes/onnx/helper.cpp",
+        "-torch/csrc/jit/passes/onnx/shape_type_inference.cpp",
+        "-torch/csrc/jit/serialization/onnx.cpp",
+        "-torch/csrc/jit/serialization/export.cpp",
+        "-torch/csrc/jit/serialization/import.cpp",
+        "-torch/csrc/jit/serialization/import_legacy.cpp",
+        "-torch/csrc/onnx/init.cpp",
+        "-torch/csrc/cuda/nccl.*",
+        "-torch/csrc/cuda/python_nccl.cpp",
+        "-torch/csrc/autograd/FunctionsManual.cpp",
+        "-torch/csrc/generic/*.cpp",
+        "-torch/csrc/jit/codegen/cuda/runtime/*",
+        "-torch/csrc/deploy/interpreter/interpreter.cpp",
+        "-torch/csrc/deploy/interpreter/interpreter.h",
+        "-torch/csrc/deploy/interpreter/interpreter_impl.h",
+        "-torch/csrc/deploy/interpreter/test_main.cpp",
+    ],
+    "paths": ["torch/csrc/"],
+    "include-dir": ["/usr/lib/llvm-11/include/openmp"],
+}
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)")
+    parser.add_argument(
+        "-e",
+        "--clang-tidy-exe",
+        default="clang-tidy",
+        help="Path to clang-tidy executable",
+    )
+    parser.add_argument(
+        "-g",
+        "--glob",
+        action="append",
+        default=DEFAULTS["glob"],
+        help="Only lint files that match these glob patterns "
+        "(see documentation for `fnmatch` for supported syntax)."
+        "If a pattern starts with a - the search is negated for that pattern.",
+    )
+    parser.add_argument(
+        "-x",
+        "--regex",
+        action="append",
+        default=[],
+        help="Only lint files that match these regular expressions (from the start of the filename). "
+        "If a pattern starts with a - the search is negated for that pattern.",
+    )
+    parser.add_argument(
+        "-c",
+        "--compile-commands-dir",
+        default="build",
+        help="Path to the folder containing compile_commands.json",
+    )
+    parser.add_argument(
+        "--diff-file", help="File containing diff to use for determining files to lint and line filters"
+    )
+    parser.add_argument(
+        "-p",
+        "--paths",
+        nargs="+",
+        default=DEFAULTS["paths"],
+        help="Lint only the given paths (recursively)",
+    )
+    parser.add_argument(
+        "-n",
+        "--dry-run",
+        action="store_true",
+        help="Only show the command to be executed, without running it",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument(
+        "--config-file",
+        help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.",
+    )
+    parser.add_argument(
+        "-k",
+        "--keep-going",
+        action="store_true",
+        help="Don't error on compiler errors (clang-diagnostic-error)",
+    )
+    parser.add_argument(
+        "-j",
+        "--parallel",
+        action="store_true",
+        help="Run clang tidy in parallel per-file (requires ninja to be installed).",
+    )
+    parser.add_argument(
+        "--print-include-paths",
+        action="store_true",
+        help="Print the search paths used for include directives"
+    )
+    parser.add_argument(
+        "-I",
+        "--include-dir",
+        action="append",
+        default=DEFAULTS["include-dir"],
+        help="Add the specified directory to the search path for include files",
+    )
+    parser.add_argument("-s", "--suppress-diagnostics", action="store_true",
+                        help="Add NOLINT to suppress clang-tidy violations")
+    parser.add_argument(
+        "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy"
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    if not pathlib.Path("build").exists():
+        generate_build_files()
+    options = parse_args()
+    run(options)
+
+
+main()
--- a/tools/linter/clang_tidy/generate_build_files.py
+++ b/tools/linter/clang_tidy/generate_build_files.py
@ -0,0 +1,62 @@
+import subprocess
+import sys
+import os
+from typing import List
+
+
+def run_cmd(cmd: List[str]) -> None:
+    print(f"Running: {cmd}")
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
+    stdout, stderr = result.stdout.decode("utf-8").strip(), result.stderr.decode("utf-8").strip()
+    print(stdout)
+    print(stderr)
+    if result.returncode != 0:
+        print(f"Failed to run {cmd}")
+        exit(1)
+
+
+def run_timed_cmd(cmd: List[str]) -> None:
+    run_cmd(["time"] + cmd)
+
+
+def update_submodules() -> None:
+    run_cmd(["git", "submodule", "update", "--init", "--recursive"])
+
+
+def gen_compile_commands() -> None:
+    os.environ["USE_NCCL"] = "0"
+    os.environ["USE_DEPLOY"] = "1"
+    run_timed_cmd([sys.executable, "setup.py", "--cmake-only", "build"])
+
+
+def run_autogen() -> None:
+    run_timed_cmd(
+        [
+            sys.executable,
+            "-m",
+            "tools.codegen.gen",
+            "-s",
+            "aten/src/ATen",
+            "-d",
+            "build/aten/src/ATen",
+        ]
+    )
+
+    run_timed_cmd(
+        [
+            sys.executable,
+            "tools/setup_helpers/generate_code.py",
+            "--declarations-path",
+            "build/aten/src/ATen/Declarations.yaml",
+            "--native-functions-path",
+            "aten/src/ATen/native/native_functions.yaml",
+            "--nn-path",
+            "aten/src",
+        ]
+    )
+
+
+def generate_build_files() -> None:
+    update_submodules()
+    gen_compile_commands()
+    run_autogen()
--- a/tools/linter/clang_tidy/requirements.txt
+++ b/tools/linter/clang_tidy/requirements.txt
@ -0,0 +1 @@
+unidiff==0.6.0
--- a/tools/linter/clang_tidy/run.py
+++ b/tools/linter/clang_tidy/run.py
@ -14,7 +14,6 @@ glob or regular expressions.



-import argparse
 import collections
 import fnmatch
 import json
@ -267,93 +266,7 @@ def apply_nolint(fname: str, warnings: Dict[int, Set[str]]) -> None:
        f.write("".join(lines))


-def parse_options() -> Any:
-    """Parses the command line options."""
-    parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)")
-    parser.add_argument(
-        "-e",
-        "--clang-tidy-exe",
-        default="clang-tidy",
-        help="Path to clang-tidy executable",
-    )
-    parser.add_argument(
-        "-g",
-        "--glob",
-        action="append",
-        default=[],
-        help="Only lint files that match these glob patterns "
-        "(see documentation for `fnmatch` for supported syntax)."
-        "If a pattern starts with a - the search is negated for that pattern.",
-    )
-    parser.add_argument(
-        "-x",
-        "--regex",
-        action="append",
-        default=[],
-        help="Only lint files that match these regular expressions (from the start of the filename). "
-        "If a pattern starts with a - the search is negated for that pattern.",
-    )
-    parser.add_argument(
-        "-c",
-        "--compile-commands-dir",
-        default="build",
-        help="Path to the folder containing compile_commands.json",
-    )
-    parser.add_argument(
-        "--diff-file", help="File containing diff to use for determining files to lint and line filters"
-    )
-    parser.add_argument(
-        "-p",
-        "--paths",
-        nargs="+",
-        default=["."],
-        help="Lint only the given paths (recursively)",
-    )
-    parser.add_argument(
-        "-n",
-        "--dry-run",
-        action="store_true",
-        help="Only show the command to be executed, without running it",
-    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
-    parser.add_argument(
-        "--config-file",
-        help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.",
-    )
-    parser.add_argument(
-        "-k",
-        "--keep-going",
-        action="store_true",
-        help="Don't error on compiler errors (clang-diagnostic-error)",
-    )
-    parser.add_argument(
-        "-j",
-        "--parallel",
-        action="store_true",
-        help="Run clang tidy in parallel per-file (requires ninja to be installed).",
-    )
-    parser.add_argument(
-        "--print-include-paths",
-        action="store_true",
-        help="Print the search paths used for include directives"
-    )
-    parser.add_argument(
-        "-I",
-        "--include-dir",
-        action="append",
-        help="Add the specified directory to the search path for include files",
-    )
-    parser.add_argument("-s", "--suppress-diagnostics", action="store_true",
-                        help="Add NOLINT to suppress clang-tidy violations")
-    parser.add_argument(
-        "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy"
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    options = parse_options()
-
+def run(options: Any) -> None:
    # This flag is pervasive enough to set it globally. It makes the code
    # cleaner compared to threading it through every single function.
    global VERBOSE
@ -404,6 +317,3 @@ def main() -> None:
    for line in clang_tidy_output.splitlines():
        if line.startswith(pwd):
            print(line[len(pwd):])
-
-if __name__ == "__main__":
-    main()
--- a/tools/linter/install/init.py
+++ b/tools/linter/install/init.py
--- a/tools/linter/install/clang_tidy.py
+++ b/tools/linter/install/clang_tidy.py
@ -0,0 +1,18 @@
+import os
+from tools.linter.install.download_bin import download, PYTORCH_ROOT, HASH_PATH
+
+PLATFORM_TO_URL = {
+    "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
+}
+
+PLATFORM_TO_HASH = {
+    "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"),
+}
+
+OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin")
+
+if __name__ == "__main__":
+    ok = download("clang-tidy", OUTPUT_DIR, PLATFORM_TO_URL, PLATFORM_TO_HASH)
+    if not ok:
+        print("Installation failed!")
+        exit(1)
--- a/tools/linter/install/download_bin.py
+++ b/tools/linter/install/download_bin.py
@ -0,0 +1,163 @@
+import platform
+import sys
+import stat
+import hashlib
+import subprocess
+import os
+import urllib.request
+import urllib.error
+
+from typing import Dict
+
+# String representing the host platform (e.g. Linux, Darwin).
+HOST_PLATFORM = platform.system()
+
+# PyTorch directory root
+result = subprocess.run(
+    ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True,
+)
+PYTORCH_ROOT = result.stdout.decode("utf-8").strip()
+
+HASH_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "hashes")
+
+
+def compute_file_sha256(path: str) -> str:
+    """Compute the SHA256 hash of a file and return it as a hex string."""
+    # If the file doesn't exist, return an empty string.
+    if not os.path.exists(path):
+        return ""
+
+    hash = hashlib.sha256()
+
+    # Open the file in binary mode and hash it.
+    with open(path, "rb") as f:
+        for b in f:
+            hash.update(b)
+
+    # Return the hash as a hexadecimal string.
+    return hash.hexdigest()
+
+
+def report_download_progress(
+    chunk_number: int, chunk_size: int, file_size: int
+) -> None:
+    """
+    Pretty printer for file download progress.
+    """
+    if file_size != -1:
+        percent = min(1, (chunk_number * chunk_size) / file_size)
+        bar = "#" * int(64 * percent)
+        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))
+
+
+def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str]) -> bool:
+    """
+    Downloads the binary appropriate for the host platform and stores it in the given output directory.
+    """
+    if HOST_PLATFORM not in platform_to_url:
+        print(f"Unsupported platform: {HOST_PLATFORM}")
+        return False
+
+    url = platform_to_url[HOST_PLATFORM]
+    filename = os.path.join(output_dir, name)
+
+    # Try to download binary.
+    print(f"Downloading {name} to {output_dir}")
+    try:
+        urllib.request.urlretrieve(
+            url,
+            filename,
+            reporthook=report_download_progress if sys.stdout.isatty() else None,
+        )
+    except urllib.error.URLError as e:
+        print(f"Error downloading {filename}: {e}")
+        return False
+    finally:
+        print()
+
+    return True
+
+
+def download(
+    name: str,
+    output_dir: str,
+    platform_to_url: Dict[str, str],
+    platform_to_hash: Dict[str, str],
+    verbose: bool = False,
+) -> bool:
+    """
+    Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies
+    that it is the right binary by checking its SHA256 hash against the expected hash.
+    """
+
+    output_path = os.path.join(output_dir, name)
+    if not os.path.exists(output_dir):
+        # If the directory doesn't exist, try to create it.
+        try:
+            os.mkdir(output_dir)
+        except OSError as e:
+            print(f"Unable to create directory for {name} binary: {output_dir}")
+            return False
+        finally:
+            if verbose:
+                print(f"Created directory {output_dir} for {name} binary")
+
+        # If the directory didn't exist, neither did the binary, so download it.
+        ok = download_bin(name, output_dir, platform_to_url)
+
+        if not ok:
+            return False
+    else:
+        # If the directory exists but the binary doesn't, download it.
+        if not os.path.exists(output_path):
+            ok = download_bin(name, output_dir, platform_to_url)
+
+            if not ok:
+                return False
+        else:
+            if verbose:
+                print(f"Found pre-existing {name} binary, skipping download")
+
+    # Now that the binary is where it should be, hash it.
+    actual_bin_hash = compute_file_sha256(output_path)
+
+    # If the host platform is not in platform_to_hash, it is unsupported.
+    if HOST_PLATFORM not in platform_to_hash:
+        print(f"Unsupported platform: {HOST_PLATFORM}")
+        return False
+
+    # This is the path to the file containing the reference hash.
+    hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM])
+
+    if not os.path.exists(hashpath):
+        print("Unable to find reference binary hash")
+        return False
+
+    # Load the reference hash and compare the actual hash to it.
+    with open(hashpath, "r") as f:
+        reference_bin_hash = f.readline().strip()
+
+        if verbose:
+            print(f"Reference Hash: {reference_bin_hash}")
+            print(f"Actual Hash: {repr(actual_bin_hash)}")
+
+        if reference_bin_hash != actual_bin_hash:
+            print("The downloaded binary is not what was expected!")
+
+            # Err on the side of caution and try to delete the downloaded binary.
+            try:
+                os.unlink(output_path)
+                print("The binary has been deleted just to be safe")
+            except OSError as e:
+                print(f"Failed to delete binary: {e}")
+                print("Delete this binary as soon as possible and do not execute it!")
+
+            return False
+        else:
+            # Make sure the binary is executable.
+            mode = os.stat(output_path).st_mode
+            mode |= stat.S_IXUSR
+            os.chmod(output_path, mode)
+            print(f"Using {name} located at {output_path}")
+
+    return True
--- a/tools/linter/install/hashes/clang-tidy-linux64
+++ b/tools/linter/install/hashes/clang-tidy-linux64
@ -0,0 +1 @@
+44103f662c866921dabd40d047788935c43402d260be10613ab2111e01726892
--- a/tools/linter/translate_annotations.py
+++ b/tools/linter/translate_annotations.py
@ -24,7 +24,6 @@ class Diff(TypedDict):
    hunks: List[Hunk]


-# adapted from the similar regex in tools/linter/clang_tidy.py
 # @@ -start,count +start,count @@
 hunk_pattern = r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'
				`@ -0,0 +1 @@`
				`44103f662c866921dabd40d047788935c43402d260be10613ab2111e01726892`