From a1ad28da102f54e297b2db3360c1c5ed54a1f5ae Mon Sep 17 00:00:00 2001
From: Elton Leander Pinto <eltonpinto@fb.com>
Date: Tue, 6 Jul 2021 16:00:47 -0700
Subject: [PATCH] Refactor clang_tidy.py (#61119)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/61119

This change spilts the clang-tidy CI job into smaller steps and uses a
refactored version of the clang_tidy.py script.

The new folder structure is as follows:
```
tools/linter/clang_tidy
|_ __main__py
|_ requirements.txt
|_ run.py
|_ setup.sh
```

`__main__.py`

This script will run `tools/linter/clang_tidy/setup.sh` if a `build`
directory doesn't exist, mimicing what used to be done as a separate
step in the CI job.

After that, it will invoke `clang-tidy` with default arguments being
declared in the script itself (as opposed to declaring them in
lint.yml).

The reasoning behind this approach is two-fold:

- Make it easier to run `clang-tidy` locally using this script
- De-duplicate the option passing

`requirements.txt`

Contains a list of additional python dependencies needed by the
`clang-tidy` script.

`setup.sh`

If a build directory doesn't exist, this command will run the necessary
codegen and build commands for running `clang-tidy`

Example usage:
```
python3 tools/linter/clang_tidy --parallel
```
Notice that we don't have to put the `.py` at the end of `clang_tidy`.

Test Plan:
Run the following command:
```
python3 tools/linter/clang_tidy --paths torch/csrc/fx --parallel
```

Reviewed By: walterddr, janeyx99

Differential Revision: D29568582

Pulled By: 1ntEgr8

fbshipit-source-id: cd6d11c5cb8ba9f1344a87c35647a1cd8dd45b04
---
 .github/workflows/lint.yml                    |  71 ++------
 .gitignore                                    |   6 +-
 CONTRIBUTING.md                               |   3 +-
 Makefile                                      |   3 +-
 mypy.ini                                      |   5 -
 tools/README.md                               |   2 +-
 tools/git-pre-commit                          |   5 +-
 tools/linter/clang_format_utils.py            | 142 +--------------
 tools/linter/clang_tidy/__main__.py           | 130 ++++++++++++++
 .../linter/clang_tidy/generate_build_files.py |  62 +++++++
 tools/linter/clang_tidy/requirements.txt      |   1 +
 .../{clang_tidy.py => clang_tidy/run.py}      |  92 +---------
 tools/linter/install/__init__.py              |   0
 tools/linter/install/clang_tidy.py            |  18 ++
 tools/linter/install/download_bin.py          | 163 ++++++++++++++++++
 .../linter/install/hashes/clang-tidy-linux64  |   1 +
 tools/linter/translate_annotations.py         |   1 -
 17 files changed, 402 insertions(+), 303 deletions(-)
 create mode 100644 tools/linter/clang_tidy/__main__.py
 create mode 100644 tools/linter/clang_tidy/generate_build_files.py
 create mode 100644 tools/linter/clang_tidy/requirements.txt
 rename tools/linter/{clang_tidy.py => clang_tidy/run.py} (80%)
 mode change 100755 => 100644
 create mode 100644 tools/linter/install/__init__.py
 create mode 100644 tools/linter/install/clang_tidy.py
 create mode 100644 tools/linter/install/download_bin.py
 create mode 100644 tools/linter/install/hashes/clang-tidy-linux64

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index a0f12933a259..a2a75361c17d 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -326,75 +326,26 @@ jobs:
           mkdir clang-tidy-output
           cd clang-tidy-output
           echo "$HEAD_SHA" > commit-sha.txt
-      - name: Generate build files
-        run: |
-          cd "${GITHUB_WORKSPACE}"
-          set -eux
-
-          if [ ! -d build ]; then
-            git submodule update --init --recursive
-
-            export USE_NCCL=0
-            export USE_DEPLOY=1
-            # We really only need compile_commands.json, so no need to build!
-            time python3 setup.py --cmake-only build
-
-            # Generate ATen files.
-            time python3 -m tools.codegen.gen \
-              -s aten/src/ATen \
-              -d build/aten/src/ATen
-
-            # Generate PyTorch files.
-            time python3 tools/setup_helpers/generate_code.py            \
-              --declarations-path build/aten/src/ATen/Declarations.yaml \
-              --native-functions-path aten/src/ATen/native/native_functions.yaml \
-              --nn-path aten/src
-          fi
-      - name: Run clang-tidy
+      - name: Fetch PR diff
         env:
-          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
           PR_NUMBER: ${{ github.event.pull_request.number }}
         run: |
           cd "${GITHUB_WORKSPACE}"
-          set -eux
-
           wget -O pr.diff "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/$PR_NUMBER.diff"
-
-          # Run Clang-Tidy
-          # The negative filters below are to exclude files that include onnx_pb.h or
-          # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
-          # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
-          # in a follow up PR.
-          # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built.
-          # deploy/interpreter files are excluded due to using macros and other techniques
-          # that are not easily converted to accepted c++
-          python3 tools/linter/clang_tidy.py \
+      - name: Run clang-tidy
+        run: |
+          cd "${GITHUB_WORKSPACE}"
+          python3 tools/linter/clang_tidy \
+            --diff-file pr.diff \
             --parallel \
             --verbose \
-            --paths torch/csrc/ \
-            --diff-file pr.diff \
-            --include-dir /usr/lib/llvm-11/include/openmp \
-            -g"-torch/csrc/jit/passes/onnx/helper.cpp" \
-            -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \
-            -g"-torch/csrc/jit/serialization/onnx.cpp" \
-            -g"-torch/csrc/jit/serialization/export.cpp" \
-            -g"-torch/csrc/jit/serialization/import.cpp" \
-            -g"-torch/csrc/jit/serialization/import_legacy.cpp" \
-            -g"-torch/csrc/onnx/init.cpp" \
-            -g"-torch/csrc/cuda/nccl.*" \
-            -g"-torch/csrc/cuda/python_nccl.cpp" \
-            -g"-torch/csrc/autograd/FunctionsManual.cpp" \
-            -g"-torch/csrc/generic/*.cpp" \
-            -g"-torch/csrc/jit/codegen/cuda/runtime/*" \
-            -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \
-            -g"-torch/csrc/deploy/interpreter/interpreter.h" \
-            -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \
-            -g"-torch/csrc/deploy/interpreter/test_main.cpp" \
             "$@" >"${GITHUB_WORKSPACE}"/clang-tidy-output.txt
-
-
           cat "${GITHUB_WORKSPACE}"/clang-tidy-output.txt
-
+      - name: Annotate output
+        env:
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
+        run: |
+          cd "${GITHUB_WORKSPACE}"
           tools/linter/translate_annotations.py \
             --file=clang-tidy-output.txt \
             --regex='^(?P<filename>.*?):(?P<lineNumber>\d+):(?P<columnNumber>\d+): (?P<errorDesc>.*?) \[(?P<errorCode>.*)\]' \
diff --git a/.gitignore b/.gitignore
index f4313361f233..89f139cc18d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -283,8 +283,9 @@ TAGS
 # ccls file
 .ccls-cache/
 
-# clang-format storage location used by apply_clang_format.py
+# clang tooling storage location
 .clang-format-bin
+.clang-tidy-bin
 
 # clangd background index
 .clangd/
@@ -305,3 +306,6 @@ bazel-*
 
 # core dump files
 core.*
+
+# Generated if you use the pre-commit script for clang-tidy
+pr.diff
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 92bd46e39dc6..189cd18099ca 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1097,7 +1097,8 @@ have more checks than older versions. In our CI, we run clang-tidy-6.0.
    uncommitted changes). Changes are picked up based on a `git diff` with the
    given revision:
   ```bash
-  python tools/linter/clang_tidy.py -d build -p torch/csrc --diff 'HEAD~1'
+  git diff HEAD~1 > pr.diff
+  python tools/linter/clang_tidy --paths torch/csrc --diff-file "pr.diff"
   ```
 
 Above, it is assumed you are in the PyTorch root folder. `path/to/build` should
diff --git a/Makefile b/Makefile
index 4c59f8e66ac9..94688032c424 100644
--- a/Makefile
+++ b/Makefile
@@ -70,6 +70,7 @@ setup_lint:
 		--job 'shellcheck' --step 'Install ShellCheck' --no-quiet; \
 	fi
 	pip install jinja2
+	pip install -r tools/linter/clang_tidy/requirements.txt
 
 quick_checks:
 # TODO: This is broken when 'git config submodule.recurse' is 'true' since the
@@ -116,4 +117,4 @@ toc:
 lint: flake8 mypy quick_checks cmakelint shellcheck
 
 quicklint: CHANGED_ONLY=--changed-only
-quicklint: mypy flake8 mypy quick_checks cmakelint shellcheck
+quicklint: mypy flake8 quick_checks cmakelint shellcheck
diff --git a/mypy.ini b/mypy.ini
index 9a6bd999df09..f6ced626ae4f 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -37,11 +37,6 @@ files =
     test/test_type_hints.py,
     test/test_type_info.py,
     test/test_utils.py,
-    tools/linter/clang_format_utils.py,
-    tools/linter/clang_tidy.py,
-    tools/generate_torch_version.py,
-    tools/render_junit.py,
-    tools/stats
 
 #
 # `exclude` is a regex, not a list of paths like `files` (sigh)
diff --git a/tools/README.md b/tools/README.md
index 795f37ef82bb..a28affa5f30a 100644
--- a/tools/README.md
+++ b/tools/README.md
@@ -37,7 +37,7 @@ Build system pieces:
 
 Developer tools which you might find useful:
 
-* [linter/clang_tidy.py](linter/clang_tidy.py) - Script for running clang-tidy
+* [linter/clang_tidy](linter/clang_tidy/__main__.py) - Script for running clang-tidy
   on lines of your script which you changed.
 * [extract_scripts.py](extract_scripts.py) - Extract scripts from
   `.github/workflows/*.yml` into a specified dir, on which linters such as
diff --git a/tools/git-pre-commit b/tools/git-pre-commit
index a829fe9444b3..cdca9a4b1155 100755
--- a/tools/git-pre-commit
+++ b/tools/git-pre-commit
@@ -7,9 +7,10 @@ python tools/linter/flake8_hook.py
 if [ $(which clang-tidy) ]
 then
   echo "Running pre-commit clang-tidy"
-  python tools/linter/clang_tidy.py \
+  git diff HEAD > pr.diff
+  python tools/linter/clang_tidy \
     --paths torch/csrc \
-    --diff HEAD \
+    --diff-file "pr.diff" \
     -g"-torch/csrc/jit/passes/onnx/helper.cpp" \
     -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \
     -g"-torch/csrc/jit/serialization/onnx.cpp" \
diff --git a/tools/linter/clang_format_utils.py b/tools/linter/clang_format_utils.py
index 6d40a8acc690..021ba9162cca 100644
--- a/tools/linter/clang_format_utils.py
+++ b/tools/linter/clang_format_utils.py
@@ -1,16 +1,5 @@
-import platform
-import sys
-import stat
-import hashlib
 import os
-import urllib.request
-import urllib.error
-
-# String representing the host platform (e.g. Linux, Darwin).
-HOST_PLATFORM = platform.system()
-
-# PyTorch directory root, derived from the location of this file.
-PYTORCH_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
+from install.download_bin import download, PYTORCH_ROOT  # type: ignore[import]
 
 # This dictionary maps each platform to the S3 object URL for its clang-format binary.
 PLATFORM_TO_CF_URL = {
@@ -24,135 +13,8 @@ PLATFORM_TO_HASH = {
     "Linux": os.path.join("tools", "clang_format_hash", "linux64", "clang-format-linux64"),
 }
 
-# Directory and file paths for the clang-format binary.
 CLANG_FORMAT_DIR = os.path.join(PYTORCH_ROOT, ".clang-format-bin")
 CLANG_FORMAT_PATH = os.path.join(CLANG_FORMAT_DIR, "clang-format")
 
-def compute_file_sha256(path: str) -> str:
-    """Compute the SHA256 hash of a file and return it as a hex string."""
-    # If the file doesn't exist, return an empty string.
-    if not os.path.exists(path):
-        return ""
-
-    hash = hashlib.sha256()
-
-    # Open the file in binary mode and hash it.
-    with open(path, "rb") as f:
-        for b in f:
-            hash.update(b)
-
-    # Return the hash as a hexadecimal string.
-    return hash.hexdigest()
-
-
-def report_download_progress(chunk_number: int, chunk_size: int, file_size: int) -> None:
-    """
-    Pretty printer for file download progress.
-    """
-    if file_size != -1:
-        percent = min(1, (chunk_number * chunk_size) / file_size)
-        bar = "#" * int(64 * percent)
-        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))
-
-
-def download_clang_format(path: str) -> bool:
-    """
-    Downloads a clang-format binary appropriate for the host platform and stores it at the given location.
-    """
-    if HOST_PLATFORM not in PLATFORM_TO_CF_URL:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
-        return False
-
-    cf_url = PLATFORM_TO_CF_URL[HOST_PLATFORM]
-    filename = os.path.join(path, "clang-format")
-
-    # Try to download clang-format.
-    print("Downloading clang-format to {}".format(path))
-    try:
-        urllib.request.urlretrieve(
-            cf_url, filename, reporthook=report_download_progress if sys.stdout.isatty() else None
-        )
-    except urllib.error.URLError as e:
-        print("Error downloading {}: {}".format(filename, str(e)))
-        return False
-    finally:
-        print()
-
-    return True
-
-
 def get_and_check_clang_format(verbose: bool = False) -> bool:
-    """
-    Download a platform-appropriate clang-format binary if one doesn't already exist at the expected location and verify
-    that it is the right binary by checking its SHA256 hash against the expected hash.
-    """
-    if not os.path.exists(CLANG_FORMAT_DIR):
-        # If the directory doesn't exist, try to create it.
-        try:
-            os.mkdir(CLANG_FORMAT_DIR)
-        except OSError as e:
-            print("Unable to create directory for clang-format binary: {}".format(CLANG_FORMAT_DIR))
-            return False
-        finally:
-            if verbose:
-                print("Created directory {} for clang-format binary".format(CLANG_FORMAT_DIR))
-
-        # If the directory didn't exist, neither did the binary, so download it.
-        ok = download_clang_format(CLANG_FORMAT_DIR)
-
-        if not ok:
-            return False
-    else:
-        # If the directory exists but the binary doesn't, download it.
-        if not os.path.exists(CLANG_FORMAT_PATH):
-            ok = download_clang_format(CLANG_FORMAT_DIR)
-
-            if not ok:
-                return False
-        else:
-            if verbose:
-                print("Found pre-existing clang-format binary, skipping download")
-
-    # Now that the binary is where it should be, hash it.
-    actual_bin_hash = compute_file_sha256(CLANG_FORMAT_PATH)
-
-    # If the host platform is not in PLATFORM_TO_HASH, it is unsupported.
-    if HOST_PLATFORM not in PLATFORM_TO_HASH:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
-        return False
-
-    # This is the path to the file containing the reference hash.
-    hashpath = os.path.join(PYTORCH_ROOT, PLATFORM_TO_HASH[HOST_PLATFORM])
-
-    if not os.path.exists(hashpath):
-        print("Unable to find reference binary hash")
-        return False
-
-    # Load the reference hash and compare the actual hash to it.
-    with open(hashpath, "r") as f:
-        reference_bin_hash = f.readline().strip()
-
-        if verbose:
-            print("Reference Hash: {}".format(repr(reference_bin_hash)))
-            print("Actual Hash: {}".format(repr(actual_bin_hash)))
-
-        if reference_bin_hash != actual_bin_hash:
-            print("The downloaded binary is not what was expected!")
-
-            # Err on the side of caution and try to delete the downloaded binary.
-            try:
-                os.unlink(CLANG_FORMAT_PATH)
-                print("The binary has been deleted just to be safe")
-            except OSError as e:
-                print("Failed to delete binary: {}".format(str(e)))
-                print("Delete this binary as soon as possible and do not execute it!")
-
-            return False
-        else:
-            # Make sure the binary is executable.
-            mode = os.stat(CLANG_FORMAT_PATH).st_mode
-            mode |= stat.S_IXUSR
-            os.chmod(CLANG_FORMAT_PATH, mode)
-            print("Using clang-format located at {}".format(CLANG_FORMAT_PATH))
-
-    return True
+    return bool(download("clang-format", CLANG_FORMAT_DIR, PLATFORM_TO_CF_URL, PLATFORM_TO_HASH))
diff --git a/tools/linter/clang_tidy/__main__.py b/tools/linter/clang_tidy/__main__.py
new file mode 100644
index 000000000000..0254ddc43b37
--- /dev/null
+++ b/tools/linter/clang_tidy/__main__.py
@@ -0,0 +1,130 @@
+import argparse
+import pathlib
+
+from run import run
+from generate_build_files import generate_build_files
+
+
+DEFAULTS = {
+    "glob": [
+        # The negative filters below are to exclude files that include onnx_pb.h or
+        # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job.
+        # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed
+        # in a follow up PR.
+        # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built.
+        # deploy/interpreter files are excluded due to using macros and other techniquies
+        # that are not easily converted to accepted c++
+        "-torch/csrc/jit/passes/onnx/helper.cpp",
+        "-torch/csrc/jit/passes/onnx/shape_type_inference.cpp",
+        "-torch/csrc/jit/serialization/onnx.cpp",
+        "-torch/csrc/jit/serialization/export.cpp",
+        "-torch/csrc/jit/serialization/import.cpp",
+        "-torch/csrc/jit/serialization/import_legacy.cpp",
+        "-torch/csrc/onnx/init.cpp",
+        "-torch/csrc/cuda/nccl.*",
+        "-torch/csrc/cuda/python_nccl.cpp",
+        "-torch/csrc/autograd/FunctionsManual.cpp",
+        "-torch/csrc/generic/*.cpp",
+        "-torch/csrc/jit/codegen/cuda/runtime/*",
+        "-torch/csrc/deploy/interpreter/interpreter.cpp",
+        "-torch/csrc/deploy/interpreter/interpreter.h",
+        "-torch/csrc/deploy/interpreter/interpreter_impl.h",
+        "-torch/csrc/deploy/interpreter/test_main.cpp",
+    ],
+    "paths": ["torch/csrc/"],
+    "include-dir": ["/usr/lib/llvm-11/include/openmp"],
+}
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)")
+    parser.add_argument(
+        "-e",
+        "--clang-tidy-exe",
+        default="clang-tidy",
+        help="Path to clang-tidy executable",
+    )
+    parser.add_argument(
+        "-g",
+        "--glob",
+        action="append",
+        default=DEFAULTS["glob"],
+        help="Only lint files that match these glob patterns "
+        "(see documentation for `fnmatch` for supported syntax)."
+        "If a pattern starts with a - the search is negated for that pattern.",
+    )
+    parser.add_argument(
+        "-x",
+        "--regex",
+        action="append",
+        default=[],
+        help="Only lint files that match these regular expressions (from the start of the filename). "
+        "If a pattern starts with a - the search is negated for that pattern.",
+    )
+    parser.add_argument(
+        "-c",
+        "--compile-commands-dir",
+        default="build",
+        help="Path to the folder containing compile_commands.json",
+    )
+    parser.add_argument(
+        "--diff-file", help="File containing diff to use for determining files to lint and line filters"
+    )
+    parser.add_argument(
+        "-p",
+        "--paths",
+        nargs="+",
+        default=DEFAULTS["paths"],
+        help="Lint only the given paths (recursively)",
+    )
+    parser.add_argument(
+        "-n",
+        "--dry-run",
+        action="store_true",
+        help="Only show the command to be executed, without running it",
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
+    parser.add_argument(
+        "--config-file",
+        help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.",
+    )
+    parser.add_argument(
+        "-k",
+        "--keep-going",
+        action="store_true",
+        help="Don't error on compiler errors (clang-diagnostic-error)",
+    )
+    parser.add_argument(
+        "-j",
+        "--parallel",
+        action="store_true",
+        help="Run clang tidy in parallel per-file (requires ninja to be installed).",
+    )
+    parser.add_argument(
+        "--print-include-paths",
+        action="store_true",
+        help="Print the search paths used for include directives"
+    )
+    parser.add_argument(
+        "-I",
+        "--include-dir",
+        action="append",
+        default=DEFAULTS["include-dir"],
+        help="Add the specified directory to the search path for include files",
+    )
+    parser.add_argument("-s", "--suppress-diagnostics", action="store_true",
+                        help="Add NOLINT to suppress clang-tidy violations")
+    parser.add_argument(
+        "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy"
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    if not pathlib.Path("build").exists():
+        generate_build_files()
+    options = parse_args()
+    run(options)
+
+
+main()
diff --git a/tools/linter/clang_tidy/generate_build_files.py b/tools/linter/clang_tidy/generate_build_files.py
new file mode 100644
index 000000000000..72107a31fab7
--- /dev/null
+++ b/tools/linter/clang_tidy/generate_build_files.py
@@ -0,0 +1,62 @@
+import subprocess
+import sys
+import os
+from typing import List
+
+
+def run_cmd(cmd: List[str]) -> None:
+    print(f"Running: {cmd}")
+    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
+    stdout, stderr = result.stdout.decode("utf-8").strip(), result.stderr.decode("utf-8").strip()
+    print(stdout)
+    print(stderr)
+    if result.returncode != 0:
+        print(f"Failed to run {cmd}")
+        exit(1)
+
+
+def run_timed_cmd(cmd: List[str]) -> None:
+    run_cmd(["time"] + cmd)
+
+
+def update_submodules() -> None:
+    run_cmd(["git", "submodule", "update", "--init", "--recursive"])
+
+
+def gen_compile_commands() -> None:
+    os.environ["USE_NCCL"] = "0"
+    os.environ["USE_DEPLOY"] = "1"
+    run_timed_cmd([sys.executable, "setup.py", "--cmake-only", "build"])
+
+
+def run_autogen() -> None:
+    run_timed_cmd(
+        [
+            sys.executable,
+            "-m",
+            "tools.codegen.gen",
+            "-s",
+            "aten/src/ATen",
+            "-d",
+            "build/aten/src/ATen",
+        ]
+    )
+
+    run_timed_cmd(
+        [
+            sys.executable,
+            "tools/setup_helpers/generate_code.py",
+            "--declarations-path",
+            "build/aten/src/ATen/Declarations.yaml",
+            "--native-functions-path",
+            "aten/src/ATen/native/native_functions.yaml",
+            "--nn-path",
+            "aten/src",
+        ]
+    )
+
+
+def generate_build_files() -> None:
+    update_submodules()
+    gen_compile_commands()
+    run_autogen()
diff --git a/tools/linter/clang_tidy/requirements.txt b/tools/linter/clang_tidy/requirements.txt
new file mode 100644
index 000000000000..faea93fd550a
--- /dev/null
+++ b/tools/linter/clang_tidy/requirements.txt
@@ -0,0 +1 @@
+unidiff==0.6.0
diff --git a/tools/linter/clang_tidy.py b/tools/linter/clang_tidy/run.py
old mode 100755
new mode 100644
similarity index 80%
rename from tools/linter/clang_tidy.py
rename to tools/linter/clang_tidy/run.py
index 210d6bd53db5..e84d91f4bafb
--- a/tools/linter/clang_tidy.py
+++ b/tools/linter/clang_tidy/run.py
@@ -14,7 +14,6 @@ glob or regular expressions.
 
 
 
-import argparse
 import collections
 import fnmatch
 import json
@@ -267,93 +266,7 @@ def apply_nolint(fname: str, warnings: Dict[int, Set[str]]) -> None:
         f.write("".join(lines))
 
 
-def parse_options() -> Any:
-    """Parses the command line options."""
-    parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)")
-    parser.add_argument(
-        "-e",
-        "--clang-tidy-exe",
-        default="clang-tidy",
-        help="Path to clang-tidy executable",
-    )
-    parser.add_argument(
-        "-g",
-        "--glob",
-        action="append",
-        default=[],
-        help="Only lint files that match these glob patterns "
-        "(see documentation for `fnmatch` for supported syntax)."
-        "If a pattern starts with a - the search is negated for that pattern.",
-    )
-    parser.add_argument(
-        "-x",
-        "--regex",
-        action="append",
-        default=[],
-        help="Only lint files that match these regular expressions (from the start of the filename). "
-        "If a pattern starts with a - the search is negated for that pattern.",
-    )
-    parser.add_argument(
-        "-c",
-        "--compile-commands-dir",
-        default="build",
-        help="Path to the folder containing compile_commands.json",
-    )
-    parser.add_argument(
-        "--diff-file", help="File containing diff to use for determining files to lint and line filters"
-    )
-    parser.add_argument(
-        "-p",
-        "--paths",
-        nargs="+",
-        default=["."],
-        help="Lint only the given paths (recursively)",
-    )
-    parser.add_argument(
-        "-n",
-        "--dry-run",
-        action="store_true",
-        help="Only show the command to be executed, without running it",
-    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
-    parser.add_argument(
-        "--config-file",
-        help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.",
-    )
-    parser.add_argument(
-        "-k",
-        "--keep-going",
-        action="store_true",
-        help="Don't error on compiler errors (clang-diagnostic-error)",
-    )
-    parser.add_argument(
-        "-j",
-        "--parallel",
-        action="store_true",
-        help="Run clang tidy in parallel per-file (requires ninja to be installed).",
-    )
-    parser.add_argument(
-        "--print-include-paths",
-        action="store_true",
-        help="Print the search paths used for include directives"
-    )
-    parser.add_argument(
-        "-I",
-        "--include-dir",
-        action="append",
-        help="Add the specified directory to the search path for include files",
-    )
-    parser.add_argument("-s", "--suppress-diagnostics", action="store_true",
-                        help="Add NOLINT to suppress clang-tidy violations")
-    parser.add_argument(
-        "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy"
-    )
-    return parser.parse_args()
-
-
-def main() -> None:
-    options = parse_options()
-
+def run(options: Any) -> None:
     # This flag is pervasive enough to set it globally. It makes the code
     # cleaner compared to threading it through every single function.
     global VERBOSE
@@ -404,6 +317,3 @@ def main() -> None:
     for line in clang_tidy_output.splitlines():
         if line.startswith(pwd):
             print(line[len(pwd):])
-
-if __name__ == "__main__":
-    main()
diff --git a/tools/linter/install/__init__.py b/tools/linter/install/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/linter/install/clang_tidy.py b/tools/linter/install/clang_tidy.py
new file mode 100644
index 000000000000..b2e82afa22f7
--- /dev/null
+++ b/tools/linter/install/clang_tidy.py
@@ -0,0 +1,18 @@
+import os
+from tools.linter.install.download_bin import download, PYTORCH_ROOT, HASH_PATH
+
+PLATFORM_TO_URL = {
+    "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
+}
+
+PLATFORM_TO_HASH = {
+    "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"),
+}
+
+OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin")
+
+if __name__ == "__main__":
+    ok = download("clang-tidy", OUTPUT_DIR, PLATFORM_TO_URL, PLATFORM_TO_HASH)
+    if not ok:
+        print("Installation failed!")
+        exit(1)
diff --git a/tools/linter/install/download_bin.py b/tools/linter/install/download_bin.py
new file mode 100644
index 000000000000..60fe7fa961e9
--- /dev/null
+++ b/tools/linter/install/download_bin.py
@@ -0,0 +1,163 @@
+import platform
+import sys
+import stat
+import hashlib
+import subprocess
+import os
+import urllib.request
+import urllib.error
+
+from typing import Dict
+
+# String representing the host platform (e.g. Linux, Darwin).
+HOST_PLATFORM = platform.system()
+
+# PyTorch directory root
+result = subprocess.run(
+    ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True,
+)
+PYTORCH_ROOT = result.stdout.decode("utf-8").strip()
+
+HASH_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "hashes")
+
+
+def compute_file_sha256(path: str) -> str:
+    """Compute the SHA256 hash of a file and return it as a hex string."""
+    # If the file doesn't exist, return an empty string.
+    if not os.path.exists(path):
+        return ""
+
+    hash = hashlib.sha256()
+
+    # Open the file in binary mode and hash it.
+    with open(path, "rb") as f:
+        for b in f:
+            hash.update(b)
+
+    # Return the hash as a hexadecimal string.
+    return hash.hexdigest()
+
+
+def report_download_progress(
+    chunk_number: int, chunk_size: int, file_size: int
+) -> None:
+    """
+    Pretty printer for file download progress.
+    """
+    if file_size != -1:
+        percent = min(1, (chunk_number * chunk_size) / file_size)
+        bar = "#" * int(64 * percent)
+        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))
+
+
+def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str]) -> bool:
+    """
+    Downloads the binary appropriate for the host platform and stores it in the given output directory.
+    """
+    if HOST_PLATFORM not in platform_to_url:
+        print(f"Unsupported platform: {HOST_PLATFORM}")
+        return False
+
+    url = platform_to_url[HOST_PLATFORM]
+    filename = os.path.join(output_dir, name)
+
+    # Try to download binary.
+    print(f"Downloading {name} to {output_dir}")
+    try:
+        urllib.request.urlretrieve(
+            url,
+            filename,
+            reporthook=report_download_progress if sys.stdout.isatty() else None,
+        )
+    except urllib.error.URLError as e:
+        print(f"Error downloading {filename}: {e}")
+        return False
+    finally:
+        print()
+
+    return True
+
+
+def download(
+    name: str,
+    output_dir: str,
+    platform_to_url: Dict[str, str],
+    platform_to_hash: Dict[str, str],
+    verbose: bool = False,
+) -> bool:
+    """
+    Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies
+    that it is the right binary by checking its SHA256 hash against the expected hash.
+    """
+
+    output_path = os.path.join(output_dir, name)
+    if not os.path.exists(output_dir):
+        # If the directory doesn't exist, try to create it.
+        try:
+            os.mkdir(output_dir)
+        except OSError as e:
+            print(f"Unable to create directory for {name} binary: {output_dir}")
+            return False
+        finally:
+            if verbose:
+                print(f"Created directory {output_dir} for {name} binary")
+
+        # If the directory didn't exist, neither did the binary, so download it.
+        ok = download_bin(name, output_dir, platform_to_url)
+
+        if not ok:
+            return False
+    else:
+        # If the directory exists but the binary doesn't, download it.
+        if not os.path.exists(output_path):
+            ok = download_bin(name, output_dir, platform_to_url)
+
+            if not ok:
+                return False
+        else:
+            if verbose:
+                print(f"Found pre-existing {name} binary, skipping download")
+
+    # Now that the binary is where it should be, hash it.
+    actual_bin_hash = compute_file_sha256(output_path)
+
+    # If the host platform is not in platform_to_hash, it is unsupported.
+    if HOST_PLATFORM not in platform_to_hash:
+        print(f"Unsupported platform: {HOST_PLATFORM}")
+        return False
+
+    # This is the path to the file containing the reference hash.
+    hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM])
+
+    if not os.path.exists(hashpath):
+        print("Unable to find reference binary hash")
+        return False
+
+    # Load the reference hash and compare the actual hash to it.
+    with open(hashpath, "r") as f:
+        reference_bin_hash = f.readline().strip()
+
+        if verbose:
+            print(f"Reference Hash: {reference_bin_hash}")
+            print(f"Actual Hash: {repr(actual_bin_hash)}")
+
+        if reference_bin_hash != actual_bin_hash:
+            print("The downloaded binary is not what was expected!")
+
+            # Err on the side of caution and try to delete the downloaded binary.
+            try:
+                os.unlink(output_path)
+                print("The binary has been deleted just to be safe")
+            except OSError as e:
+                print(f"Failed to delete binary: {e}")
+                print("Delete this binary as soon as possible and do not execute it!")
+
+            return False
+        else:
+            # Make sure the binary is executable.
+            mode = os.stat(output_path).st_mode
+            mode |= stat.S_IXUSR
+            os.chmod(output_path, mode)
+            print(f"Using {name} located at {output_path}")
+
+    return True
diff --git a/tools/linter/install/hashes/clang-tidy-linux64 b/tools/linter/install/hashes/clang-tidy-linux64
new file mode 100644
index 000000000000..b864628b23bb
--- /dev/null
+++ b/tools/linter/install/hashes/clang-tidy-linux64
@@ -0,0 +1 @@
+44103f662c866921dabd40d047788935c43402d260be10613ab2111e01726892
diff --git a/tools/linter/translate_annotations.py b/tools/linter/translate_annotations.py
index 4bcfc78486d4..ed0147e4a62a 100755
--- a/tools/linter/translate_annotations.py
+++ b/tools/linter/translate_annotations.py
@@ -24,7 +24,6 @@ class Diff(TypedDict):
     hunks: List[Hunk]
 
 
-# adapted from the similar regex in tools/linter/clang_tidy.py
 # @@ -start,count +start,count @@
 hunk_pattern = r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'