From a1ad28da102f54e297b2db3360c1c5ed54a1f5ae Mon Sep 17 00:00:00 2001 From: Elton Leander Pinto Date: Tue, 6 Jul 2021 16:00:47 -0700 Subject: [PATCH] Refactor clang_tidy.py (#61119) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/61119 This change spilts the clang-tidy CI job into smaller steps and uses a refactored version of the clang_tidy.py script. The new folder structure is as follows: ``` tools/linter/clang_tidy |_ __main__py |_ requirements.txt |_ run.py |_ setup.sh ``` `__main__.py` This script will run `tools/linter/clang_tidy/setup.sh` if a `build` directory doesn't exist, mimicing what used to be done as a separate step in the CI job. After that, it will invoke `clang-tidy` with default arguments being declared in the script itself (as opposed to declaring them in lint.yml). The reasoning behind this approach is two-fold: - Make it easier to run `clang-tidy` locally using this script - De-duplicate the option passing `requirements.txt` Contains a list of additional python dependencies needed by the `clang-tidy` script. `setup.sh` If a build directory doesn't exist, this command will run the necessary codegen and build commands for running `clang-tidy` Example usage: ``` python3 tools/linter/clang_tidy --parallel ``` Notice that we don't have to put the `.py` at the end of `clang_tidy`. Test Plan: Run the following command: ``` python3 tools/linter/clang_tidy --paths torch/csrc/fx --parallel ``` Reviewed By: walterddr, janeyx99 Differential Revision: D29568582 Pulled By: 1ntEgr8 fbshipit-source-id: cd6d11c5cb8ba9f1344a87c35647a1cd8dd45b04 --- .github/workflows/lint.yml | 71 ++------ .gitignore | 6 +- CONTRIBUTING.md | 3 +- Makefile | 3 +- mypy.ini | 5 - tools/README.md | 2 +- tools/git-pre-commit | 5 +- tools/linter/clang_format_utils.py | 142 +-------------- tools/linter/clang_tidy/__main__.py | 130 ++++++++++++++ .../linter/clang_tidy/generate_build_files.py | 62 +++++++ tools/linter/clang_tidy/requirements.txt | 1 + .../{clang_tidy.py => clang_tidy/run.py} | 92 +--------- tools/linter/install/__init__.py | 0 tools/linter/install/clang_tidy.py | 18 ++ tools/linter/install/download_bin.py | 163 ++++++++++++++++++ .../linter/install/hashes/clang-tidy-linux64 | 1 + tools/linter/translate_annotations.py | 1 - 17 files changed, 402 insertions(+), 303 deletions(-) create mode 100644 tools/linter/clang_tidy/__main__.py create mode 100644 tools/linter/clang_tidy/generate_build_files.py create mode 100644 tools/linter/clang_tidy/requirements.txt rename tools/linter/{clang_tidy.py => clang_tidy/run.py} (80%) mode change 100755 => 100644 create mode 100644 tools/linter/install/__init__.py create mode 100644 tools/linter/install/clang_tidy.py create mode 100644 tools/linter/install/download_bin.py create mode 100644 tools/linter/install/hashes/clang-tidy-linux64 diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a0f12933a259..a2a75361c17d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -326,75 +326,26 @@ jobs: mkdir clang-tidy-output cd clang-tidy-output echo "$HEAD_SHA" > commit-sha.txt - - name: Generate build files - run: | - cd "${GITHUB_WORKSPACE}" - set -eux - - if [ ! -d build ]; then - git submodule update --init --recursive - - export USE_NCCL=0 - export USE_DEPLOY=1 - # We really only need compile_commands.json, so no need to build! - time python3 setup.py --cmake-only build - - # Generate ATen files. - time python3 -m tools.codegen.gen \ - -s aten/src/ATen \ - -d build/aten/src/ATen - - # Generate PyTorch files. - time python3 tools/setup_helpers/generate_code.py \ - --declarations-path build/aten/src/ATen/Declarations.yaml \ - --native-functions-path aten/src/ATen/native/native_functions.yaml \ - --nn-path aten/src - fi - - name: Run clang-tidy + - name: Fetch PR diff env: - HEAD_SHA: ${{ github.event.pull_request.head.sha }} PR_NUMBER: ${{ github.event.pull_request.number }} run: | cd "${GITHUB_WORKSPACE}" - set -eux - wget -O pr.diff "https://patch-diff.githubusercontent.com/raw/pytorch/pytorch/pull/$PR_NUMBER.diff" - - # Run Clang-Tidy - # The negative filters below are to exclude files that include onnx_pb.h or - # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job. - # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed - # in a follow up PR. - # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built. - # deploy/interpreter files are excluded due to using macros and other techniques - # that are not easily converted to accepted c++ - python3 tools/linter/clang_tidy.py \ + - name: Run clang-tidy + run: | + cd "${GITHUB_WORKSPACE}" + python3 tools/linter/clang_tidy \ + --diff-file pr.diff \ --parallel \ --verbose \ - --paths torch/csrc/ \ - --diff-file pr.diff \ - --include-dir /usr/lib/llvm-11/include/openmp \ - -g"-torch/csrc/jit/passes/onnx/helper.cpp" \ - -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \ - -g"-torch/csrc/jit/serialization/onnx.cpp" \ - -g"-torch/csrc/jit/serialization/export.cpp" \ - -g"-torch/csrc/jit/serialization/import.cpp" \ - -g"-torch/csrc/jit/serialization/import_legacy.cpp" \ - -g"-torch/csrc/onnx/init.cpp" \ - -g"-torch/csrc/cuda/nccl.*" \ - -g"-torch/csrc/cuda/python_nccl.cpp" \ - -g"-torch/csrc/autograd/FunctionsManual.cpp" \ - -g"-torch/csrc/generic/*.cpp" \ - -g"-torch/csrc/jit/codegen/cuda/runtime/*" \ - -g"-torch/csrc/deploy/interpreter/interpreter.cpp" \ - -g"-torch/csrc/deploy/interpreter/interpreter.h" \ - -g"-torch/csrc/deploy/interpreter/interpreter_impl.h" \ - -g"-torch/csrc/deploy/interpreter/test_main.cpp" \ "$@" >"${GITHUB_WORKSPACE}"/clang-tidy-output.txt - - cat "${GITHUB_WORKSPACE}"/clang-tidy-output.txt - + - name: Annotate output + env: + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + cd "${GITHUB_WORKSPACE}" tools/linter/translate_annotations.py \ --file=clang-tidy-output.txt \ --regex='^(?P.*?):(?P\d+):(?P\d+): (?P.*?) \[(?P.*)\]' \ diff --git a/.gitignore b/.gitignore index f4313361f233..89f139cc18d5 100644 --- a/.gitignore +++ b/.gitignore @@ -283,8 +283,9 @@ TAGS # ccls file .ccls-cache/ -# clang-format storage location used by apply_clang_format.py +# clang tooling storage location .clang-format-bin +.clang-tidy-bin # clangd background index .clangd/ @@ -305,3 +306,6 @@ bazel-* # core dump files core.* + +# Generated if you use the pre-commit script for clang-tidy +pr.diff diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 92bd46e39dc6..189cd18099ca 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1097,7 +1097,8 @@ have more checks than older versions. In our CI, we run clang-tidy-6.0. uncommitted changes). Changes are picked up based on a `git diff` with the given revision: ```bash - python tools/linter/clang_tidy.py -d build -p torch/csrc --diff 'HEAD~1' + git diff HEAD~1 > pr.diff + python tools/linter/clang_tidy --paths torch/csrc --diff-file "pr.diff" ``` Above, it is assumed you are in the PyTorch root folder. `path/to/build` should diff --git a/Makefile b/Makefile index 4c59f8e66ac9..94688032c424 100644 --- a/Makefile +++ b/Makefile @@ -70,6 +70,7 @@ setup_lint: --job 'shellcheck' --step 'Install ShellCheck' --no-quiet; \ fi pip install jinja2 + pip install -r tools/linter/clang_tidy/requirements.txt quick_checks: # TODO: This is broken when 'git config submodule.recurse' is 'true' since the @@ -116,4 +117,4 @@ toc: lint: flake8 mypy quick_checks cmakelint shellcheck quicklint: CHANGED_ONLY=--changed-only -quicklint: mypy flake8 mypy quick_checks cmakelint shellcheck +quicklint: mypy flake8 quick_checks cmakelint shellcheck diff --git a/mypy.ini b/mypy.ini index 9a6bd999df09..f6ced626ae4f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -37,11 +37,6 @@ files = test/test_type_hints.py, test/test_type_info.py, test/test_utils.py, - tools/linter/clang_format_utils.py, - tools/linter/clang_tidy.py, - tools/generate_torch_version.py, - tools/render_junit.py, - tools/stats # # `exclude` is a regex, not a list of paths like `files` (sigh) diff --git a/tools/README.md b/tools/README.md index 795f37ef82bb..a28affa5f30a 100644 --- a/tools/README.md +++ b/tools/README.md @@ -37,7 +37,7 @@ Build system pieces: Developer tools which you might find useful: -* [linter/clang_tidy.py](linter/clang_tidy.py) - Script for running clang-tidy +* [linter/clang_tidy](linter/clang_tidy/__main__.py) - Script for running clang-tidy on lines of your script which you changed. * [extract_scripts.py](extract_scripts.py) - Extract scripts from `.github/workflows/*.yml` into a specified dir, on which linters such as diff --git a/tools/git-pre-commit b/tools/git-pre-commit index a829fe9444b3..cdca9a4b1155 100755 --- a/tools/git-pre-commit +++ b/tools/git-pre-commit @@ -7,9 +7,10 @@ python tools/linter/flake8_hook.py if [ $(which clang-tidy) ] then echo "Running pre-commit clang-tidy" - python tools/linter/clang_tidy.py \ + git diff HEAD > pr.diff + python tools/linter/clang_tidy \ --paths torch/csrc \ - --diff HEAD \ + --diff-file "pr.diff" \ -g"-torch/csrc/jit/passes/onnx/helper.cpp" \ -g"-torch/csrc/jit/passes/onnx/shape_type_inference.cpp" \ -g"-torch/csrc/jit/serialization/onnx.cpp" \ diff --git a/tools/linter/clang_format_utils.py b/tools/linter/clang_format_utils.py index 6d40a8acc690..021ba9162cca 100644 --- a/tools/linter/clang_format_utils.py +++ b/tools/linter/clang_format_utils.py @@ -1,16 +1,5 @@ -import platform -import sys -import stat -import hashlib import os -import urllib.request -import urllib.error - -# String representing the host platform (e.g. Linux, Darwin). -HOST_PLATFORM = platform.system() - -# PyTorch directory root, derived from the location of this file. -PYTORCH_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) +from install.download_bin import download, PYTORCH_ROOT # type: ignore[import] # This dictionary maps each platform to the S3 object URL for its clang-format binary. PLATFORM_TO_CF_URL = { @@ -24,135 +13,8 @@ PLATFORM_TO_HASH = { "Linux": os.path.join("tools", "clang_format_hash", "linux64", "clang-format-linux64"), } -# Directory and file paths for the clang-format binary. CLANG_FORMAT_DIR = os.path.join(PYTORCH_ROOT, ".clang-format-bin") CLANG_FORMAT_PATH = os.path.join(CLANG_FORMAT_DIR, "clang-format") -def compute_file_sha256(path: str) -> str: - """Compute the SHA256 hash of a file and return it as a hex string.""" - # If the file doesn't exist, return an empty string. - if not os.path.exists(path): - return "" - - hash = hashlib.sha256() - - # Open the file in binary mode and hash it. - with open(path, "rb") as f: - for b in f: - hash.update(b) - - # Return the hash as a hexadecimal string. - return hash.hexdigest() - - -def report_download_progress(chunk_number: int, chunk_size: int, file_size: int) -> None: - """ - Pretty printer for file download progress. - """ - if file_size != -1: - percent = min(1, (chunk_number * chunk_size) / file_size) - bar = "#" * int(64 * percent) - sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100))) - - -def download_clang_format(path: str) -> bool: - """ - Downloads a clang-format binary appropriate for the host platform and stores it at the given location. - """ - if HOST_PLATFORM not in PLATFORM_TO_CF_URL: - print("Unsupported platform: {}".format(HOST_PLATFORM)) - return False - - cf_url = PLATFORM_TO_CF_URL[HOST_PLATFORM] - filename = os.path.join(path, "clang-format") - - # Try to download clang-format. - print("Downloading clang-format to {}".format(path)) - try: - urllib.request.urlretrieve( - cf_url, filename, reporthook=report_download_progress if sys.stdout.isatty() else None - ) - except urllib.error.URLError as e: - print("Error downloading {}: {}".format(filename, str(e))) - return False - finally: - print() - - return True - - def get_and_check_clang_format(verbose: bool = False) -> bool: - """ - Download a platform-appropriate clang-format binary if one doesn't already exist at the expected location and verify - that it is the right binary by checking its SHA256 hash against the expected hash. - """ - if not os.path.exists(CLANG_FORMAT_DIR): - # If the directory doesn't exist, try to create it. - try: - os.mkdir(CLANG_FORMAT_DIR) - except OSError as e: - print("Unable to create directory for clang-format binary: {}".format(CLANG_FORMAT_DIR)) - return False - finally: - if verbose: - print("Created directory {} for clang-format binary".format(CLANG_FORMAT_DIR)) - - # If the directory didn't exist, neither did the binary, so download it. - ok = download_clang_format(CLANG_FORMAT_DIR) - - if not ok: - return False - else: - # If the directory exists but the binary doesn't, download it. - if not os.path.exists(CLANG_FORMAT_PATH): - ok = download_clang_format(CLANG_FORMAT_DIR) - - if not ok: - return False - else: - if verbose: - print("Found pre-existing clang-format binary, skipping download") - - # Now that the binary is where it should be, hash it. - actual_bin_hash = compute_file_sha256(CLANG_FORMAT_PATH) - - # If the host platform is not in PLATFORM_TO_HASH, it is unsupported. - if HOST_PLATFORM not in PLATFORM_TO_HASH: - print("Unsupported platform: {}".format(HOST_PLATFORM)) - return False - - # This is the path to the file containing the reference hash. - hashpath = os.path.join(PYTORCH_ROOT, PLATFORM_TO_HASH[HOST_PLATFORM]) - - if not os.path.exists(hashpath): - print("Unable to find reference binary hash") - return False - - # Load the reference hash and compare the actual hash to it. - with open(hashpath, "r") as f: - reference_bin_hash = f.readline().strip() - - if verbose: - print("Reference Hash: {}".format(repr(reference_bin_hash))) - print("Actual Hash: {}".format(repr(actual_bin_hash))) - - if reference_bin_hash != actual_bin_hash: - print("The downloaded binary is not what was expected!") - - # Err on the side of caution and try to delete the downloaded binary. - try: - os.unlink(CLANG_FORMAT_PATH) - print("The binary has been deleted just to be safe") - except OSError as e: - print("Failed to delete binary: {}".format(str(e))) - print("Delete this binary as soon as possible and do not execute it!") - - return False - else: - # Make sure the binary is executable. - mode = os.stat(CLANG_FORMAT_PATH).st_mode - mode |= stat.S_IXUSR - os.chmod(CLANG_FORMAT_PATH, mode) - print("Using clang-format located at {}".format(CLANG_FORMAT_PATH)) - - return True + return bool(download("clang-format", CLANG_FORMAT_DIR, PLATFORM_TO_CF_URL, PLATFORM_TO_HASH)) diff --git a/tools/linter/clang_tidy/__main__.py b/tools/linter/clang_tidy/__main__.py new file mode 100644 index 000000000000..0254ddc43b37 --- /dev/null +++ b/tools/linter/clang_tidy/__main__.py @@ -0,0 +1,130 @@ +import argparse +import pathlib + +from run import run +from generate_build_files import generate_build_files + + +DEFAULTS = { + "glob": [ + # The negative filters below are to exclude files that include onnx_pb.h or + # caffe2_pb.h, otherwise we'd have to build protos as part of this CI job. + # FunctionsManual.cpp is excluded to keep this diff clean. It will be fixed + # in a follow up PR. + # /torch/csrc/generic/*.cpp is excluded because those files aren't actually built. + # deploy/interpreter files are excluded due to using macros and other techniquies + # that are not easily converted to accepted c++ + "-torch/csrc/jit/passes/onnx/helper.cpp", + "-torch/csrc/jit/passes/onnx/shape_type_inference.cpp", + "-torch/csrc/jit/serialization/onnx.cpp", + "-torch/csrc/jit/serialization/export.cpp", + "-torch/csrc/jit/serialization/import.cpp", + "-torch/csrc/jit/serialization/import_legacy.cpp", + "-torch/csrc/onnx/init.cpp", + "-torch/csrc/cuda/nccl.*", + "-torch/csrc/cuda/python_nccl.cpp", + "-torch/csrc/autograd/FunctionsManual.cpp", + "-torch/csrc/generic/*.cpp", + "-torch/csrc/jit/codegen/cuda/runtime/*", + "-torch/csrc/deploy/interpreter/interpreter.cpp", + "-torch/csrc/deploy/interpreter/interpreter.h", + "-torch/csrc/deploy/interpreter/interpreter_impl.h", + "-torch/csrc/deploy/interpreter/test_main.cpp", + ], + "paths": ["torch/csrc/"], + "include-dir": ["/usr/lib/llvm-11/include/openmp"], +} + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)") + parser.add_argument( + "-e", + "--clang-tidy-exe", + default="clang-tidy", + help="Path to clang-tidy executable", + ) + parser.add_argument( + "-g", + "--glob", + action="append", + default=DEFAULTS["glob"], + help="Only lint files that match these glob patterns " + "(see documentation for `fnmatch` for supported syntax)." + "If a pattern starts with a - the search is negated for that pattern.", + ) + parser.add_argument( + "-x", + "--regex", + action="append", + default=[], + help="Only lint files that match these regular expressions (from the start of the filename). " + "If a pattern starts with a - the search is negated for that pattern.", + ) + parser.add_argument( + "-c", + "--compile-commands-dir", + default="build", + help="Path to the folder containing compile_commands.json", + ) + parser.add_argument( + "--diff-file", help="File containing diff to use for determining files to lint and line filters" + ) + parser.add_argument( + "-p", + "--paths", + nargs="+", + default=DEFAULTS["paths"], + help="Lint only the given paths (recursively)", + ) + parser.add_argument( + "-n", + "--dry-run", + action="store_true", + help="Only show the command to be executed, without running it", + ) + parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") + parser.add_argument( + "--config-file", + help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.", + ) + parser.add_argument( + "-k", + "--keep-going", + action="store_true", + help="Don't error on compiler errors (clang-diagnostic-error)", + ) + parser.add_argument( + "-j", + "--parallel", + action="store_true", + help="Run clang tidy in parallel per-file (requires ninja to be installed).", + ) + parser.add_argument( + "--print-include-paths", + action="store_true", + help="Print the search paths used for include directives" + ) + parser.add_argument( + "-I", + "--include-dir", + action="append", + default=DEFAULTS["include-dir"], + help="Add the specified directory to the search path for include files", + ) + parser.add_argument("-s", "--suppress-diagnostics", action="store_true", + help="Add NOLINT to suppress clang-tidy violations") + parser.add_argument( + "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy" + ) + return parser.parse_args() + + +def main() -> None: + if not pathlib.Path("build").exists(): + generate_build_files() + options = parse_args() + run(options) + + +main() diff --git a/tools/linter/clang_tidy/generate_build_files.py b/tools/linter/clang_tidy/generate_build_files.py new file mode 100644 index 000000000000..72107a31fab7 --- /dev/null +++ b/tools/linter/clang_tidy/generate_build_files.py @@ -0,0 +1,62 @@ +import subprocess +import sys +import os +from typing import List + + +def run_cmd(cmd: List[str]) -> None: + print(f"Running: {cmd}") + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,) + stdout, stderr = result.stdout.decode("utf-8").strip(), result.stderr.decode("utf-8").strip() + print(stdout) + print(stderr) + if result.returncode != 0: + print(f"Failed to run {cmd}") + exit(1) + + +def run_timed_cmd(cmd: List[str]) -> None: + run_cmd(["time"] + cmd) + + +def update_submodules() -> None: + run_cmd(["git", "submodule", "update", "--init", "--recursive"]) + + +def gen_compile_commands() -> None: + os.environ["USE_NCCL"] = "0" + os.environ["USE_DEPLOY"] = "1" + run_timed_cmd([sys.executable, "setup.py", "--cmake-only", "build"]) + + +def run_autogen() -> None: + run_timed_cmd( + [ + sys.executable, + "-m", + "tools.codegen.gen", + "-s", + "aten/src/ATen", + "-d", + "build/aten/src/ATen", + ] + ) + + run_timed_cmd( + [ + sys.executable, + "tools/setup_helpers/generate_code.py", + "--declarations-path", + "build/aten/src/ATen/Declarations.yaml", + "--native-functions-path", + "aten/src/ATen/native/native_functions.yaml", + "--nn-path", + "aten/src", + ] + ) + + +def generate_build_files() -> None: + update_submodules() + gen_compile_commands() + run_autogen() diff --git a/tools/linter/clang_tidy/requirements.txt b/tools/linter/clang_tidy/requirements.txt new file mode 100644 index 000000000000..faea93fd550a --- /dev/null +++ b/tools/linter/clang_tidy/requirements.txt @@ -0,0 +1 @@ +unidiff==0.6.0 diff --git a/tools/linter/clang_tidy.py b/tools/linter/clang_tidy/run.py old mode 100755 new mode 100644 similarity index 80% rename from tools/linter/clang_tidy.py rename to tools/linter/clang_tidy/run.py index 210d6bd53db5..e84d91f4bafb --- a/tools/linter/clang_tidy.py +++ b/tools/linter/clang_tidy/run.py @@ -14,7 +14,6 @@ glob or regular expressions. -import argparse import collections import fnmatch import json @@ -267,93 +266,7 @@ def apply_nolint(fname: str, warnings: Dict[int, Set[str]]) -> None: f.write("".join(lines)) -def parse_options() -> Any: - """Parses the command line options.""" - parser = argparse.ArgumentParser(description="Run Clang-Tidy (on your Git changes)") - parser.add_argument( - "-e", - "--clang-tidy-exe", - default="clang-tidy", - help="Path to clang-tidy executable", - ) - parser.add_argument( - "-g", - "--glob", - action="append", - default=[], - help="Only lint files that match these glob patterns " - "(see documentation for `fnmatch` for supported syntax)." - "If a pattern starts with a - the search is negated for that pattern.", - ) - parser.add_argument( - "-x", - "--regex", - action="append", - default=[], - help="Only lint files that match these regular expressions (from the start of the filename). " - "If a pattern starts with a - the search is negated for that pattern.", - ) - parser.add_argument( - "-c", - "--compile-commands-dir", - default="build", - help="Path to the folder containing compile_commands.json", - ) - parser.add_argument( - "--diff-file", help="File containing diff to use for determining files to lint and line filters" - ) - parser.add_argument( - "-p", - "--paths", - nargs="+", - default=["."], - help="Lint only the given paths (recursively)", - ) - parser.add_argument( - "-n", - "--dry-run", - action="store_true", - help="Only show the command to be executed, without running it", - ) - parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output") - parser.add_argument( - "--config-file", - help="Path to a clang-tidy config file. Defaults to '.clang-tidy'.", - ) - parser.add_argument( - "-k", - "--keep-going", - action="store_true", - help="Don't error on compiler errors (clang-diagnostic-error)", - ) - parser.add_argument( - "-j", - "--parallel", - action="store_true", - help="Run clang tidy in parallel per-file (requires ninja to be installed).", - ) - parser.add_argument( - "--print-include-paths", - action="store_true", - help="Print the search paths used for include directives" - ) - parser.add_argument( - "-I", - "--include-dir", - action="append", - help="Add the specified directory to the search path for include files", - ) - parser.add_argument("-s", "--suppress-diagnostics", action="store_true", - help="Add NOLINT to suppress clang-tidy violations") - parser.add_argument( - "extra_args", nargs="*", help="Extra arguments to forward to clang-tidy" - ) - return parser.parse_args() - - -def main() -> None: - options = parse_options() - +def run(options: Any) -> None: # This flag is pervasive enough to set it globally. It makes the code # cleaner compared to threading it through every single function. global VERBOSE @@ -404,6 +317,3 @@ def main() -> None: for line in clang_tidy_output.splitlines(): if line.startswith(pwd): print(line[len(pwd):]) - -if __name__ == "__main__": - main() diff --git a/tools/linter/install/__init__.py b/tools/linter/install/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/linter/install/clang_tidy.py b/tools/linter/install/clang_tidy.py new file mode 100644 index 000000000000..b2e82afa22f7 --- /dev/null +++ b/tools/linter/install/clang_tidy.py @@ -0,0 +1,18 @@ +import os +from tools.linter.install.download_bin import download, PYTORCH_ROOT, HASH_PATH + +PLATFORM_TO_URL = { + "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy", +} + +PLATFORM_TO_HASH = { + "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"), +} + +OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin") + +if __name__ == "__main__": + ok = download("clang-tidy", OUTPUT_DIR, PLATFORM_TO_URL, PLATFORM_TO_HASH) + if not ok: + print("Installation failed!") + exit(1) diff --git a/tools/linter/install/download_bin.py b/tools/linter/install/download_bin.py new file mode 100644 index 000000000000..60fe7fa961e9 --- /dev/null +++ b/tools/linter/install/download_bin.py @@ -0,0 +1,163 @@ +import platform +import sys +import stat +import hashlib +import subprocess +import os +import urllib.request +import urllib.error + +from typing import Dict + +# String representing the host platform (e.g. Linux, Darwin). +HOST_PLATFORM = platform.system() + +# PyTorch directory root +result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True, +) +PYTORCH_ROOT = result.stdout.decode("utf-8").strip() + +HASH_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "hashes") + + +def compute_file_sha256(path: str) -> str: + """Compute the SHA256 hash of a file and return it as a hex string.""" + # If the file doesn't exist, return an empty string. + if not os.path.exists(path): + return "" + + hash = hashlib.sha256() + + # Open the file in binary mode and hash it. + with open(path, "rb") as f: + for b in f: + hash.update(b) + + # Return the hash as a hexadecimal string. + return hash.hexdigest() + + +def report_download_progress( + chunk_number: int, chunk_size: int, file_size: int +) -> None: + """ + Pretty printer for file download progress. + """ + if file_size != -1: + percent = min(1, (chunk_number * chunk_size) / file_size) + bar = "#" * int(64 * percent) + sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100))) + + +def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str]) -> bool: + """ + Downloads the binary appropriate for the host platform and stores it in the given output directory. + """ + if HOST_PLATFORM not in platform_to_url: + print(f"Unsupported platform: {HOST_PLATFORM}") + return False + + url = platform_to_url[HOST_PLATFORM] + filename = os.path.join(output_dir, name) + + # Try to download binary. + print(f"Downloading {name} to {output_dir}") + try: + urllib.request.urlretrieve( + url, + filename, + reporthook=report_download_progress if sys.stdout.isatty() else None, + ) + except urllib.error.URLError as e: + print(f"Error downloading {filename}: {e}") + return False + finally: + print() + + return True + + +def download( + name: str, + output_dir: str, + platform_to_url: Dict[str, str], + platform_to_hash: Dict[str, str], + verbose: bool = False, +) -> bool: + """ + Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies + that it is the right binary by checking its SHA256 hash against the expected hash. + """ + + output_path = os.path.join(output_dir, name) + if not os.path.exists(output_dir): + # If the directory doesn't exist, try to create it. + try: + os.mkdir(output_dir) + except OSError as e: + print(f"Unable to create directory for {name} binary: {output_dir}") + return False + finally: + if verbose: + print(f"Created directory {output_dir} for {name} binary") + + # If the directory didn't exist, neither did the binary, so download it. + ok = download_bin(name, output_dir, platform_to_url) + + if not ok: + return False + else: + # If the directory exists but the binary doesn't, download it. + if not os.path.exists(output_path): + ok = download_bin(name, output_dir, platform_to_url) + + if not ok: + return False + else: + if verbose: + print(f"Found pre-existing {name} binary, skipping download") + + # Now that the binary is where it should be, hash it. + actual_bin_hash = compute_file_sha256(output_path) + + # If the host platform is not in platform_to_hash, it is unsupported. + if HOST_PLATFORM not in platform_to_hash: + print(f"Unsupported platform: {HOST_PLATFORM}") + return False + + # This is the path to the file containing the reference hash. + hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM]) + + if not os.path.exists(hashpath): + print("Unable to find reference binary hash") + return False + + # Load the reference hash and compare the actual hash to it. + with open(hashpath, "r") as f: + reference_bin_hash = f.readline().strip() + + if verbose: + print(f"Reference Hash: {reference_bin_hash}") + print(f"Actual Hash: {repr(actual_bin_hash)}") + + if reference_bin_hash != actual_bin_hash: + print("The downloaded binary is not what was expected!") + + # Err on the side of caution and try to delete the downloaded binary. + try: + os.unlink(output_path) + print("The binary has been deleted just to be safe") + except OSError as e: + print(f"Failed to delete binary: {e}") + print("Delete this binary as soon as possible and do not execute it!") + + return False + else: + # Make sure the binary is executable. + mode = os.stat(output_path).st_mode + mode |= stat.S_IXUSR + os.chmod(output_path, mode) + print(f"Using {name} located at {output_path}") + + return True diff --git a/tools/linter/install/hashes/clang-tidy-linux64 b/tools/linter/install/hashes/clang-tidy-linux64 new file mode 100644 index 000000000000..b864628b23bb --- /dev/null +++ b/tools/linter/install/hashes/clang-tidy-linux64 @@ -0,0 +1 @@ +44103f662c866921dabd40d047788935c43402d260be10613ab2111e01726892 diff --git a/tools/linter/translate_annotations.py b/tools/linter/translate_annotations.py index 4bcfc78486d4..ed0147e4a62a 100755 --- a/tools/linter/translate_annotations.py +++ b/tools/linter/translate_annotations.py @@ -24,7 +24,6 @@ class Diff(TypedDict): hunks: List[Hunk] -# adapted from the similar regex in tools/linter/clang_tidy.py # @@ -start,count +start,count @@ hunk_pattern = r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'