[lint] lintrunner fixes/improvements (#68292)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/68292

- The NOQA lint pattern was typo'd to be the same as the `type: ignore` pattern; fix it to flag unqualified `noqa` comments (see the regex sketch just below)
- Generalize the clang-tidy initialization script into a shared s3_init adapter and use it for clang-format as well
- Add a script that lets you update the lint binaries in S3 relatively easily
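
To make the first point concrete, here is a quick sketch (not part of the diff; the sample strings are invented) of what the corrected NOQA pattern from .lintrunner.toml now flags versus allows:

import re

# The corrected grep pattern for unqualified `noqa` comments (sample strings invented).
noqa_pattern = re.compile(r"# noqa(?!: [A-Z]+\d{3})")

assert noqa_pattern.search("x = 1  # noqa")                 # unqualified -> flagged
assert noqa_pattern.search("x = 1  # noqa: fixme")          # not a rule code -> flagged
assert not noqa_pattern.search("import os  # noqa: F401")   # qualified -> allowed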

Test Plan: Imported from OSS

Reviewed By: malfet

Differential Revision: D32403934

Pulled By: suo

fbshipit-source-id: 4e21b22605216f013d87d636a205707ca8e0af36
Michael Suo authored on 2021-11-15 11:04:30 -08:00, committed by Facebook GitHub Bot
parent 43874d79e7
commit 24b60b2cbf
7 changed files with 368 additions and 228 deletions

.gitignore

@@ -287,6 +287,7 @@ TAGS
 # clang tooling storage location
 .clang-format-bin
 .clang-tidy-bin
+.lintbin
 # clangd background index
 .clangd/

.lintrunner.toml

@@ -53,10 +53,19 @@ include_patterns = [
     'test/cpp/tensorexpr/**/*.h',
     'test/cpp/tensorexpr/**/*.cpp',
 ]
+init_command = [
+    'python3',
+    'tools/linter/adapters/s3_init.py',
+    '--config-json=tools/linter/adapters/s3_init_config.json',
+    '--linter=clang-format',
+    '--dry-run={{DRYRUN}}',
+    '--output-dir=.lintbin',
+    '--output-name=clang-format',
+]
 command = [
     'python3',
     'tools/linter/adapters/clangformat_linter.py',
-    '--binary=clang-format',
+    '--binary=.lintbin/clang-format',
     '--',
     '@{{PATHSFILE}}'
 ]
@@ -159,15 +168,17 @@ exclude_patterns = [
 ]
 init_command = [
     'python3',
-    'tools/linter/adapters/clangtidy_init.py',
-    '--dry_run={{DRYRUN}}',
-    '--output_dir=.clang-tidy-bin',
-    '--output_name=clang-tidy',
+    'tools/linter/adapters/s3_init.py',
+    '--config-json=tools/linter/adapters/s3_init_config.json',
+    '--linter=clang-tidy',
+    '--dry-run={{DRYRUN}}',
+    '--output-dir=.lintbin',
+    '--output-name=clang-tidy',
 ]
 command = [
     'python3',
     'tools/linter/adapters/clangtidy_linter.py',
-    '--binary=.clang-tidy-bin/clang-tidy',
+    '--binary=.lintbin/clang-tidy',
     '--build_dir=./build',
     '--',
     '@{{PATHSFILE}}'
@@ -198,8 +209,8 @@ exclude_patterns = ['caffe2/**']
 command = [
     'python3',
     'tools/linter/adapters/grep_linter.py',
-    '--pattern=# type:\s*ignore(?!\[)',
-    '--linter-name=TYPEIGNORE',
+    '--pattern=# noqa(?!: [A-Z]+\d{3})',
+    '--linter-name=NOQA',
     '--error-name=unqualified noqa',
     """--error-description=\
         This line has an unqualified `noqa`; \
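
For reference, when `lintrunner init` runs the CLANGFORMAT init_command above, the resulting invocation is roughly equivalent to the following sketch (lintrunner fills in {{DRYRUN}} itself; "0" is assumed here):

import subprocess

# Rough equivalent of the CLANGFORMAT init_command above; "--dry-run=0" is an assumption.
subprocess.run(
    [
        "python3",
        "tools/linter/adapters/s3_init.py",
        "--config-json=tools/linter/adapters/s3_init_config.json",
        "--linter=clang-format",
        "--dry-run=0",
        "--output-dir=.lintbin",
        "--output-name=clang-format",
    ],
    check=True,
)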

tools/linter/adapters/clangformat_linter.py

@@ -7,6 +7,7 @@ import subprocess
 import sys
 import time
 from enum import Enum
+from pathlib import Path
 from typing import Any, List, NamedTuple, Optional
@@ -209,6 +210,23 @@ def main() -> None:
     )
     binary = os.path.normpath(args.binary) if IS_WINDOWS else args.binary
+    if not Path(binary).exists():
+        lint_message = LintMessage(
+            path=None,
+            line=None,
+            char=None,
+            code="CLANGFORMAT",
+            severity=LintSeverity.ERROR,
+            name="init-error",
+            original=None,
+            replacement=None,
+            description=(
+                f"Could not find clang-format binary at {binary}, "
+                "did you forget to run `lintrunner init`?"
+            ),
+        )
+        print(json.dumps(lint_message._asdict()), flush=True)
+        sys.exit(0)
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=os.cpu_count(),
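
With this change a missing clang-format binary surfaces as a normal lint message instead of a crash. As a rough illustration (the exact serialization of LintMessage and LintSeverity is assumed; values are examples), the single JSON line printed by the init-error path looks something like:

import json

# Approximate shape of the init-error message printed above (illustrative values only).
message = {
    "path": None,
    "line": None,
    "char": None,
    "code": "CLANGFORMAT",
    "severity": "error",
    "name": "init-error",
    "original": None,
    "replacement": None,
    "description": (
        "Could not find clang-format binary at .lintbin/clang-format, "
        "did you forget to run `lintrunner init`?"
    ),
}
print(json.dumps(message))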

tools/linter/adapters/clangtidy_init.py (deleted)

@@ -1,220 +0,0 @@
import platform
import argparse
import sys
import stat
import hashlib
import subprocess
import os
import urllib.request
import urllib.error
import pathlib

from typing import Dict

# String representing the host platform (e.g. Linux, Darwin).
HOST_PLATFORM = platform.system()

# PyTorch directory root
result = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True,
)
PYTORCH_ROOT = result.stdout.decode("utf-8").strip()

HASH_PATH = pathlib.Path(PYTORCH_ROOT) / "tools" / "linter" / "install" / "hashes"


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def report_download_progress(
    chunk_number: int, chunk_size: int, file_size: int
) -> None:
    """
    Pretty printer for file download progress.
    """
    if file_size != -1:
        percent = min(1, (chunk_number * chunk_size) / file_size)
        bar = "#" * int(64 * percent)
        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))


def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str], dry_run: bool) -> bool:
    """
    Downloads the binary appropriate for the host platform and stores it in the given output directory.
    """
    if HOST_PLATFORM not in platform_to_url:
        print(f"Unsupported platform: {HOST_PLATFORM}")
        return False

    url = platform_to_url[HOST_PLATFORM]
    filename = os.path.join(output_dir, name)

    if dry_run:
        print(f"DRY RUN: Would download {url} to {filename}")
        return True

    # Try to download binary.
    print(f"Downloading {name} to {output_dir}")
    try:
        urllib.request.urlretrieve(
            url,
            filename,
            reporthook=report_download_progress if sys.stdout.isatty() else None,
        )
    except urllib.error.URLError as e:
        print(f"Error downloading {filename}: {e}")
        return False
    finally:
        print()

    return True


def download(
    name: str,
    output_dir: str,
    platform_to_url: Dict[str, str],
    platform_to_hash: Dict[str, str],
    dry_run: bool,
) -> bool:
    """
    Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies
    that it is the right binary by checking its SHA256 hash against the expected hash.
    """
    output_path = os.path.join(output_dir, name)
    if not os.path.exists(output_dir):
        # If the directory doesn't exist, try to create it.
        if dry_run:
            print(f"DRY RUN: would create directory for {name} binary: {output_dir}")
        else:
            try:
                os.mkdir(output_dir)
            except OSError as e:
                print(f"Unable to create directory for {name} binary: {output_dir}")
                return False
            finally:
                print(f"Created directory {output_dir} for {name} binary")

        # If the directory didn't exist, neither did the binary, so download it.
        ok = download_bin(name, output_dir, platform_to_url, dry_run)
        if not ok:
            return False
    else:
        # If the directory exists but the binary doesn't, download it.
        if not os.path.exists(output_path):
            ok = download_bin(name, output_dir, platform_to_url, dry_run)
            if not ok:
                return False
        else:
            print(f"Found pre-existing {name} binary, skipping download")

    # Now that the binary is where it should be, hash it.
    actual_bin_hash = compute_file_sha256(output_path)

    # If the host platform is not in platform_to_hash, it is unsupported.
    if HOST_PLATFORM not in platform_to_hash:
        print(f"Unsupported platform: {HOST_PLATFORM}")
        return False

    # This is the path to the file containing the reference hash.
    hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM])

    if not os.path.exists(hashpath):
        print("Unable to find reference binary hash")
        return False

    # Load the reference hash and compare the actual hash to it.
    if dry_run:
        # We didn't download anything, just bail
        return True

    with open(hashpath, "r") as f:
        reference_bin_hash = f.readline().strip()

    print(f"Reference Hash: {reference_bin_hash}")
    print(f"Actual Hash: {repr(actual_bin_hash)}")

    if reference_bin_hash != actual_bin_hash:
        print("The downloaded binary is not what was expected!")
        print(f"Downloaded hash: {repr(actual_bin_hash)} vs expected {reference_bin_hash}")

        # Err on the side of caution and try to delete the downloaded binary.
        try:
            os.unlink(output_path)
            print("The binary has been deleted just to be safe")
        except OSError as e:
            print(f"Failed to delete binary: {e}")
            print("Delete this binary as soon as possible and do not execute it!")

        return False
    else:
        # Make sure the binary is executable.
        mode = os.stat(output_path).st_mode
        mode |= stat.S_IXUSR
        os.chmod(output_path, mode)
        print(f"Using {name} located at {output_path}")

    return True


PLATFORM_TO_URL = {
    "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
    "Darwin": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy",
}

PLATFORM_TO_HASH = {
    "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"),
    "Darwin": os.path.join(HASH_PATH, "clang-tidy-macos"),
}

OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin")
INSTALLATION_PATH = os.path.join(OUTPUT_DIR, "clang-tidy")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="downloads clang-tidy",
    )
    parser.add_argument(
        "--output_dir",
        required=True,
        help="place to put the binary",
    )
    parser.add_argument(
        "--output_name",
        required=True,
        help="name of binary",
    )
    parser.add_argument(
        "--dry_run",
        default=False,
        help="do not download, just print what would be done",
    )
    args = parser.parse_args()

    if args.dry_run == "0":
        args.dry_run = False
    else:
        args.dry_run = True

    ok = download(args.output_name, args.output_dir, PLATFORM_TO_URL, PLATFORM_TO_HASH, args.dry_run)
    if not ok:
        print(f"Failed to download clang-tidy binary from {PLATFORM_TO_URL}")
        exit(1)

tools/linter/adapters/s3_init.py (new)

@@ -0,0 +1,206 @@
import argparse
import hashlib
import json
import logging
import os
import platform
import stat
import subprocess
import sys
import textwrap
import urllib.error
import urllib.request
from pathlib import Path

# String representing the host platform (e.g. Linux, Darwin).
HOST_PLATFORM = platform.system()

# PyTorch directory root
result = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"],
    stdout=subprocess.PIPE,
    check=True,
)
PYTORCH_ROOT = result.stdout.decode("utf-8").strip()

DRY_RUN = False


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def report_download_progress(
    chunk_number: int, chunk_size: int, file_size: int
) -> None:
    """
    Pretty printer for file download progress.
    """
    if file_size != -1:
        percent = min(1, (chunk_number * chunk_size) / file_size)
        bar = "#" * int(64 * percent)
        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))


def check(binary_path: Path, reference_hash: str) -> bool:
    """Check whether the binary exists and is the right one.

    If there is a hash mismatch, delete the existing binary.
    """
    if not binary_path.exists():
        logging.info(f"{binary_path} does not exist.")
        return False

    existing_binary_hash = compute_file_sha256(str(binary_path))
    if existing_binary_hash == reference_hash:
        return True

    logging.warning(
        textwrap.dedent(
            f"""\
            Found binary hash does not match reference!
            Found hash: {existing_binary_hash}
            Reference hash: {reference_hash}
            Deleting {binary_path} just to be safe.
            """
        )
    )
    if DRY_RUN:
        logging.critical(
            "In dry run mode, so not actually deleting the binary. But consider deleting it ASAP!"
        )
        return False

    try:
        binary_path.unlink()
    except OSError as e:
        logging.critical(f"Failed to delete binary: {e}")
        logging.critical(
            "Delete this binary as soon as possible and do not execute it!"
        )

    return False


def download(
    name: str,
    output_dir: str,
    url: str,
    reference_bin_hash: str,
) -> bool:
    """
    Download a platform-appropriate binary if one doesn't already exist at the expected
    location, and verify that it is the right binary by checking its SHA256 hash against
    the expected hash.
    """
    # First check if we need to do anything
    binary_path = Path(output_dir, name)
    if check(binary_path, reference_bin_hash):
        logging.info(f"Correct binary already exists at {binary_path}. Exiting.")
        return True

    # Create the output folder
    binary_path.parent.mkdir(parents=True, exist_ok=True)

    # Download the binary
    logging.info(f"Downloading {url} to {binary_path}")

    if DRY_RUN:
        logging.info("Exiting as there is nothing left to do in dry run mode")
        return True

    urllib.request.urlretrieve(
        url,
        binary_path,
        reporthook=report_download_progress if sys.stdout.isatty() else None,
    )

    logging.info(f"Downloaded {name} successfully.")

    # Check the downloaded binary
    if not check(binary_path, reference_bin_hash):
        logging.critical(f"Downloaded binary {name} failed its hash check")
        return False

    # Ensure that the executable bits are set
    mode = os.stat(binary_path).st_mode
    mode |= stat.S_IXUSR
    os.chmod(binary_path, mode)

    logging.info(f"Using {name} located at {binary_path}")
    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="downloads and checks binaries from s3",
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="Path to config json that describes where to find binaries and hashes",
    )
    parser.add_argument(
        "--linter",
        required=True,
        help="Which linter to initialize from the config json",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="place to put the binary",
    )
    parser.add_argument(
        "--output-name",
        required=True,
        help="name of binary",
    )
    parser.add_argument(
        "--dry-run",
        default=False,
        help="do not download, just print what would be done",
    )
    args = parser.parse_args()
    if args.dry_run == "0":
        DRY_RUN = False
    else:
        DRY_RUN = True

    logging.basicConfig(
        format="[DRY_RUN] %(levelname)s: %(message)s"
        if DRY_RUN
        else "%(levelname)s: %(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )

    config = json.load(open(args.config_json))
    config = config[args.linter]

    # If the host platform is not in the config, it is unsupported.
    if HOST_PLATFORM not in config:
        logging.error(f"Unsupported platform: {HOST_PLATFORM}")
        exit(1)

    url = config[HOST_PLATFORM]["download_url"]
    hash = config[HOST_PLATFORM]["hash"]

    ok = download(args.output_name, args.output_dir, url, hash)
    if not ok:
        logging.critical(f"Unable to initialize {args.linter}")
        sys.exit(1)
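
One subtlety worth noting: the adapter treats `--dry-run=0` as a real run and any other value as a dry run, matching how the {{DRYRUN}} placeholder in .lintrunner.toml is filled in. A tiny sketch of that convention (invented helper and values):

# Sketch of the dry-run flag convention used by s3_init.py above (invented helper and values).
def parse_dry_run(value: str) -> bool:
    # "--dry-run=0" means "actually download"; anything else means dry run.
    return value != "0"

assert parse_dry_run("0") is False
assert parse_dry_run("1") is True
assert parse_dry_run("true") is True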

tools/linter/adapters/s3_init_config.json (new)

@@ -0,0 +1,30 @@
{
    "clang-format": {
        "Darwin": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/mac/clang-format-mojave",
            "hash": "1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d",
            "object_name": "mac/clang-format-mojave",
            "s3_bucket": "oss-clang-format"
        },
        "Linux": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64",
            "hash": "e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856",
            "object_name": "linux64/clang-format-linux64",
            "s3_bucket": "oss-clang-format"
        }
    },
    "clang-tidy": {
        "Darwin": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy",
            "hash": "541797a7b8fa795e2f3c1adcd8236cc336a40aa927028dc5bc79172e1d9eca36",
            "object_name": "macos/clang-tidy",
            "s3_bucket": "oss-clang-format"
        },
        "Linux": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
            "hash": "49343a448fcb75cd1e0fb9d6b1f6c2ef4b008b6f91d6ff899d4ac6060f5e52a5",
            "object_name": "linux64/clang-tidy",
            "s3_bucket": "oss-clang-format"
        }
    }
}
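
When adding or updating an entry here, a small consistency check along the following lines can catch copy-paste mistakes; this is a hypothetical helper, not part of the commit:

import json
import re

# Hypothetical sanity check for s3_init_config.json entries (not part of the commit).
with open("tools/linter/adapters/s3_init_config.json") as f:
    config = json.load(f)

for linter, platforms in config.items():
    for plat, entry in platforms.items():
        # Hashes should be full SHA256 hex digests.
        assert re.fullmatch(r"[0-9a-f]{64}", entry["hash"]), (linter, plat)
        # The download URL should reference the same bucket and object the uploader uses.
        assert entry["s3_bucket"] in entry["download_url"], (linter, plat)
        assert entry["object_name"] in entry["download_url"], (linter, plat)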


@@ -0,0 +1,94 @@
"""Uploads a new binary to s3 and updates its hash in the config file.

You'll need to have appropriate credentials on the PyTorch AWS buckets, see:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration
for how to configure them.
"""
import argparse
import boto3  # type: ignore[import]
import json
import hashlib
import os
import logging


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def main() -> None:
    parser = argparse.ArgumentParser(
        description="s3 binary updater",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="path to config json that you are trying to update",
    )
    parser.add_argument(
        "--linter",
        required=True,
        help="name of linter you're trying to update",
    )
    parser.add_argument(
        "--platform",
        required=True,
        help="which platform you are uploading the binary for",
    )
    parser.add_argument(
        "--file",
        required=True,
        help="file to upload",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="if set, don't actually upload/write hash",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    config = json.load(open(args.config_json))
    linter_config = config[args.linter][args.platform]
    bucket = linter_config["s3_bucket"]
    object_name = linter_config["object_name"]

    # Upload the file
    logging.info(
        f"Uploading file {args.file} to s3 bucket: {bucket}, object name: {object_name}"
    )
    if not args.dry_run:
        s3_client = boto3.client("s3")
        s3_client.upload_file(args.file, bucket, object_name)

    # Update hash in repo
    hash_of_new_binary = compute_file_sha256(args.file)
    logging.info(f"Computed new hash for binary {hash_of_new_binary}")

    linter_config["hash"] = hash_of_new_binary
    config_dump = json.dumps(config, indent=4, sort_keys=True)

    logging.info("Writing out new config:")
    logging.info(config_dump)
    if not args.dry_run:
        with open(args.config_json, "w") as f:
            f.write(config_dump)


if __name__ == "__main__":
    main()
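
A dry run is the safe way to exercise the uploader before touching S3; a sketch of such an invocation follows (the script path, local binary path, and linter/platform values are assumptions for illustration):

import subprocess

subprocess.run(
    [
        "python3",
        "tools/linter/adapters/update_s3.py",  # assumed path of the new script
        "--config-json=tools/linter/adapters/s3_init_config.json",
        "--linter=clang-tidy",
        "--platform=Linux",
        "--file=/tmp/clang-tidy",              # hypothetical freshly built binary
        "--dry-run",
    ],
    check=True,
)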