diff --git a/.gitignore b/.gitignore index c15111dda73a..3340b2cb686e 100644 --- a/.gitignore +++ b/.gitignore @@ -287,6 +287,7 @@ TAGS # clang tooling storage location .clang-format-bin .clang-tidy-bin +.lintbin # clangd background index .clangd/ diff --git a/.lintrunner.toml b/.lintrunner.toml index bc3044be7f81..7126745fca1a 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -53,10 +53,19 @@ include_patterns = [ 'test/cpp/tensorexpr/**/*.h', 'test/cpp/tensorexpr/**/*.cpp', ] +init_command = [ + 'python3', + 'tools/linter/adapters/s3_init.py', + '--config-json=tools/linter/adapters/s3_init_config.json', + '--linter=clang-format', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=clang-format', +] command = [ 'python3', 'tools/linter/adapters/clangformat_linter.py', - '--binary=clang-format', + '--binary=.lintbin/clang-format', '--', '@{{PATHSFILE}}' ] @@ -159,15 +168,17 @@ exclude_patterns = [ ] init_command = [ 'python3', - 'tools/linter/adapters/clangtidy_init.py', - '--dry_run={{DRYRUN}}', - '--output_dir=.clang-tidy-bin', - '--output_name=clang-tidy', + 'tools/linter/adapters/s3_init.py', + '--config-json=tools/linter/adapters/s3_init_config.json', + '--linter=clang-tidy', + '--dry-run={{DRYRUN}}', + '--output-dir=.lintbin', + '--output-name=clang-tidy', ] command = [ 'python3', 'tools/linter/adapters/clangtidy_linter.py', - '--binary=.clang-tidy-bin/clang-tidy', + '--binary=.lintbin/clang-tidy', '--build_dir=./build', '--', '@{{PATHSFILE}}' @@ -198,8 +209,8 @@ exclude_patterns = ['caffe2/**'] command = [ 'python3', 'tools/linter/adapters/grep_linter.py', - '--pattern=# type:\s*ignore(?!\[)', - '--linter-name=TYPEIGNORE', + '--pattern=# noqa(?!: [A-Z]+\d{3})', + '--linter-name=NOQA', '--error-name=unqualified noqa', """--error-description=\ This line has an unqualified `noqa`; \ diff --git a/tools/linter/adapters/clangformat_linter.py b/tools/linter/adapters/clangformat_linter.py index b577a18427a9..b4641306daf9 100644 --- a/tools/linter/adapters/clangformat_linter.py +++ b/tools/linter/adapters/clangformat_linter.py @@ -7,6 +7,7 @@ import subprocess import sys import time from enum import Enum +from pathlib import Path from typing import Any, List, NamedTuple, Optional @@ -209,6 +210,23 @@ def main() -> None: ) binary = os.path.normpath(args.binary) if IS_WINDOWS else args.binary + if not Path(binary).exists(): + lint_message = LintMessage( + path=None, + line=None, + char=None, + code="CLANGFORMAT", + severity=LintSeverity.ERROR, + name="init-error", + original=None, + replacement=None, + description=( + f"Could not find clang-format binary at {binary}, " + "did you forget to run `lintrunner init`?" + ), + ) + print(json.dumps(lint_message._asdict()), flush=True) + sys.exit(0) with concurrent.futures.ThreadPoolExecutor( max_workers=os.cpu_count(), diff --git a/tools/linter/adapters/clangtidy_init.py b/tools/linter/adapters/clangtidy_init.py deleted file mode 100644 index 5190fdbc97bc..000000000000 --- a/tools/linter/adapters/clangtidy_init.py +++ /dev/null @@ -1,220 +0,0 @@ -import platform -import argparse -import sys -import stat -import hashlib -import subprocess -import os -import urllib.request -import urllib.error -import pathlib - -from typing import Dict - -# String representing the host platform (e.g. Linux, Darwin). -HOST_PLATFORM = platform.system() - -# PyTorch directory root -result = subprocess.run( - ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True, -) -PYTORCH_ROOT = result.stdout.decode("utf-8").strip() - -HASH_PATH = pathlib.Path(PYTORCH_ROOT) / "tools" / "linter" / "install" / "hashes" - - -def compute_file_sha256(path: str) -> str: - """Compute the SHA256 hash of a file and return it as a hex string.""" - # If the file doesn't exist, return an empty string. - if not os.path.exists(path): - return "" - - hash = hashlib.sha256() - - # Open the file in binary mode and hash it. - with open(path, "rb") as f: - for b in f: - hash.update(b) - - # Return the hash as a hexadecimal string. - return hash.hexdigest() - - -def report_download_progress( - chunk_number: int, chunk_size: int, file_size: int -) -> None: - """ - Pretty printer for file download progress. - """ - if file_size != -1: - percent = min(1, (chunk_number * chunk_size) / file_size) - bar = "#" * int(64 * percent) - sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100))) - - -def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str], dry_run: bool) -> bool: - """ - Downloads the binary appropriate for the host platform and stores it in the given output directory. - """ - if HOST_PLATFORM not in platform_to_url: - print(f"Unsupported platform: {HOST_PLATFORM}") - return False - - url = platform_to_url[HOST_PLATFORM] - filename = os.path.join(output_dir, name) - if dry_run: - print(f"DRY RUN: Would download {url} to {filename}") - return True - - # Try to download binary. - print(f"Downloading {name} to {output_dir}") - try: - urllib.request.urlretrieve( - url, - filename, - reporthook=report_download_progress if sys.stdout.isatty() else None, - ) - except urllib.error.URLError as e: - print(f"Error downloading {filename}: {e}") - return False - finally: - print() - - return True - - -def download( - name: str, - output_dir: str, - platform_to_url: Dict[str, str], - platform_to_hash: Dict[str, str], - dry_run: bool, -) -> bool: - """ - Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies - that it is the right binary by checking its SHA256 hash against the expected hash. - """ - - output_path = os.path.join(output_dir, name) - if not os.path.exists(output_dir): - # If the directory doesn't exist, try to create it. - if dry_run: - print(f"DRY RUN: would create directory for {name} binary: {output_dir}") - else: - try: - os.mkdir(output_dir) - except OSError as e: - print(f"Unable to create directory for {name} binary: {output_dir}") - return False - finally: - print(f"Created directory {output_dir} for {name} binary") - - # If the directory didn't exist, neither did the binary, so download it. - ok = download_bin(name, output_dir, platform_to_url, dry_run) - - if not ok: - return False - else: - # If the directory exists but the binary doesn't, download it. - if not os.path.exists(output_path): - ok = download_bin(name, output_dir, platform_to_url, dry_run) - - if not ok: - return False - else: - print(f"Found pre-existing {name} binary, skipping download") - - # Now that the binary is where it should be, hash it. - actual_bin_hash = compute_file_sha256(output_path) - - # If the host platform is not in platform_to_hash, it is unsupported. - if HOST_PLATFORM not in platform_to_hash: - print(f"Unsupported platform: {HOST_PLATFORM}") - return False - - # This is the path to the file containing the reference hash. - hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM]) - - if not os.path.exists(hashpath): - print("Unable to find reference binary hash") - return False - - # Load the reference hash and compare the actual hash to it. - if dry_run: - # We didn't download anything, just bail - return True - - with open(hashpath, "r") as f: - reference_bin_hash = f.readline().strip() - - print(f"Reference Hash: {reference_bin_hash}") - print(f"Actual Hash: {repr(actual_bin_hash)}") - - if reference_bin_hash != actual_bin_hash: - print("The downloaded binary is not what was expected!") - print(f"Downloaded hash: {repr(actual_bin_hash)} vs expected {reference_bin_hash}") - - # Err on the side of caution and try to delete the downloaded binary. - try: - os.unlink(output_path) - print("The binary has been deleted just to be safe") - except OSError as e: - print(f"Failed to delete binary: {e}") - print("Delete this binary as soon as possible and do not execute it!") - - return False - else: - # Make sure the binary is executable. - mode = os.stat(output_path).st_mode - mode |= stat.S_IXUSR - os.chmod(output_path, mode) - print(f"Using {name} located at {output_path}") - - return True - - - -PLATFORM_TO_URL = { - "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy", - "Darwin": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy", -} - -PLATFORM_TO_HASH = { - "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"), - "Darwin": os.path.join(HASH_PATH, "clang-tidy-macos"), -} - -OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin") -INSTALLATION_PATH = os.path.join(OUTPUT_DIR, "clang-tidy") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="downloads clang-tidy", - ) - parser.add_argument( - "--output_dir", - required=True, - help="place to put the binary", - ) - parser.add_argument( - "--output_name", - required=True, - help="name of binary", - ) - parser.add_argument( - "--dry_run", - default=False, - help="do not download, just print what would be done", - ) - - args = parser.parse_args() - if args.dry_run == "0": - args.dry_run = False - else: - args.dry_run = True - - ok = download(args.output_name, args.output_dir, PLATFORM_TO_URL, PLATFORM_TO_HASH, args.dry_run) - if not ok: - print(f"Failed to download clang-tidy binary from {PLATFORM_TO_URL}") - exit(1) diff --git a/tools/linter/adapters/s3_init.py b/tools/linter/adapters/s3_init.py new file mode 100644 index 000000000000..f2bc9339776d --- /dev/null +++ b/tools/linter/adapters/s3_init.py @@ -0,0 +1,206 @@ +import argparse +import hashlib +import json +import logging +import os +import platform +import stat +import subprocess +import sys +import textwrap +import urllib.error +import urllib.request +from pathlib import Path + +# String representing the host platform (e.g. Linux, Darwin). +HOST_PLATFORM = platform.system() + +# PyTorch directory root +result = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + stdout=subprocess.PIPE, + check=True, +) +PYTORCH_ROOT = result.stdout.decode("utf-8").strip() + +DRY_RUN = False + + +def compute_file_sha256(path: str) -> str: + """Compute the SHA256 hash of a file and return it as a hex string.""" + # If the file doesn't exist, return an empty string. + if not os.path.exists(path): + return "" + + hash = hashlib.sha256() + + # Open the file in binary mode and hash it. + with open(path, "rb") as f: + for b in f: + hash.update(b) + + # Return the hash as a hexadecimal string. + return hash.hexdigest() + + +def report_download_progress( + chunk_number: int, chunk_size: int, file_size: int +) -> None: + """ + Pretty printer for file download progress. + """ + if file_size != -1: + percent = min(1, (chunk_number * chunk_size) / file_size) + bar = "#" * int(64 * percent) + sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100))) + + +def check(binary_path: Path, reference_hash: str) -> bool: + """Check whether the binary exists and is the right one. + + If there is hash difference, delete the actual binary. + """ + if not binary_path.exists(): + logging.info(f"{binary_path} does not exist.") + return False + + existing_binary_hash = compute_file_sha256(str(binary_path)) + if existing_binary_hash == reference_hash: + return True + + logging.warning( + textwrap.dedent( + f"""\ + Found binary hash does not match reference! + + Found hash: {existing_binary_hash} + Reference hash: {reference_hash} + + Deleting {binary_path} just to be safe. + """ + ) + ) + if DRY_RUN: + logging.critical( + "In dry run mode, so not actually deleting the binary. But consider deleting it ASAP!" + ) + return False + + try: + binary_path.unlink() + except OSError as e: + logging.critical(f"Failed to delete binary: {e}") + logging.critical( + "Delete this binary as soon as possible and do not execute it!" + ) + + return False + + +def download( + name: str, + output_dir: str, + url: str, + reference_bin_hash: str, +) -> bool: + """ + Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies + that it is the right binary by checking its SHA256 hash against the expected hash. + """ + # First check if we need to do anything + binary_path = Path(output_dir, name) + if check(binary_path, reference_bin_hash): + logging.info(f"Correct binary already exists at {binary_path}. Exiting.") + return True + + # Create the output folder + binary_path.parent.mkdir(parents=True, exist_ok=True) + + # Download the binary + logging.info(f"Downloading {url} to {binary_path}") + + if DRY_RUN: + logging.info("Exiting as there is nothing left to do in dry run mode") + return True + + urllib.request.urlretrieve( + url, + binary_path, + reporthook=report_download_progress if sys.stdout.isatty() else None, + ) + + logging.info(f"Downloaded {name} successfully.") + + # Check the downloaded binary + if not check(binary_path, reference_bin_hash): + logging.critical(f"Downloaded binary {name} failed its hash check") + return False + + # Ensure that exeuctable bits are set + mode = os.stat(binary_path).st_mode + mode |= stat.S_IXUSR + os.chmod(binary_path, mode) + + logging.info(f"Using {name} located at {binary_path}") + return True + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="downloads and checks binaries from s3", + ) + parser.add_argument( + "--config-json", + required=True, + help="Path to config json that describes where to find binaries and hashes", + ) + parser.add_argument( + "--linter", + required=True, + help="Which linter to initialize from the config json", + ) + parser.add_argument( + "--output-dir", + required=True, + help="place to put the binary", + ) + parser.add_argument( + "--output-name", + required=True, + help="name of binary", + ) + parser.add_argument( + "--dry-run", + default=False, + help="do not download, just print what would be done", + ) + + args = parser.parse_args() + if args.dry_run == "0": + DRY_RUN = False + else: + DRY_RUN = True + + logging.basicConfig( + format="[DRY_RUN] %(levelname)s: %(message)s" + if DRY_RUN + else "%(levelname)s: %(message)s", + level=logging.INFO, + stream=sys.stderr, + ) + + config = json.load(open(args.config_json)) + config = config[args.linter] + + # If the host platform is not in platform_to_hash, it is unsupported. + if HOST_PLATFORM not in config: + logging.error(f"Unsupported platform: {HOST_PLATFORM}") + exit(1) + + url = config[HOST_PLATFORM]["download_url"] + hash = config[HOST_PLATFORM]["hash"] + + ok = download(args.output_name, args.output_dir, url, hash) + if not ok: + logging.critical(f"Unable to initialize {args.linter}") + sys.exit(1) diff --git a/tools/linter/adapters/s3_init_config.json b/tools/linter/adapters/s3_init_config.json new file mode 100644 index 000000000000..0f3619ad0fff --- /dev/null +++ b/tools/linter/adapters/s3_init_config.json @@ -0,0 +1,30 @@ +{ + "clang-format": { + "Darwin": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/mac/clang-format-mojave", + "hash": "1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d", + "object_name": "mac/clang-format-mojave", + "s3_bucket": "oss-clang-format" + }, + "Linux": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64", + "hash": "e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856", + "object_name": "linux64/clang-format-linux64", + "s3_bucket": "oss-clang-format" + } + }, + "clang-tidy": { + "Darwin": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy", + "hash": "541797a7b8fa795e2f3c1adcd8236cc336a40aa927028dc5bc79172e1d9eca36", + "object_name": "macos/clang-tidy", + "s3_bucket": "oss-clang-format" + }, + "Linux": { + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy", + "hash": "49343a448fcb75cd1e0fb9d6b1f6c2ef4b008b6f91d6ff899d4ac6060f5e52a5", + "object_name": "linx64/clang-tidy", + "s3_bucket": "oss-clang-format" + } + } +} diff --git a/tools/linter/adapters/update_s3.py b/tools/linter/adapters/update_s3.py new file mode 100644 index 000000000000..847b8b2d158f --- /dev/null +++ b/tools/linter/adapters/update_s3.py @@ -0,0 +1,94 @@ +"""Uploads a new binary to s3 and updates its hash in the config file. + +You'll need to have appropriate credentials on the PyTorch AWS buckets, see: +https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration +for how to configure them. +""" + +import argparse +import boto3 # type: ignore[import] +import json +import hashlib +import os +import logging + + +def compute_file_sha256(path: str) -> str: + """Compute the SHA256 hash of a file and return it as a hex string.""" + # If the file doesn't exist, return an empty string. + if not os.path.exists(path): + return "" + + hash = hashlib.sha256() + + # Open the file in binary mode and hash it. + with open(path, "rb") as f: + for b in f: + hash.update(b) + + # Return the hash as a hexadecimal string. + return hash.hexdigest() + + +def main() -> None: + parser = argparse.ArgumentParser( + description="s3 binary updater", + fromfile_prefix_chars="@", + ) + parser.add_argument( + "--config-json", + required=True, + help="path to config json that you are trying to update", + ) + parser.add_argument( + "--linter", + required=True, + help="name of linter you're trying to update", + ) + parser.add_argument( + "--platform", + required=True, + help="which platform you are uploading the binary for", + ) + parser.add_argument( + "--file", + required=True, + help="file to upload", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="if set, don't actually upload/write hash", + ) + args = parser.parse_args() + logging.basicConfig(level=logging.INFO) + + config = json.load(open(args.config_json)) + linter_config = config[args.linter][args.platform] + bucket = linter_config["s3_bucket"] + object_name = linter_config["object_name"] + + # Upload the file + logging.info( + f"Uploading file {args.file} to s3 bucket: {bucket}, object name: {object_name}" + ) + if not args.dry_run: + s3_client = boto3.client("s3") + s3_client.upload_file(args.file, bucket, object_name) + + # Update hash in repo + hash_of_new_binary = compute_file_sha256(args.file) + logging.info(f"Computed new hash for binary {hash_of_new_binary}") + + linter_config["hash"] = hash_of_new_binary + config_dump = json.dumps(config, indent=4, sort_keys=True) + + logging.info("Writing out new config:") + logging.info(config_dump) + if not args.dry_run: + with open(args.config_json, "w") as f: + f.write(config_dump) + + +if __name__ == "__main__": + main()