[lint] lintrunner fixes/improvements (#68292)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/68292

- The NOQA lint pattern was typo'd to be the same as the `type: ignore` pattern; fix it to flag unqualified `noqa` comments (see the regex sketch just below)
- Generalize the clang-tidy initialization script into a shared s3_init adapter and use it for clang-format as well
- Add a script that lets you update the lint binaries in S3 relatively easily
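
To make the first point concrete, here is a quick sketch (not part of the diff; the sample strings are invented) of what the corrected NOQA pattern from .lintrunner.toml now flags versus allows:

import re

# The corrected grep pattern for unqualified `noqa` comments (sample strings invented).
noqa_pattern = re.compile(r"# noqa(?!: [A-Z]+\d{3})")

assert noqa_pattern.search("x = 1  # noqa")                 # unqualified -> flagged
assert noqa_pattern.search("x = 1  # noqa: fixme")          # not a rule code -> flagged
assert not noqa_pattern.search("import os  # noqa: F401")   # qualified -> allowed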

Test Plan: Imported from OSS

Reviewed By: malfet

Differential Revision: D32403934

Pulled By: suo

fbshipit-source-id: 4e21b22605216f013d87d636a205707ca8e0af36
Michael Suo authored on 2021-11-15 11:04:30 -08:00, committed by Facebook GitHub Bot
parent 43874d79e7
commit 24b60b2cbf
7 changed files with 368 additions and 228 deletions

.gitignore

@@ -287,6 +287,7 @@ TAGS
 # clang tooling storage location
 .clang-format-bin
 .clang-tidy-bin
+.lintbin
 # clangd background index
 .clangd/

.lintrunner.toml

@@ -53,10 +53,19 @@ include_patterns = [
     'test/cpp/tensorexpr/**/*.h',
     'test/cpp/tensorexpr/**/*.cpp',
 ]
+init_command = [
+    'python3',
+    'tools/linter/adapters/s3_init.py',
+    '--config-json=tools/linter/adapters/s3_init_config.json',
+    '--linter=clang-format',
+    '--dry-run={{DRYRUN}}',
+    '--output-dir=.lintbin',
+    '--output-name=clang-format',
+]
 command = [
     'python3',
     'tools/linter/adapters/clangformat_linter.py',
-    '--binary=clang-format',
+    '--binary=.lintbin/clang-format',
     '--',
     '@{{PATHSFILE}}'
 ]
@@ -159,15 +168,17 @@ exclude_patterns = [
 ]
 init_command = [
     'python3',
-    'tools/linter/adapters/clangtidy_init.py',
-    '--dry_run={{DRYRUN}}',
-    '--output_dir=.clang-tidy-bin',
-    '--output_name=clang-tidy',
+    'tools/linter/adapters/s3_init.py',
+    '--config-json=tools/linter/adapters/s3_init_config.json',
+    '--linter=clang-tidy',
+    '--dry-run={{DRYRUN}}',
+    '--output-dir=.lintbin',
+    '--output-name=clang-tidy',
 ]
 command = [
     'python3',
     'tools/linter/adapters/clangtidy_linter.py',
-    '--binary=.clang-tidy-bin/clang-tidy',
+    '--binary=.lintbin/clang-tidy',
     '--build_dir=./build',
     '--',
     '@{{PATHSFILE}}'
@@ -198,8 +209,8 @@ exclude_patterns = ['caffe2/**']
 command = [
     'python3',
     'tools/linter/adapters/grep_linter.py',
-    '--pattern=# type:\s*ignore(?!\[)',
-    '--linter-name=TYPEIGNORE',
+    '--pattern=# noqa(?!: [A-Z]+\d{3})',
+    '--linter-name=NOQA',
     '--error-name=unqualified noqa',
     """--error-description=\
         This line has an unqualified `noqa`; \
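
For reference, when `lintrunner init` runs the CLANGFORMAT init_command above, the resulting invocation is roughly equivalent to the following sketch (lintrunner fills in {{DRYRUN}} itself; "0" is assumed here):

import subprocess

# Rough equivalent of the CLANGFORMAT init_command above; "--dry-run=0" is an assumption.
subprocess.run(
    [
        "python3",
        "tools/linter/adapters/s3_init.py",
        "--config-json=tools/linter/adapters/s3_init_config.json",
        "--linter=clang-format",
        "--dry-run=0",
        "--output-dir=.lintbin",
        "--output-name=clang-format",
    ],
    check=True,
)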

tools/linter/adapters/clangformat_linter.py

@@ -7,6 +7,7 @@ import subprocess
 import sys
 import time
 from enum import Enum
+from pathlib import Path
 from typing import Any, List, NamedTuple, Optional
@@ -209,6 +210,23 @@ def main() -> None:
     )
     binary = os.path.normpath(args.binary) if IS_WINDOWS else args.binary
+    if not Path(binary).exists():
+        lint_message = LintMessage(
+            path=None,
+            line=None,
+            char=None,
+            code="CLANGFORMAT",
+            severity=LintSeverity.ERROR,
+            name="init-error",
+            original=None,
+            replacement=None,
+            description=(
+                f"Could not find clang-format binary at {binary}, "
+                "did you forget to run `lintrunner init`?"
+            ),
+        )
+        print(json.dumps(lint_message._asdict()), flush=True)
+        sys.exit(0)
     with concurrent.futures.ThreadPoolExecutor(
         max_workers=os.cpu_count(),
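
With this change a missing clang-format binary surfaces as a normal lint message instead of a crash. As a rough illustration (the exact serialization of LintMessage and LintSeverity is assumed; values are examples), the single JSON line printed by the init-error path looks something like:

import json

# Approximate shape of the init-error message printed above (illustrative values only).
message = {
    "path": None,
    "line": None,
    "char": None,
    "code": "CLANGFORMAT",
    "severity": "error",
    "name": "init-error",
    "original": None,
    "replacement": None,
    "description": (
        "Could not find clang-format binary at .lintbin/clang-format, "
        "did you forget to run `lintrunner init`?"
    ),
}
print(json.dumps(message))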

tools/linter/adapters/clangtidy_init.py (deleted)

@@ -1,220 +0,0 @@
import platform
import argparse
import sys
import stat
import hashlib
import subprocess
import os
import urllib.request
import urllib.error
import pathlib

from typing import Dict

# String representing the host platform (e.g. Linux, Darwin).
HOST_PLATFORM = platform.system()

# PyTorch directory root
result = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, check=True,
)
PYTORCH_ROOT = result.stdout.decode("utf-8").strip()

HASH_PATH = pathlib.Path(PYTORCH_ROOT) / "tools" / "linter" / "install" / "hashes"


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def report_download_progress(
    chunk_number: int, chunk_size: int, file_size: int
) -> None:
    """
    Pretty printer for file download progress.
    """
    if file_size != -1:
        percent = min(1, (chunk_number * chunk_size) / file_size)
        bar = "#" * int(64 * percent)
        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))


def download_bin(name: str, output_dir: str, platform_to_url: Dict[str, str], dry_run: bool) -> bool:
    """
    Downloads the binary appropriate for the host platform and stores it in the given output directory.
    """
    if HOST_PLATFORM not in platform_to_url:
        print(f"Unsupported platform: {HOST_PLATFORM}")
        return False

    url = platform_to_url[HOST_PLATFORM]
    filename = os.path.join(output_dir, name)

    if dry_run:
        print(f"DRY RUN: Would download {url} to {filename}")
        return True

    # Try to download binary.
    print(f"Downloading {name} to {output_dir}")
    try:
        urllib.request.urlretrieve(
            url,
            filename,
            reporthook=report_download_progress if sys.stdout.isatty() else None,
        )
    except urllib.error.URLError as e:
        print(f"Error downloading {filename}: {e}")
        return False
    finally:
        print()

    return True


def download(
    name: str,
    output_dir: str,
    platform_to_url: Dict[str, str],
    platform_to_hash: Dict[str, str],
    dry_run: bool,
) -> bool:
    """
    Download a platform-appropriate binary if one doesn't already exist at the expected location and verifies
    that it is the right binary by checking its SHA256 hash against the expected hash.
    """
    output_path = os.path.join(output_dir, name)
    if not os.path.exists(output_dir):
        # If the directory doesn't exist, try to create it.
        if dry_run:
            print(f"DRY RUN: would create directory for {name} binary: {output_dir}")
        else:
            try:
                os.mkdir(output_dir)
            except OSError as e:
                print(f"Unable to create directory for {name} binary: {output_dir}")
                return False
            finally:
                print(f"Created directory {output_dir} for {name} binary")

        # If the directory didn't exist, neither did the binary, so download it.
        ok = download_bin(name, output_dir, platform_to_url, dry_run)
        if not ok:
            return False
    else:
        # If the directory exists but the binary doesn't, download it.
        if not os.path.exists(output_path):
            ok = download_bin(name, output_dir, platform_to_url, dry_run)
            if not ok:
                return False
        else:
            print(f"Found pre-existing {name} binary, skipping download")

    # Now that the binary is where it should be, hash it.
    actual_bin_hash = compute_file_sha256(output_path)

    # If the host platform is not in platform_to_hash, it is unsupported.
    if HOST_PLATFORM not in platform_to_hash:
        print(f"Unsupported platform: {HOST_PLATFORM}")
        return False

    # This is the path to the file containing the reference hash.
    hashpath = os.path.join(PYTORCH_ROOT, platform_to_hash[HOST_PLATFORM])

    if not os.path.exists(hashpath):
        print("Unable to find reference binary hash")
        return False

    # Load the reference hash and compare the actual hash to it.
    if dry_run:
        # We didn't download anything, just bail
        return True

    with open(hashpath, "r") as f:
        reference_bin_hash = f.readline().strip()

    print(f"Reference Hash: {reference_bin_hash}")
    print(f"Actual Hash: {repr(actual_bin_hash)}")

    if reference_bin_hash != actual_bin_hash:
        print("The downloaded binary is not what was expected!")
        print(f"Downloaded hash: {repr(actual_bin_hash)} vs expected {reference_bin_hash}")

        # Err on the side of caution and try to delete the downloaded binary.
        try:
            os.unlink(output_path)
            print("The binary has been deleted just to be safe")
        except OSError as e:
            print(f"Failed to delete binary: {e}")
            print("Delete this binary as soon as possible and do not execute it!")

        return False
    else:
        # Make sure the binary is executable.
        mode = os.stat(output_path).st_mode
        mode |= stat.S_IXUSR
        os.chmod(output_path, mode)
        print(f"Using {name} located at {output_path}")

    return True


PLATFORM_TO_URL = {
    "Linux": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
    "Darwin": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy",
}

PLATFORM_TO_HASH = {
    "Linux": os.path.join(HASH_PATH, "clang-tidy-linux64"),
    "Darwin": os.path.join(HASH_PATH, "clang-tidy-macos"),
}

OUTPUT_DIR = os.path.join(PYTORCH_ROOT, ".clang-tidy-bin")
INSTALLATION_PATH = os.path.join(OUTPUT_DIR, "clang-tidy")

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="downloads clang-tidy",
    )
    parser.add_argument(
        "--output_dir",
        required=True,
        help="place to put the binary",
    )
    parser.add_argument(
        "--output_name",
        required=True,
        help="name of binary",
    )
    parser.add_argument(
        "--dry_run",
        default=False,
        help="do not download, just print what would be done",
    )
    args = parser.parse_args()

    if args.dry_run == "0":
        args.dry_run = False
    else:
        args.dry_run = True

    ok = download(args.output_name, args.output_dir, PLATFORM_TO_URL, PLATFORM_TO_HASH, args.dry_run)
    if not ok:
        print(f"Failed to download clang-tidy binary from {PLATFORM_TO_URL}")
        exit(1)

tools/linter/adapters/s3_init.py (new)

@@ -0,0 +1,206 @@
import argparse
import hashlib
import json
import logging
import os
import platform
import stat
import subprocess
import sys
import textwrap
import urllib.error
import urllib.request
from pathlib import Path

# String representing the host platform (e.g. Linux, Darwin).
HOST_PLATFORM = platform.system()

# PyTorch directory root
result = subprocess.run(
    ["git", "rev-parse", "--show-toplevel"],
    stdout=subprocess.PIPE,
    check=True,
)
PYTORCH_ROOT = result.stdout.decode("utf-8").strip()

DRY_RUN = False


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def report_download_progress(
    chunk_number: int, chunk_size: int, file_size: int
) -> None:
    """
    Pretty printer for file download progress.
    """
    if file_size != -1:
        percent = min(1, (chunk_number * chunk_size) / file_size)
        bar = "#" * int(64 * percent)
        sys.stdout.write("\r0% |{:<64}| {}%".format(bar, int(percent * 100)))


def check(binary_path: Path, reference_hash: str) -> bool:
    """Check whether the binary exists and is the right one.

    If there is a hash mismatch, delete the existing binary.
    """
    if not binary_path.exists():
        logging.info(f"{binary_path} does not exist.")
        return False

    existing_binary_hash = compute_file_sha256(str(binary_path))
    if existing_binary_hash == reference_hash:
        return True

    logging.warning(
        textwrap.dedent(
            f"""\
            Found binary hash does not match reference!
            Found hash: {existing_binary_hash}
            Reference hash: {reference_hash}
            Deleting {binary_path} just to be safe.
            """
        )
    )
    if DRY_RUN:
        logging.critical(
            "In dry run mode, so not actually deleting the binary. But consider deleting it ASAP!"
        )
        return False

    try:
        binary_path.unlink()
    except OSError as e:
        logging.critical(f"Failed to delete binary: {e}")
        logging.critical(
            "Delete this binary as soon as possible and do not execute it!"
        )

    return False


def download(
    name: str,
    output_dir: str,
    url: str,
    reference_bin_hash: str,
) -> bool:
    """
    Download a platform-appropriate binary if one doesn't already exist at the expected
    location, and verify that it is the right binary by checking its SHA256 hash against
    the expected hash.
    """
    # First check if we need to do anything
    binary_path = Path(output_dir, name)
    if check(binary_path, reference_bin_hash):
        logging.info(f"Correct binary already exists at {binary_path}. Exiting.")
        return True

    # Create the output folder
    binary_path.parent.mkdir(parents=True, exist_ok=True)

    # Download the binary
    logging.info(f"Downloading {url} to {binary_path}")

    if DRY_RUN:
        logging.info("Exiting as there is nothing left to do in dry run mode")
        return True

    urllib.request.urlretrieve(
        url,
        binary_path,
        reporthook=report_download_progress if sys.stdout.isatty() else None,
    )

    logging.info(f"Downloaded {name} successfully.")

    # Check the downloaded binary
    if not check(binary_path, reference_bin_hash):
        logging.critical(f"Downloaded binary {name} failed its hash check")
        return False

    # Ensure that the executable bits are set
    mode = os.stat(binary_path).st_mode
    mode |= stat.S_IXUSR
    os.chmod(binary_path, mode)

    logging.info(f"Using {name} located at {binary_path}")
    return True


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="downloads and checks binaries from s3",
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="Path to config json that describes where to find binaries and hashes",
    )
    parser.add_argument(
        "--linter",
        required=True,
        help="Which linter to initialize from the config json",
    )
    parser.add_argument(
        "--output-dir",
        required=True,
        help="place to put the binary",
    )
    parser.add_argument(
        "--output-name",
        required=True,
        help="name of binary",
    )
    parser.add_argument(
        "--dry-run",
        default=False,
        help="do not download, just print what would be done",
    )
    args = parser.parse_args()
    if args.dry_run == "0":
        DRY_RUN = False
    else:
        DRY_RUN = True

    logging.basicConfig(
        format="[DRY_RUN] %(levelname)s: %(message)s"
        if DRY_RUN
        else "%(levelname)s: %(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )

    config = json.load(open(args.config_json))
    config = config[args.linter]

    # If the host platform is not in the config, it is unsupported.
    if HOST_PLATFORM not in config:
        logging.error(f"Unsupported platform: {HOST_PLATFORM}")
        exit(1)

    url = config[HOST_PLATFORM]["download_url"]
    hash = config[HOST_PLATFORM]["hash"]

    ok = download(args.output_name, args.output_dir, url, hash)
    if not ok:
        logging.critical(f"Unable to initialize {args.linter}")
        sys.exit(1)
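
One subtlety worth noting: the adapter treats `--dry-run=0` as a real run and any other value as a dry run, matching how the {{DRYRUN}} placeholder in .lintrunner.toml is filled in. A tiny sketch of that convention (invented helper and values):

# Sketch of the dry-run flag convention used by s3_init.py above (invented helper and values).
def parse_dry_run(value: str) -> bool:
    # "--dry-run=0" means "actually download"; anything else means dry run.
    return value != "0"

assert parse_dry_run("0") is False
assert parse_dry_run("1") is True
assert parse_dry_run("true") is True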

tools/linter/adapters/s3_init_config.json (new)

@@ -0,0 +1,30 @@
{
    "clang-format": {
        "Darwin": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/mac/clang-format-mojave",
            "hash": "1485a242a96c737ba7cdd9f259114f2201accdb46d87ac7a8650b1a814cd4d4d",
            "object_name": "mac/clang-format-mojave",
            "s3_bucket": "oss-clang-format"
        },
        "Linux": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-format-linux64",
            "hash": "e1c8b97b919541a99e0a355df5c3f9e8abebc64259dbee6f8c68e1ef90582856",
            "object_name": "linux64/clang-format-linux64",
            "s3_bucket": "oss-clang-format"
        }
    },
    "clang-tidy": {
        "Darwin": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos/clang-tidy",
            "hash": "541797a7b8fa795e2f3c1adcd8236cc336a40aa927028dc5bc79172e1d9eca36",
            "object_name": "macos/clang-tidy",
            "s3_bucket": "oss-clang-format"
        },
        "Linux": {
            "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/clang-tidy",
            "hash": "49343a448fcb75cd1e0fb9d6b1f6c2ef4b008b6f91d6ff899d4ac6060f5e52a5",
            "object_name": "linux64/clang-tidy",
            "s3_bucket": "oss-clang-format"
        }
    }
}
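
When adding or updating an entry here, a small consistency check along the following lines can catch copy-paste mistakes; this is a hypothetical helper, not part of the commit:

import json
import re

# Hypothetical sanity check for s3_init_config.json entries (not part of the commit).
with open("tools/linter/adapters/s3_init_config.json") as f:
    config = json.load(f)

for linter, platforms in config.items():
    for plat, entry in platforms.items():
        # Hashes should be full SHA256 hex digests.
        assert re.fullmatch(r"[0-9a-f]{64}", entry["hash"]), (linter, plat)
        # The download URL should reference the same bucket and object the uploader uses.
        assert entry["s3_bucket"] in entry["download_url"], (linter, plat)
        assert entry["object_name"] in entry["download_url"], (linter, plat)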


@@ -0,0 +1,94 @@
"""Uploads a new binary to s3 and updates its hash in the config file.

You'll need to have appropriate credentials on the PyTorch AWS buckets, see:
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/quickstart.html#configuration
for how to configure them.
"""
import argparse
import boto3  # type: ignore[import]
import json
import hashlib
import os
import logging


def compute_file_sha256(path: str) -> str:
    """Compute the SHA256 hash of a file and return it as a hex string."""
    # If the file doesn't exist, return an empty string.
    if not os.path.exists(path):
        return ""

    hash = hashlib.sha256()

    # Open the file in binary mode and hash it.
    with open(path, "rb") as f:
        for b in f:
            hash.update(b)

    # Return the hash as a hexadecimal string.
    return hash.hexdigest()


def main() -> None:
    parser = argparse.ArgumentParser(
        description="s3 binary updater",
        fromfile_prefix_chars="@",
    )
    parser.add_argument(
        "--config-json",
        required=True,
        help="path to config json that you are trying to update",
    )
    parser.add_argument(
        "--linter",
        required=True,
        help="name of linter you're trying to update",
    )
    parser.add_argument(
        "--platform",
        required=True,
        help="which platform you are uploading the binary for",
    )
    parser.add_argument(
        "--file",
        required=True,
        help="file to upload",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="if set, don't actually upload/write hash",
    )
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    config = json.load(open(args.config_json))
    linter_config = config[args.linter][args.platform]
    bucket = linter_config["s3_bucket"]
    object_name = linter_config["object_name"]

    # Upload the file
    logging.info(
        f"Uploading file {args.file} to s3 bucket: {bucket}, object name: {object_name}"
    )
    if not args.dry_run:
        s3_client = boto3.client("s3")
        s3_client.upload_file(args.file, bucket, object_name)

    # Update hash in repo
    hash_of_new_binary = compute_file_sha256(args.file)
    logging.info(f"Computed new hash for binary {hash_of_new_binary}")

    linter_config["hash"] = hash_of_new_binary
    config_dump = json.dumps(config, indent=4, sort_keys=True)

    logging.info("Writing out new config:")
    logging.info(config_dump)
    if not args.dry_run:
        with open(args.config_json, "w") as f:
            f.write(config_dump)


if __name__ == "__main__":
    main()
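
A dry run is the safe way to exercise the uploader before touching S3; a sketch of such an invocation follows (the script path, local binary path, and linter/platform values are assumptions for illustration):

import subprocess

subprocess.run(
    [
        "python3",
        "tools/linter/adapters/update_s3.py",  # assumed path of the new script
        "--config-json=tools/linter/adapters/s3_init_config.json",
        "--linter=clang-tidy",
        "--platform=Linux",
        "--file=/tmp/clang-tidy",              # hypothetical freshly built binary
        "--dry-run",
    ],
    check=True,
)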