mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Remove SHA checksum for bazel http_archive from GitHub (#95039)
An action item from https://github.com/pytorch/pytorch/issues/94346 Although the security practice of setting the checksum is good, it doesn't work when the archive is downloaded from some sites like GitHub because it can change. Specifically, GitHub gives no guarantee to keep the same value forever https://github.com/community/community/discussions/46034. This also adds a new linter to make sure that SHA checksum from GitHub can be removed quickly. The WORKSPACE file is actually updated using the new linter: ``` >>> Lint for WORKSPACE: Advice (BAZEL_LINTER) format Redundant SHA checksum. Run `lintrunner -a` to apply this patch. You can run `lintrunner -a` to apply this patch. 5 5 | 6 6 | http_archive( 7 7 | name = "rules_cuda", 7 |- sha256 = "f80438bee9906e9ecb1a8a4ae2365374ac1e8a283897281a2db2fb7fcf746333", 9 8 | strip_prefix = "runtime-b1c7cce21ba4661c17ac72421c6a0e2015e7bef3/third_party/rules_cuda", 10 9 | urls = ["b1c7cce21b
.tar.gz"], 11 10 | ) -------------------------------------------------------------------------------- 29 28 | name = "pybind11_bazel", 30 29 | strip_prefix = "pybind11_bazel-992381ced716ae12122360b0fbadbc3dda436dbf", 31 30 | urls = ["992381ced7
.zip"], 31 |- sha256 = "3dc6435bd41c058453efe102995ef084d0a86b0176fd6a67a6b7100a2e9a940e", 33 31 | ) 34 32 | 35 33 | new_local_repository( -------------------------------------------------------------------------------- 52 50 | urls = [ 53 51 | "https://github.com/gflags/gflags/archive/v2.2.2.tar.gz", 54 52 | ], 54 |- sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf", 56 53 | ) 57 54 | 58 55 | new_local_repository( ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/95039 Approved by: https://github.com/ZainRizvi
This commit is contained in:
@ -895,3 +895,24 @@ command = [
|
||||
'--',
|
||||
'@{{PATHSFILE}}'
|
||||
]
|
||||
|
||||
[[linter]]
|
||||
code = 'BAZEL_LINTER'
|
||||
include_patterns = ['WORKSPACE']
|
||||
command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/bazel_linter.py',
|
||||
'--binary=.lintbin/bazel',
|
||||
'--',
|
||||
'@{{PATHSFILE}}'
|
||||
]
|
||||
init_command = [
|
||||
'python3',
|
||||
'tools/linter/adapters/s3_init.py',
|
||||
'--config-json=tools/linter/adapters/s3_init_config.json',
|
||||
'--linter=bazel',
|
||||
'--dry-run={{DRYRUN}}',
|
||||
'--output-dir=.lintbin',
|
||||
'--output-name=bazel',
|
||||
]
|
||||
is_formatter = true
|
||||
|
@ -5,7 +5,6 @@ load("//tools/rules:workspace.bzl", "new_patched_local_repository")
|
||||
|
||||
http_archive(
|
||||
name = "rules_cuda",
|
||||
sha256 = "f80438bee9906e9ecb1a8a4ae2365374ac1e8a283897281a2db2fb7fcf746333",
|
||||
strip_prefix = "runtime-b1c7cce21ba4661c17ac72421c6a0e2015e7bef3/third_party/rules_cuda",
|
||||
urls = ["https://github.com/tensorflow/runtime/archive/b1c7cce21ba4661c17ac72421c6a0e2015e7bef3.tar.gz"],
|
||||
)
|
||||
@ -29,7 +28,6 @@ http_archive(
|
||||
name = "pybind11_bazel",
|
||||
strip_prefix = "pybind11_bazel-992381ced716ae12122360b0fbadbc3dda436dbf",
|
||||
urls = ["https://github.com/pybind/pybind11_bazel/archive/992381ced716ae12122360b0fbadbc3dda436dbf.zip"],
|
||||
sha256 = "3dc6435bd41c058453efe102995ef084d0a86b0176fd6a67a6b7100a2e9a940e",
|
||||
)
|
||||
|
||||
new_local_repository(
|
||||
@ -52,7 +50,6 @@ http_archive(
|
||||
urls = [
|
||||
"https://github.com/gflags/gflags/archive/v2.2.2.tar.gz",
|
||||
],
|
||||
sha256 = "34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf",
|
||||
)
|
||||
|
||||
new_local_repository(
|
||||
|
175
tools/linter/adapters/bazel_linter.py
Normal file
175
tools/linter/adapters/bazel_linter.py
Normal file
@ -0,0 +1,175 @@
|
||||
"""
|
||||
This linter ensures that users don't set a SHA hash checksum in Bazel for the http_archive.
|
||||
Although the security practice of setting the checksum is good, it doesn't work when the
|
||||
archive is downloaded from some sites like GitHub because it can change. Specifically,
|
||||
GitHub gives no guarantee to keep the same value forever. Check for more details at
|
||||
https://github.com/community/community/discussions/46034.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as ET
|
||||
from enum import Enum
|
||||
from typing import List, NamedTuple, Optional, Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
LINTER_CODE = "BAZEL_LINTER"
|
||||
SHA256_REGEX = re.compile(r"\s*sha256\s*=\s*['\"](?P<sha256>[a-zA-Z0-9]{64})['\"]\s*,")
|
||||
DOMAINS_WITH_UNSTABLE_CHECKSUM = {"github.com"}
|
||||
|
||||
|
||||
class LintSeverity(str, Enum):
|
||||
ERROR = "error"
|
||||
WARNING = "warning"
|
||||
ADVICE = "advice"
|
||||
DISABLED = "disabled"
|
||||
|
||||
|
||||
class LintMessage(NamedTuple):
|
||||
path: Optional[str]
|
||||
line: Optional[int]
|
||||
char: Optional[int]
|
||||
code: str
|
||||
severity: LintSeverity
|
||||
name: str
|
||||
original: Optional[str]
|
||||
replacement: Optional[str]
|
||||
description: Optional[str]
|
||||
|
||||
|
||||
def is_required_checksum(urls: List[Optional[str]]) -> bool:
|
||||
if not urls:
|
||||
return False
|
||||
|
||||
for url in urls:
|
||||
if not url:
|
||||
continue
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
if parsed_url.hostname in DOMAINS_WITH_UNSTABLE_CHECKSUM:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
||||
def get_disallowed_checksums(
|
||||
binary: str,
|
||||
) -> Set[str]:
|
||||
"""
|
||||
Return the set of disallowed checksums from all http_archive rules
|
||||
"""
|
||||
try:
|
||||
# Use bazel to get the list of external dependencies in XML format
|
||||
proc = subprocess.run(
|
||||
[binary, "query", "kind(http_archive, //external:*)", "--output=xml"],
|
||||
capture_output=True,
|
||||
)
|
||||
except OSError:
|
||||
raise
|
||||
|
||||
stdout = str(proc.stdout, "utf-8").strip()
|
||||
root = ET.fromstring(stdout)
|
||||
|
||||
disallowed_checksums = set()
|
||||
# Parse all the http_archive rules in the XML output
|
||||
for rule in root.findall('.//rule[@class="http_archive"]'):
|
||||
urls_node = rule.find('.//list[@name="urls"]')
|
||||
if urls_node is None:
|
||||
continue
|
||||
urls = [n.get("value") for n in urls_node.findall(".//string")]
|
||||
|
||||
checksum_node = rule.find('.//string[@name="sha256"]')
|
||||
if checksum_node is None:
|
||||
continue
|
||||
checksum = checksum_node.get("value")
|
||||
|
||||
if not checksum:
|
||||
continue
|
||||
|
||||
if not is_required_checksum(urls):
|
||||
disallowed_checksums.add(checksum)
|
||||
|
||||
return disallowed_checksums
|
||||
|
||||
|
||||
def check_bazel(
|
||||
filename: str,
|
||||
disallowed_checksums: Set[str],
|
||||
) -> List[LintMessage]:
|
||||
original = ""
|
||||
replacement = ""
|
||||
|
||||
with open(filename) as f:
|
||||
for line in f:
|
||||
original += f"{line}"
|
||||
|
||||
m = SHA256_REGEX.match(line)
|
||||
if m:
|
||||
sha256 = m.group("sha256")
|
||||
|
||||
if sha256 in disallowed_checksums:
|
||||
continue
|
||||
|
||||
replacement += f"{line}"
|
||||
|
||||
if original == replacement:
|
||||
return []
|
||||
|
||||
return [
|
||||
LintMessage(
|
||||
path=filename,
|
||||
line=None,
|
||||
char=None,
|
||||
code=LINTER_CODE,
|
||||
severity=LintSeverity.ADVICE,
|
||||
name="format",
|
||||
original=original,
|
||||
replacement=replacement,
|
||||
description="Found redundant SHA checksums. Run `lintrunner -a` to apply this patch.",
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="A custom linter to detect redundant SHA checksums in Bazel",
|
||||
fromfile_prefix_chars="@",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--binary",
|
||||
required=True,
|
||||
help="bazel binary path",
|
||||
)
|
||||
parser.add_argument(
|
||||
"filenames",
|
||||
nargs="+",
|
||||
help="paths to lint",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
disallowed_checksums = get_disallowed_checksums(args.binary)
|
||||
except Exception as e:
|
||||
err_msg = LintMessage(
|
||||
path=None,
|
||||
line=None,
|
||||
char=None,
|
||||
code=LINTER_CODE,
|
||||
severity=LintSeverity.ERROR,
|
||||
name="command-failed",
|
||||
original=None,
|
||||
replacement=None,
|
||||
description=(f"Failed due to {e.__class__.__name__}:\n{e}"),
|
||||
)
|
||||
print(json.dumps(err_msg._asdict()), flush=True)
|
||||
exit(0)
|
||||
|
||||
for filename in args.filenames:
|
||||
for lint_message in check_bazel(filename, disallowed_checksums):
|
||||
print(json.dumps(lint_message._asdict()), flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@ -39,5 +39,15 @@
|
||||
"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/actionlint/1.6.21/Linux_arm64/actionlint",
|
||||
"hash": "025ac157db121b33971ef24af72d73d71cda3cb1e3a94795bb2708ef4032ca76"
|
||||
}
|
||||
},
|
||||
"bazel": {
|
||||
"Darwin": {
|
||||
"download_url": "https://ossci-macos.s3.amazonaws.com/bazel-4.2.1-darwin-x86_64",
|
||||
"hash": "74d93848f0c9d592e341e48341c53c87e3cb304a54a2a1ee9cff3df422f0b23c"
|
||||
},
|
||||
"Linux": {
|
||||
"download_url": "https://ossci-linux.s3.amazonaws.com/bazel-4.2.1-linux-x86_64",
|
||||
"hash": "1a4f3a3ce292307bceeb44f459883859c793436d564b95319aacb8af1f20557c"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user