mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert D30279364: [codemod][lint][fbcode/c*] Enable BLACK by default
Test Plan: revert-hammer
Differential Revision:
D30279364 (b004307252
)
Original commit changeset: c1ed77dfe43a
fbshipit-source-id: eab50857675c51e0088391af06ec0ecb14e2347e
This commit is contained in:
committed by
Facebook GitHub Bot
parent
ed0b8a3e83
commit
1022443168
@ -1,10 +1,8 @@
|
||||
# This file contains a simple parser that parses reports
|
||||
# from cuda-memcheck
|
||||
|
||||
|
||||
class ParseError(Exception):
    """Raised whenever the simple parser is unable to parse the report."""
|
||||
|
||||
|
||||
@ -79,25 +77,25 @@ def parse(message):
|
||||
========= ERROR SUMMARY: 4 errors
|
||||
"""
|
||||
errors = []
|
||||
HEAD = "========="
|
||||
HEAD = '========='
|
||||
headlen = len(HEAD)
|
||||
started = False
|
||||
in_message = False
|
||||
message_lines = []
|
||||
lines = message.splitlines()
|
||||
for l in lines:
|
||||
if l == HEAD + " CUDA-MEMCHECK":
|
||||
if l == HEAD + ' CUDA-MEMCHECK':
|
||||
started = True
|
||||
continue
|
||||
if not started or not l.startswith(HEAD):
|
||||
continue
|
||||
l = l[headlen + 1 :]
|
||||
if l.startswith("ERROR SUMMARY:"):
|
||||
l = l[headlen + 1:]
|
||||
if l.startswith('ERROR SUMMARY:'):
|
||||
return Report(l, errors)
|
||||
if not in_message:
|
||||
in_message = True
|
||||
message_lines = [l]
|
||||
elif l == "":
|
||||
elif l == '':
|
||||
errors.append(Error(message_lines))
|
||||
in_message = False
|
||||
else:
|
||||
|
@ -12,59 +12,37 @@ Example usage:
|
||||
Note that running cuda-memcheck could be very slow.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import multiprocessing
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import cuda_memcheck_common as cmc
|
||||
import torch
|
||||
import multiprocessing
|
||||
import argparse
|
||||
import subprocess
|
||||
import tqdm
|
||||
import os
|
||||
import sys
|
||||
import cuda_memcheck_common as cmc
|
||||
|
||||
ALL_TESTS = []
|
||||
GPUS = torch.cuda.device_count()
|
||||
|
||||
# parse arguments
|
||||
parser = argparse.ArgumentParser(description="Run isolated cuda-memcheck on unit tests")
|
||||
parser.add_argument(
|
||||
"filename", help="the python file for a test, such as test_torch.py"
|
||||
)
|
||||
parser.add_argument(
|
||||
"timeout",
|
||||
type=int,
|
||||
help="kill the test if it does not terminate in a certain amount of seconds",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--strict",
|
||||
action="store_true",
|
||||
help="Whether to show cublas/cudnn errors. These errors are ignored by default because"
|
||||
"cublas/cudnn does not run error-free under cuda-memcheck, and ignoring these errors",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nproc",
|
||||
type=int,
|
||||
default=multiprocessing.cpu_count(),
|
||||
help="Number of processes running tests, default to number of cores in the system",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gpus",
|
||||
default="all",
|
||||
help='GPU assignments for each process, it could be "all", or : separated list like "1,2:3,4:5,6"',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ci",
|
||||
action="store_true",
|
||||
help="Whether this script is executed in CI. When executed inside a CI, this script fails when "
|
||||
"an error is detected. Also, it will not show tqdm progress bar, but directly print the error"
|
||||
"to stdout instead.",
|
||||
)
|
||||
parser.add_argument("--nohang", action="store_true", help="Treat timeout as success")
|
||||
parser.add_argument("--split", type=int, default=1, help="Split the job into pieces")
|
||||
parser.add_argument(
|
||||
"--rank", type=int, default=0, help="Which piece this process should pick"
|
||||
)
|
||||
parser.add_argument('filename', help="the python file for a test, such as test_torch.py")
|
||||
parser.add_argument('timeout', type=int, help='kill the test if it does not terminate in a certain amount of seconds')
|
||||
parser.add_argument('--strict', action='store_true',
|
||||
help='Whether to show cublas/cudnn errors. These errors are ignored by default because'
|
||||
'cublas/cudnn does not run error-free under cuda-memcheck, and ignoring these errors')
|
||||
parser.add_argument('--nproc', type=int, default=multiprocessing.cpu_count(),
|
||||
help='Number of processes running tests, default to number of cores in the system')
|
||||
parser.add_argument('--gpus', default='all',
|
||||
help='GPU assignments for each process, it could be "all", or : separated list like "1,2:3,4:5,6"')
|
||||
parser.add_argument('--ci', action='store_true',
|
||||
help='Whether this script is executed in CI. When executed inside a CI, this script fails when '
|
||||
'an error is detected. Also, it will not show tqdm progress bar, but directly print the error'
|
||||
'to stdout instead.')
|
||||
parser.add_argument('--nohang', action='store_true', help='Treat timeout as success')
|
||||
parser.add_argument('--split', type=int, default=1, help='Split the job into pieces')
|
||||
parser.add_argument('--rank', type=int, default=0, help='Which piece this process should pick')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Filter that ignores cublas/cudnn errors
|
||||
@ -78,42 +56,32 @@ def is_ignored_only(output):
|
||||
return False
|
||||
count_ignored_errors = 0
|
||||
for e in report.errors:
|
||||
if (
|
||||
"libcublas" in "".join(e.stack)
|
||||
or "libcudnn" in "".join(e.stack)
|
||||
or "libcufft" in "".join(e.stack)
|
||||
):
|
||||
if 'libcublas' in ''.join(e.stack) or 'libcudnn' in ''.join(e.stack) or 'libcufft' in ''.join(e.stack):
|
||||
count_ignored_errors += 1
|
||||
return count_ignored_errors == report.num_errors
|
||||
|
||||
|
||||
# Set the environment variable PYTORCH_CUDA_MEMCHECK=1 to allow skipping some tests
|
||||
os.environ["PYTORCH_CUDA_MEMCHECK"] = "1"
|
||||
os.environ['PYTORCH_CUDA_MEMCHECK'] = '1'
|
||||
|
||||
# Discover tests:
|
||||
# To get a list of tests, run:
|
||||
# pytest --setup-only test/test_torch.py
|
||||
# and then parse the output
|
||||
proc = subprocess.Popen(
|
||||
["pytest", "--setup-only", args.filename],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
)
|
||||
proc = subprocess.Popen(['pytest', '--setup-only', args.filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = proc.communicate()
|
||||
lines = stdout.decode().strip().splitlines()
|
||||
for line in lines:
|
||||
if "(fixtures used:" in line:
|
||||
if '(fixtures used:' in line:
|
||||
line = line.strip().split()[0]
|
||||
line = line[line.find("::") + 2 :]
|
||||
line = line.replace("::", ".")
|
||||
line = line[line.find('::') + 2:]
|
||||
line = line.replace('::', '.')
|
||||
ALL_TESTS.append(line)
|
||||
|
||||
# Do a simple filtering:
|
||||
# if 'cpu' or 'CPU' is in the name and 'cuda' or 'CUDA' is not in the name, then skip it
|
||||
def is_cpu_only(name):
|
||||
name = name.lower()
|
||||
return ("cpu" in name) and not ("cuda" in name)
|
||||
|
||||
return ('cpu' in name) and not ('cuda' in name)
|
||||
|
||||
ALL_TESTS = [x for x in ALL_TESTS if not is_cpu_only(x)]
|
||||
|
||||
@ -133,7 +101,7 @@ ALL_TESTS = ALL_TESTS[start:end]
|
||||
# or as specified by the user
|
||||
progress = 0
|
||||
if not args.ci:
|
||||
logfile = open("result.log", "w")
|
||||
logfile = open('result.log', 'w')
|
||||
progressbar = tqdm.tqdm(total=len(ALL_TESTS))
|
||||
else:
|
||||
logfile = sys.stdout
|
||||
@ -142,61 +110,53 @@ else:
|
||||
class ProgressbarStub:
|
||||
def update(self, *args):
|
||||
return
|
||||
|
||||
progressbar = ProgressbarStub()
|
||||
|
||||
|
||||
async def run1(coroutine_id):
|
||||
global progress
|
||||
|
||||
if args.gpus == "all":
|
||||
if args.gpus == 'all':
|
||||
gpuid = coroutine_id % GPUS
|
||||
else:
|
||||
gpu_assignments = args.gpus.split(":")
|
||||
assert args.nproc == len(
|
||||
gpu_assignments
|
||||
), "Please specify GPU assignmnent for each process, separated by :"
|
||||
gpu_assignments = args.gpus.split(':')
|
||||
assert args.nproc == len(gpu_assignments), 'Please specify GPU assignmnent for each process, separated by :'
|
||||
gpuid = gpu_assignments[coroutine_id]
|
||||
|
||||
while progress < len(ALL_TESTS):
|
||||
test = ALL_TESTS[progress]
|
||||
progress += 1
|
||||
cmd = f"CUDA_VISIBLE_DEVICES={gpuid} cuda-memcheck --error-exitcode 1 python {args.filename} {test}"
|
||||
proc = await asyncio.create_subprocess_shell(
|
||||
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
|
||||
)
|
||||
cmd = f'CUDA_VISIBLE_DEVICES={gpuid} cuda-memcheck --error-exitcode 1 python {args.filename} {test}'
|
||||
proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(proc.communicate(), args.timeout)
|
||||
except asyncio.TimeoutError:
|
||||
print("Timeout:", test, file=logfile)
|
||||
print('Timeout:', test, file=logfile)
|
||||
proc.kill()
|
||||
if args.ci and not args.nohang:
|
||||
sys.exit("Hang detected on cuda-memcheck")
|
||||
else:
|
||||
if proc.returncode == 0:
|
||||
print("Success:", test, file=logfile)
|
||||
print('Success:', test, file=logfile)
|
||||
else:
|
||||
stdout = stdout.decode()
|
||||
stderr = stderr.decode()
|
||||
should_display = args.strict or not is_ignored_only(stdout)
|
||||
if should_display:
|
||||
print("Fail:", test, file=logfile)
|
||||
print('Fail:', test, file=logfile)
|
||||
print(stdout, file=logfile)
|
||||
print(stderr, file=logfile)
|
||||
if args.ci:
|
||||
sys.exit("Failure detected on cuda-memcheck")
|
||||
else:
|
||||
print("Ignored:", test, file=logfile)
|
||||
print('Ignored:', test, file=logfile)
|
||||
del proc
|
||||
progressbar.update(1)
|
||||
|
||||
|
||||
async def main():
    """Schedule ``args.nproc`` ``run1`` coroutines and wait for all of them.

    Every worker is scheduled up front with ``asyncio.ensure_future`` and the
    resulting futures are then awaited one by one in creation order.
    """
    workers = []
    for worker_id in range(args.nproc):
        workers.append(asyncio.ensure_future(run1(worker_id)))
    for worker in workers:
        await worker
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
|
||||
|
Reference in New Issue
Block a user