Enable UFMT on test/scripts and some files (#124137)

Part of: #123062

Ran lintrunner on:

- `test/scripts`
- `test/simulate_nccl_errors.py`
- `test/test_ao_sparsity.py`
- `test/test_autocast.py`
- `test/test_binary_ufuncs.py`
- `test/test_bundled_images.py`
- `test/test_bundled_inputs.py`
- `test/test_comparison_utils.py`
- `test/test_compile_benchmark_util.py`
- `test/test_complex.py`
- `test/test_cpp_api_parity.py`
- `test/test_cpp_extensions_aot.py`
- `test/test_cpp_extensions_jit.py`
- `test/test_cpp_extensions_open_device_registration.py`

Details:

```bash
$ lintrunner -a --take UFMT --all-files
ok No lint issues.
Successfully applied all patches.
```
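
For reference, the same formatter can also be scoped to explicit paths instead of `--all-files`. A sketch of such an invocation (assuming paths are passed positionally to `lintrunner`, per its usual CLI; the file subset below is just illustrative):

```bash
# Apply only the UFMT formatter (ufmt = usort + black) to a few of the listed files
lintrunner -a --take UFMT test/scripts test/test_autocast.py test/test_complex.py
```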

Pull Request resolved: https://github.com/pytorch/pytorch/pull/124137
Approved by: https://github.com/soulitzer
Author: Yuanhao Ji
Date: 2024-04-19 22:01:23 +00:00
Committed by: PyTorch MergeBot
Parent: f0560f7b3b
Commit: b3504af56e

16 changed files with 1049 additions and 545 deletions

test/scripts/cuda_memcheck_common.py

```diff
@@ -1,8 +1,10 @@
 # this file contains a simple parser that parses report
 # from cuda-memcheck
 class ParseError(Exception):
     """Whenever the simple parser is unable to parse the report, this exception will be raised"""
     pass
@@ -77,25 +79,25 @@ def parse(message):
     ========= ERROR SUMMARY: 4 errors
     """
     errors = []
-    HEAD = '========='
+    HEAD = "========="
     headlen = len(HEAD)
     started = False
     in_message = False
     message_lines = []
     lines = message.splitlines()
     for l in lines:
-        if l == HEAD + ' CUDA-MEMCHECK':
+        if l == HEAD + " CUDA-MEMCHECK":
             started = True
             continue
         if not started or not l.startswith(HEAD):
             continue
-        l = l[headlen + 1:]
-        if l.startswith('ERROR SUMMARY:'):
+        l = l[headlen + 1 :]
+        if l.startswith("ERROR SUMMARY:"):
             return Report(l, errors)
         if not in_message:
             in_message = True
             message_lines = [l]
-        elif l == '':
+        elif l == "":
             errors.append(Error(message_lines))
             in_message = False
         else:
```
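
For context on the hunks above: this module is consumed by the runner below via `import cuda_memcheck_common as cmc`. A minimal usage sketch, using only the names that appear in these diffs (`parse`, `ParseError`, `Report.errors`, `Error.stack`); the helper itself is hypothetical and simply mirrors `is_ignored_only()` in `run_cuda_memcheck.py`:

```python
import cuda_memcheck_common as cmc


def count_library_errors(output: str) -> int:
    # `output` is the captured stdout of a `cuda-memcheck python test_*.py ...` run.
    try:
        report = cmc.parse(output)  # Report with .errors (list of Error) and .num_errors
    except cmc.ParseError:
        # The simple parser raises ParseError when the report is not understood.
        return 0
    # Count errors whose stacks point into cuBLAS/cuDNN, which the runner
    # below ignores unless --strict is given.
    return sum(
        1
        for e in report.errors
        if "libcublas" in "".join(e.stack) or "libcudnn" in "".join(e.stack)
    )
```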

test/scripts/run_cuda_memcheck.py

```diff
@@ -12,39 +12,62 @@ Example usage:
 Note that running cuda-memcheck could be very slow.
 """
-import asyncio
-import torch
-import multiprocessing
 import argparse
-import subprocess
-import tqdm
+import asyncio
+import multiprocessing
 import os
+import subprocess
 import sys
 import cuda_memcheck_common as cmc
+import torch
+import tqdm
 ALL_TESTS = []
 GPUS = torch.cuda.device_count()
 # parse arguments
 parser = argparse.ArgumentParser(description="Run isolated cuda-memcheck on unit tests")
-parser.add_argument('filename', help="the python file for a test, such as test_torch.py")
-parser.add_argument('timeout', type=int, help='kill the test if it does not terminate in a certain amount of seconds')
-parser.add_argument('--strict', action='store_true',
-                    help='Whether to show cublas/cudnn errors. These errors are ignored by default because'
-                         'cublas/cudnn does not run error-free under cuda-memcheck, and ignoring these errors')
-parser.add_argument('--nproc', type=int, default=multiprocessing.cpu_count(),
-                    help='Number of processes running tests, default to number of cores in the system')
-parser.add_argument('--gpus', default='all',
-                    help='GPU assignments for each process, it could be "all", or : separated list like "1,2:3,4:5,6"')
-parser.add_argument('--ci', action='store_true',
-                    help='Whether this script is executed in CI. When executed inside a CI, this script fails when '
-                         'an error is detected. Also, it will not show tqdm progress bar, but directly print the error'
-                         'to stdout instead.')
-parser.add_argument('--nohang', action='store_true', help='Treat timeout as success')
-parser.add_argument('--split', type=int, default=1, help='Split the job into pieces')
-parser.add_argument('--rank', type=int, default=0, help='Which piece this process should pick')
+parser.add_argument(
+    "filename", help="the python file for a test, such as test_torch.py"
+)
+parser.add_argument(
+    "timeout",
+    type=int,
+    help="kill the test if it does not terminate in a certain amount of seconds",
+)
+parser.add_argument(
+    "--strict",
+    action="store_true",
+    help="Whether to show cublas/cudnn errors. These errors are ignored by default because"
+    "cublas/cudnn does not run error-free under cuda-memcheck, and ignoring these errors",
+)
+parser.add_argument(
+    "--nproc",
+    type=int,
+    default=multiprocessing.cpu_count(),
+    help="Number of processes running tests, default to number of cores in the system",
+)
+parser.add_argument(
+    "--gpus",
+    default="all",
+    help='GPU assignments for each process, it could be "all", or : separated list like "1,2:3,4:5,6"',
+)
+parser.add_argument(
+    "--ci",
+    action="store_true",
+    help="Whether this script is executed in CI. When executed inside a CI, this script fails when "
+    "an error is detected. Also, it will not show tqdm progress bar, but directly print the error"
+    "to stdout instead.",
+)
+parser.add_argument("--nohang", action="store_true", help="Treat timeout as success")
+parser.add_argument("--split", type=int, default=1, help="Split the job into pieces")
+parser.add_argument(
+    "--rank", type=int, default=0, help="Which piece this process should pick"
+)
 args = parser.parse_args()
 # Filters that ignores cublas/cudnn errors
 # TODO (@zasdfgbnm): When can we remove this? Will cublas/cudnn run error-free under cuda-memcheck?
 def is_ignored_only(output):
@@ -56,32 +79,43 @@ def is_ignored_only(output):
         return False
     count_ignored_errors = 0
     for e in report.errors:
-        if 'libcublas' in ''.join(e.stack) or 'libcudnn' in ''.join(e.stack) or 'libcufft' in ''.join(e.stack):
+        if (
+            "libcublas" in "".join(e.stack)
+            or "libcudnn" in "".join(e.stack)
+            or "libcufft" in "".join(e.stack)
+        ):
             count_ignored_errors += 1
     return count_ignored_errors == report.num_errors
 # Set environment PYTORCH_CUDA_MEMCHECK=1 to allow skipping some tests
-os.environ['PYTORCH_CUDA_MEMCHECK'] = '1'
+os.environ["PYTORCH_CUDA_MEMCHECK"] = "1"
 # Discover tests:
 # To get a list of tests, run:
 # pytest --setup-only test/test_torch.py
 # and then parse the output
-proc = subprocess.Popen(['pytest', '--setup-only', args.filename], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+proc = subprocess.Popen(
+    ["pytest", "--setup-only", args.filename],
+    stdout=subprocess.PIPE,
+    stderr=subprocess.PIPE,
+)
 stdout, stderr = proc.communicate()
 lines = stdout.decode().strip().splitlines()
 for line in lines:
-    if '(fixtures used:' in line:
+    if "(fixtures used:" in line:
         line = line.strip().split()[0]
-        line = line[line.find('::') + 2:]
-        line = line.replace('::', '.')
+        line = line[line.find("::") + 2 :]
+        line = line.replace("::", ".")
         ALL_TESTS.append(line)
 # Do a simple filtering:
 # if 'cpu' or 'CPU' is in the name and 'cuda' or 'CUDA' is not in the name, then skip it
 def is_cpu_only(name):
     name = name.lower()
-    return ('cpu' in name) and "cuda" not in name
+    return ("cpu" in name) and "cuda" not in name
 ALL_TESTS = [x for x in ALL_TESTS if not is_cpu_only(x)]
@@ -101,7 +135,7 @@ ALL_TESTS = ALL_TESTS[start:end]
 # or as specified by the user
 progress = 0
 if not args.ci:
-    logfile = open('result.log', 'w')
+    logfile = open("result.log", "w")
     progressbar = tqdm.tqdm(total=len(ALL_TESTS))
 else:
     logfile = sys.stdout
@@ -110,53 +144,61 @@ else:
     class ProgressbarStub:
         def update(self, *args):
             return
     progressbar = ProgressbarStub()
 async def run1(coroutine_id):
     global progress
-    if args.gpus == 'all':
+    if args.gpus == "all":
         gpuid = coroutine_id % GPUS
     else:
-        gpu_assignments = args.gpus.split(':')
-        assert args.nproc == len(gpu_assignments), 'Please specify GPU assignment for each process, separated by :'
+        gpu_assignments = args.gpus.split(":")
+        assert args.nproc == len(
+            gpu_assignments
+        ), "Please specify GPU assignment for each process, separated by :"
         gpuid = gpu_assignments[coroutine_id]
     while progress < len(ALL_TESTS):
         test = ALL_TESTS[progress]
         progress += 1
-        cmd = f'CUDA_VISIBLE_DEVICES={gpuid} cuda-memcheck --error-exitcode 1 python {args.filename} {test}'
-        proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE)
+        cmd = f"CUDA_VISIBLE_DEVICES={gpuid} cuda-memcheck --error-exitcode 1 python {args.filename} {test}"
+        proc = await asyncio.create_subprocess_shell(
+            cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+        )
         try:
             stdout, stderr = await asyncio.wait_for(proc.communicate(), args.timeout)
         except asyncio.TimeoutError:
-            print('Timeout:', test, file=logfile)
+            print("Timeout:", test, file=logfile)
             proc.kill()
             if args.ci and not args.nohang:
                 sys.exit("Hang detected on cuda-memcheck")
         else:
             if proc.returncode == 0:
-                print('Success:', test, file=logfile)
+                print("Success:", test, file=logfile)
             else:
                 stdout = stdout.decode()
                 stderr = stderr.decode()
                 should_display = args.strict or not is_ignored_only(stdout)
                 if should_display:
-                    print('Fail:', test, file=logfile)
+                    print("Fail:", test, file=logfile)
                     print(stdout, file=logfile)
                     print(stderr, file=logfile)
                     if args.ci:
                         sys.exit("Failure detected on cuda-memcheck")
                 else:
-                    print('Ignored:', test, file=logfile)
+                    print("Ignored:", test, file=logfile)
         del proc
         progressbar.update(1)
 async def main():
     tasks = [asyncio.ensure_future(run1(i)) for i in range(args.nproc)]
     for t in tasks:
         await t
-if __name__ == '__main__':
+if __name__ == "__main__":
     loop = asyncio.get_event_loop()
     loop.run_until_complete(main())
```
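
One note for reviewers of the `--gpus` hunk above: the colon-separated value assigns one GPU list per worker, and `--nproc` must equal the number of groups (the reformatted `assert` enforces this). A small sketch with hypothetical values, mirroring the assignment logic in `run1()`:

```python
# Hypothetical --gpus value: three workers, each pinned to its own pair of GPUs.
gpus = "1,2:3,4:5,6"
gpu_assignments = gpus.split(":")  # ['1,2', '3,4', '5,6']
for coroutine_id in range(3):  # --nproc must equal len(gpu_assignments)
    gpuid = gpu_assignments[coroutine_id]
    print(f"worker {coroutine_id}: CUDA_VISIBLE_DEVICES={gpuid}")
```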