mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
Make test_torch.py pass cuda-memcheck (#29243)
Summary: Make the following changes: - When there are more than 10k errors, cuda-memcheck only shows 10k errors; in this case we shouldn't raise an exception. - Add an `UNDER_CUDA_MEMCHECK` environment variable to allow disabling `pin_memory` tests when running under cuda-memcheck. - Add a `--ci` command-line option. When turned on, the script writes its output to stdout instead of to a file, and exits with an error if cuda-memcheck fails. - Add a `--nohang` command-line option. When turned on, a hang is treated as a pass instead of an error. - Do simple filtering on the tests to run: filter out a test if `'cpu'` is in the test name but `'cuda'` is not. - Add `--split` and `--rank` to allow splitting the work (NVIDIA CI has a limit of 3 hours, so we have to split the work to satisfy this limit). - The error summary could be `ERROR SUMMARY: 1 error` or `ERROR SUMMARY: 2 errors`; the tail could be `error` or `errors`, which are not the same length. The script is fixed to handle this case. - Ignore errors from `cufft`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/29243 Differential Revision: D18941701 Pulled By: mruberry fbshipit-source-id: 2048428f32b66ef50c67444c03ce4dd9491179d2
This commit is contained in:
committed by
Facebook Github Bot
parent
701e05dcbb
commit
ffe0c1ae4d
@ -9,15 +9,20 @@ class ParseError(Exception):
|
||||
class Report:
    """A report is a container of errors, and a summary on how many errors are found.

    Attributes:
        text: The raw summary line the report was built from.
        num_errors: The error count parsed from ``text``; clamped to
            ``MAX_DISPLAYED_ERRORS`` when the displayed error list was truncated.
        errors: The sequence of errors passed to the constructor.
    """

    # Prefix and plural suffix of the summary line. Retained as class
    # attributes for backward compatibility only: parsing does not slice by
    # their lengths, because the suffix may be ' error' or ' errors'.
    HEAD = 'ERROR SUMMARY: '
    TAIL = ' errors'

    # When there are more than 10k errors, cuda-memcheck only displays 10k.
    MAX_DISPLAYED_ERRORS = 10000

    def __init__(self, text, errors):
        """Parse the summary line and check it against the collected errors.

        Args:
            text: The summary line, something like
                ``ERROR SUMMARY: 1 error`` or ``ERROR SUMMARY: 2 errors``.
            errors: The errors collected for this report (anything with a
                ``len``).

        Raises:
            ParseError: If ``len(errors)`` does not match the count in
                ``text``, except when exactly ``MAX_DISPLAYED_ERRORS`` errors
                were collected and the summary reports more than that.
            IndexError, ValueError: If ``text`` has fewer than three
                whitespace-separated tokens or the third is not an integer.
        """
        self.text = text
        # The count is the third whitespace-separated token; taking the token
        # (rather than a fixed-width slice) works for both the singular
        # 'error' and the plural 'errors' tail.
        self.num_errors = int(text.strip().split()[2])
        self.errors = errors
        if len(errors) != self.num_errors:
            limit = self.MAX_DISPLAYED_ERRORS
            if len(errors) == limit and self.num_errors > limit:
                # The displayed list was truncated at the limit, so the
                # mismatch is expected; record the number actually available.
                self.num_errors = limit
            else:
                raise ParseError("Number of errors does not match")
|
||||
|
||||
|
||||
class Error:
|
||||
|
Reference in New Issue
Block a user