mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Summary: `check_kernel_launches.py` currently gives a false positive in instances such as:

```
735: <<<smallIndexGrid, smallIndexBlock, 0, stream>>>( \
736: outInfo, selfInfo, indicesInfo, \
737: outSelectDim, selfSelectDim, static_cast<TYPE>(sliceSize), \
738: selfSelectDimSize); \
739: C10_CUDA_KERNEL_LAUNCH_CHECK();
```

because the newlines after the last `\` are not consumed by the regex. This fixes that.

In addition, the regex is modified to provide greater context for the start of the kernel launch. This changes the context from:

```
157: (
158: size, X_strides, Y_dims, X, Y);
```

to

```
157: <<<M, CAFFE_CUDA_NUM_THREADS, 0, context->cuda_stream()>>>(
158: size, X_strides, Y_dims, X, Y);
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/49365

Test Plan:

```
buck test //caffe2/test:kernel_launch_checks -- --print-passing-details
```

Reviewed By: aakshintala

Differential Revision: D25545402

Pulled By: r-barnes

fbshipit-source-id: 76feac6a002187239853752b892f4517722a77bf
50 lines
2.0 KiB
Python
50 lines
2.0 KiB
Python
from torch.testing._internal.common_utils import TestCase, run_tests
|
|
from torch.testing import check_cuda_kernel_launches, check_code_for_cuda_kernel_launches
|
|
|
|
|
|
class AlwaysCheckCudaLaunchTest(TestCase):
|
|
def test_check_code(self):
|
|
"""Verifies that the regex works for a few different situations"""
|
|
|
|
# Try some different spacings
|
|
self.assertEqual(2, check_code_for_cuda_kernel_launches("""
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
some_other_stuff;
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>>(arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>> (arg1,arg2,arg3);
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
some_function_call<TemplateArg><<<1,2,0,stream>>> ( arg1 , arg2 , arg3 ) ;
|
|
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
"""))
|
|
|
|
# Does it work for macros?
|
|
self.assertEqual(0, check_code_for_cuda_kernel_launches(r"""
|
|
#define SOME_MACRO(x) some_function_call<<<1,2>>> ( x ) ; \
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
|
|
#define SMALL_INDEX(TENSOR_TYPE, INDICES_TYPE, TYPE, SELF_DIM, SOURCE_DIM, IDX_DIM) \
|
|
indexAddSmallIndex<TENSOR_TYPE, INDICES_TYPE, TYPE, SELF_DIM, SOURCE_DIM, IDX_DIM> \
|
|
<<<smallIndexGrid, smallIndexBlock, 0, stream>>>( \
|
|
selfInfo, sourceInfo, indexInfo, \
|
|
selfAddDim, sourceAddDim, sliceSize, selfAddDimSize); \
|
|
C10_CUDA_KERNEL_LAUNCH_CHECK();
|
|
"""))
|
|
|
|
def test_check_cuda_launches(self):
|
|
check_cuda_kernel_launches()
|
|
# TODO: Enable this after warning messages have been dealt with.
|
|
self.assertTrue(True)
|
|
# self.assertTrue(check_cuda_kernel_launches() == 0)
|
|
|
|
|
|
# Only hand control to run_tests() when this file is executed as a script,
# so importing the module does not trigger a test run.
if __name__ == "__main__":
    run_tests()
|