Paren-matching kernel launch check without external deps (#60778)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/60778 Matches parens and the opening `<<<` to make a more accurate kernel launch check. Test Plan: ``` buck test //caffe2/test:kernel_launch_checks ``` Reviewed By: ngimel Differential Revision: D29401624 fbshipit-source-id: 8649af7c33e67dbb24044af0134b1cea6f2e5dc3
2025-10-20 21:14:14 +08:00 · 2021-06-28 10:16:55 -07:00
parent 88b0518a83
commit 94cdbbf48d
2 changed files with 89 additions and 35 deletions
--- a/test/test_kernel_launch_checks.py
+++ b/test/test_kernel_launch_checks.py
@ -1,5 +1,5 @@
 from torch.testing._internal.common_utils import TestCase, run_tests
-from torch.testing import check_cuda_kernel_launches, check_code_for_cuda_kernel_launches
+from torch.testing._check_kernel_launches import check_cuda_kernel_launches, check_code_for_cuda_kernel_launches


 class AlwaysCheckCudaLaunchTest(TestCase):
@ -38,6 +38,36 @@ some_function_call<TemplateArg><<<1,2,0,stream>>> ( arg1 , arg2 , arg3 ) ;
  C10_CUDA_KERNEL_LAUNCH_CHECK();
        """))

+        # Does it work for lambdas?
+        self.assertEqual(1, check_code_for_cuda_kernel_launches(r"""
+            rrelu_with_noise_cuda_kernel<scalar_t, 2><<<grid, block, 0, stream>>>(
+                    numel,
+                    rng_engine_inputs,
+                    output_data,
+                    input_data,
+                    noise_data,
+                    lower,
+                    upper,
+                    [] __device__ (curandStatePhilox4_32_10_t* state) {
+                    return curand_uniform2_double(state);
+                    });
+                    C10_CUDA_KERNEL_LAUNCH_CHECK();
+
+            rrelu_with_noise_cuda_kernel<scalar_t, 2><<<grid, block, 0, stream>>>(
+                    numel,
+                    rng_engine_inputs,
+                    output_data,
+                    input_data,
+                    noise_data,
+                    lower,
+                    upper,
+                    [] __device__ (curandStatePhilox4_32_10_t* state) {
+                    return curand_uniform2_double(state);
+                    });
+                    uh oh;
+                    C10_CUDA_KERNEL_LAUNCH_CHECK();
+        """))
+
    def test_check_cuda_launches(self):
        unsafeLaunchesCount = check_cuda_kernel_launches()
        self.assertTrue(unsafeLaunchesCount == 0)
--- a/torch/testing/_check_kernel_launches.py
+++ b/torch/testing/_check_kernel_launches.py
@ -1,44 +1,63 @@
 import os
 import re
 import sys
+from typing import List

 __all__ = [
    "check_code_for_cuda_kernel_launches",
    "check_cuda_kernel_launches",
 ]

-# Files to exclude (match is done with suffix)
-exclude_files = [
-    "aten/src/ATen/native/cuda/Activation.cu"
-]
+# FILES TO EXCLUDE (match is done with suffix using `endswith`)
+# You wouldn't drive without a seatbelt, though, so why would you
+# launch a kernel without some safety? Use this as a quick workaround
+# for a problem with the checker, fix the checker, then de-exclude
+# the files in question.
+exclude_files: List[str] = []

-# Regular expression identifies a kernel launch indicator by
-# finding something approximating the pattern ">>>(arguments);"
-# It then requires that `C10_CUDA_KERNEL_LAUNCH_CHECK` be
-# the next command.
-# It allows a single backslash `\` between the end of the launch
-# command and the beginning of the kernel check. This handles
-# cases where the kernel launch is in a multiline preprocessor
-# definition.
+# Without using a C++ AST we can't detect kernel launches with 100%
+# accuracy, so we model them as having the pattern
+# "<<<parameters>>>(arguments);". We then require that
+# `C10_CUDA_KERNEL_LAUNCH_CHECK` be the next statement.
 #
-# There are various ways this can fail:
-# * If the semicolon is in a string for some reason
-# * If there's a triply-nested template
-# But this should be sufficient to detect and fix most problem
-# instances and can be refined before the test is made binding
-kernel_launch_regex = re.compile(r"""
-    ^.*>>>        # Identifies kernel launch
-    \s*           # Maybe some whitespace (includes newlines)
-    \([^;]+\);    # And then arguments in parens and semi-colon
-    (?!           # Negative lookahead: we trigger if we don't find the launch guard
-        \s*                                  # Maybe some whitespace (includes newlines)
-        \\?                                  # 0 or 1 backslashes (for launches in preprocessor macros)
-        \s*                                  # Maybe some whitespace (includes newlines)
-        (?:[0-9]+: )?                        # Detects and ignores a line numbering, if present
-        \s*                                  # Maybe some whitespace (includes newlines)
-        C10_CUDA_KERNEL_LAUNCH_CHECK\(\);    # Kernel launch guard!
-    )             # End negative lookahead
-""", flags=re.MULTILINE | re.VERBOSE)
+# We model the next statement as ending at the next `}` or `;`.
+# If we see `}` first, the enclosing block ended without a check (bad).
+# If we see a semicolon first, we expect the launch check just before it.
+#
+# Since the kernel launch can include lambda expressions, it's important
+# to find the correct end-paren of the kernel launch. Doing this with
+# pure regex requires recursive patterns, which the `re` module in the
+# Python standard library does not support. To avoid an additional
+# dependency, we build a prefix regex that finds the start of a kernel
+# launch, use a paren-matching algorithm to find the end of the launch,
+# and then use another regex to determine if a launch check is present.
+
+# Finds potential starts of kernel launches
+kernel_launch_start = re.compile(
+    r"^.*<<<[^>]+>>>\s*\(", flags=re.MULTILINE
+)
+
+# This pattern should be applied starting at the character after the final
+# paren of the kernel launch. NOTE: despite its name, it matches when the
+# launch check is NOT the next statement, i.e. a match means a missing check.
+has_check = re.compile(
+    r"\s*;(?![^;}]*C10_CUDA_KERNEL_LAUNCH_CHECK\(\);)", flags=re.MULTILINE
+)
+
+def find_matching_paren(s: str, startpos: int) -> int:
+    """Return the index one past the `)` that closes the `(` at `startpos`.
+
+    Scans `s` from `startpos` onward, counting each `(` as +1 and each
+    `)` as -1, and stops at the first `)` that brings the count back to
+    zero. Every paren character is counted, including any that appear
+    inside string literals or comments of the scanned text.
+
+    Args:
+        s: Text to scan.
+        startpos: Index of the opening `(` to match.
+
+    Returns:
+        The index of the character immediately after the matching `)`.
+
+    Raises:
+        IndexError: If the end of `s` is reached without the count
+            returning to zero.
+    """
+    opening = 0  # Current nesting depth: number of unclosed `(` seen so far
+    for i, c in enumerate(s[startpos:]):
+        if c == '(':
+            opening += 1
+        elif c == ')':
+            opening -= 1
+            if opening == 0:
+                # `i` is relative to `startpos`; the +1 steps past the `)`
+                return startpos + i + 1

+    raise IndexError("Closing parens not found!")


 def should_exclude_file(filename) -> bool:
@ -68,10 +87,15 @@ def check_code_for_cuda_kernel_launches(code, filename=None):
    code = [f"{lineno}: {linecode}" for lineno, linecode in code]  # Number the lines
    code = '\n'.join(code)                                         # Put it back together

-    results = kernel_launch_regex.findall(code)               # Search for bad launches
-    for r in results:
-        print(f"Missing C10_CUDA_KERNEL_LAUNCH_CHECK in '{filename}'. Context:\n{r}", file=sys.stderr)
-    return len(results)
+    num_launches_without_checks = 0
+    for m in kernel_launch_start.finditer(code):
+        end_paren = find_matching_paren(code, m.end() - 1)
+        if has_check.match(code, end_paren):
+            num_launches_without_checks += 1
+            context = code[m.start():end_paren + 1]
+            print(f"Missing C10_CUDA_KERNEL_LAUNCH_CHECK in '{filename}'. Context:\n{context}", file=sys.stderr)
+
+    return num_launches_without_checks


 def check_file(filename):