Move code out of individual token linters (#152256)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/152256 Approved by: https://github.com/Skylion007
2025-10-20 21:14:14 +08:00 · 2025-06-23 14:05:30 +00:00
parent da910e603a
commit 98a34e8d4b
13 changed files with 1017 additions and 901 deletions
--- a/tools/linter/adapters/docstring_linter.py
+++ b/tools/linter/adapters/docstring_linter.py
@ -1,15 +1,11 @@
 from __future__ import annotations

-import dataclasses as dc
 import itertools
 import json
 import sys
-import token
-from enum import Enum
-from functools import cached_property, total_ordering
+from functools import cached_property
 from pathlib import Path
-from typing import Any, Callable, Optional, TYPE_CHECKING
-from typing_extensions import Self
+from typing import Any, Callable, TYPE_CHECKING


 _FILE = Path(__file__).absolute()
@ -22,10 +18,9 @@ else:

 if TYPE_CHECKING:
    from collections.abc import Iterator, Sequence
-    from tokenize import TokenInfo


-GRANDFATHER_LIST = Path(str(_FILE).replace(".py", "-grandfather.json"))
+GRANDFATHER_LIST = _FILE.parent / "docstring_linter-grandfather.json"

 # We tolerate a 10% increase in block size before demanding a docstring
 TOLERANCE_PERCENT = 10
@ -45,274 +40,7 @@ METHOD_OVERRIDE_HINT = (
 )


-@total_ordering
-@dc.dataclass
-class Block:
-    """A block of Python code starting with either `def` or `class`"""
-
-    class Category(str, Enum):
-        CLASS = "class"
-        DEF = "def"
-
-    category: Category
-
-    # The sequence of tokens that contains this Block.
-    # Tokens are represented in `Block` as indexes into `self.tokens`
-    tokens: Sequence[TokenInfo] = dc.field(repr=False)
-
-    # The name of the function or class being defined
-    name: str
-
-    # The index of the very first token in the block (the "class" or "def" keyword)
-    begin: int
-
-    # The index of the first INDENT token for this block
-    indent: int
-
-    # The index of the DEDENT token for this end of this block
-    dedent: int
-
-    # The docstring for the block
-    docstring: str
-
-    # These next members only get filled in after all blocks have been constructed
-    # and figure out family ties
-
-    # The full qualified name of the block within the file.
-    # This is the name of this block and all its parents, joined with `.`.
-    full_name: str = ""
-
-    # The index of this block within the full list of blocks in the file
-    index: int = 0
-
-    # Is this block contained within a function definition?
-    is_local: bool = dc.field(default=False, repr=False)
-
-    # Is this block a function definition in a class definition?
-    is_method: bool = dc.field(default=False, repr=False)
-
-    # A block index to the parent of this block, or None for a top-level block.
-    parent: Optional[int] = None
-
-    # A list of block indexes for the children
-    children: list[int] = dc.field(default_factory=list)
-
-    @property
-    def start_line(self) -> int:
-        return self.tokens[max(self.indent, self.index)].start[0]
-
-    @property
-    def end_line(self) -> int:
-        return self.tokens[max(self.dedent, self.index)].start[0]
-
-    @property
-    def line_count(self) -> int:
-        return self.end_line - self.start_line
-
-    @property
-    def is_class(self) -> bool:
-        return self.category == Block.Category.CLASS
-
-    @property
-    def display_name(self) -> str:
-        """A user-friendly name like 'class One' or 'def One.method()'"""
-        ending = "" if self.is_class else "()"
-        return f"{self.category.value} {self.full_name}{ending}"
-
-    @cached_property
-    def decorators(self) -> list[str]:
-        """A list of decorators for this function or method.
-
-        Each decorator both the @ symbol and any arguments to the decorator
-        but no extra whitespace.
-        """
-        return _get_decorators(self.tokens, self.begin)
-
-    @cached_property
-    def is_override(self) -> bool:
-        return not self.is_class and any(
-            d.rpartition(".")[2] == "override" for d in self.decorators
-        )
-
-    DATA_FIELDS = (
-        "category",
-        "children",
-        "decorators",
-        "display_name",
-        "docstring",
-        "full_name",
-        "index",
-        "is_local",
-        "is_method",
-        "line_count",
-        "parent",
-        "start_line",
-    )
-
-    def as_data(self) -> dict[str, Any]:
-        d = {i: getattr(self, i) for i in self.DATA_FIELDS}
-        d["category"] = d["category"].value
-        return d
-
-    @property
-    def is_init(self) -> bool:
-        return not self.is_class and self.name == "__init__"
-
-    def contains(self, b: Block) -> bool:
-        return self.start_line < b.start_line and self.end_line >= b.end_line
-
-    def __eq__(self, o: object) -> bool:
-        assert isinstance(o, Block)
-        return o.tokens is self.tokens and o.index == self.index
-
-    def __hash__(self) -> int:
-        return super().__hash__()
-
-    def __lt__(self, o: Self) -> bool:
-        assert isinstance(o, Block) and o.tokens is self.tokens
-        return o.index < self.index
-
-
-_IGNORE = {token.COMMENT, token.DEDENT, token.INDENT, token.NL}
-
-
-def _get_decorators(tokens: Sequence[TokenInfo], block_start: int) -> list[str]:
-    def decorators() -> Iterator[str]:
-        rev = reversed(range(block_start))
-        newlines = (i for i in rev if tokens[i].type == token.NEWLINE)
-        newlines = itertools.chain(newlines, [-1])  # To account for the first line
-
-        it = iter(newlines)
-        end = next(it, -1)  # Like itertools.pairwise in Python 3.10
-        for begin in it:
-            for i in range(begin + 1, end):
-                t = tokens[i]
-                if t.type == token.OP and t.string == "@":
-                    useful = (t for t in tokens[i:end] if t.type not in _IGNORE)
-                    yield "".join(s.string.strip("\n") for s in useful)
-                    break
-                elif t.type not in _IGNORE:
-                    return  # A statement means no more decorators
-            end = begin
-
-    out = list(decorators())
-    out.reverse()
-    return out
-
-
-class DocstringFile(_linter.PythonFile):
-    def __getitem__(self, i: int | slice) -> TokenInfo | Sequence[TokenInfo]:
-        return self.tokens[i]
-
-    def next_token(self, start: int, token_type: int, error: str) -> int:
-        for i in range(start, len(self.tokens)):
-            if self.tokens[i].type == token_type:
-                return i
-        raise _linter.ParseError(self.tokens[-1], error)
-
-    def docstring(self, start: int) -> str:
-        for i in range(start + 1, len(self.tokens)):
-            tk = self.tokens[i]
-            if tk.type == token.STRING:
-                return tk.string
-            if tk.type not in _linter.EMPTY_TOKENS:
-                return ""
-        return ""
-
-    @cached_property
-    def indent_to_dedent(self) -> dict[int, int]:
-        dedents = dict[int, int]()
-        stack = list[int]()
-
-        for i, t in enumerate(self.tokens):
-            if t.type == token.INDENT:
-                stack.append(i)
-            elif t.type == token.DEDENT:
-                dedents[stack.pop()] = i
-
-        return dedents
-
-    @cached_property
-    def errors(self) -> dict[str, str]:
-        return {}
-
-    @cached_property
-    def blocks(self) -> list[Block]:
-        blocks: list[Block] = []
-
-        for i in range(len(self.tokens)):
-            try:
-                if (b := self.block(i)) is not None:
-                    blocks.append(b)
-            except _linter.ParseError as e:
-                self.errors[e.token.line] = " ".join(e.args)
-
-        for i, parent in enumerate(blocks):
-            for j in range(i + 1, len(blocks)):
-                if parent.contains(child := blocks[j]):
-                    child.parent = i
-                    parent.children.append(j)
-                else:
-                    break
-
-        for i, b in enumerate(blocks):
-            b.index = i
-
-            parents = [b]
-            while (p := parents[-1].parent) is not None:
-                parents.append(blocks[p])
-            parents = parents[1:]
-
-            b.is_local = not all(p.is_class for p in parents)
-            b.is_method = not b.is_class and bool(parents) and parents[0].is_class
-
-        def add_full_names(children: Sequence[Block], prefix: str = "") -> None:
-            dupes: dict[str, list[Block]] = {}
-            for b in children:
-                dupes.setdefault(b.name, []).append(b)
-
-            for dl in dupes.values():
-                for i, b in enumerate(dl):
-                    suffix = f"[{i + 1}]" if len(dl) > 1 else ""
-                    b.full_name = prefix + b.name + suffix
-
-            for b in children:
-                if kids := [blocks[i] for i in b.children]:
-                    add_full_names(kids, b.full_name + ".")
-
-        add_full_names([b for b in blocks if b.parent is None])
-        return blocks
-
-    def block(self, begin: int) -> Block | None:
-        t = self.tokens[begin]
-        if not (t.type == token.NAME and t.string in ("class", "def")):
-            return None
-
-        category = Block.Category[t.string.upper()]
-        try:
-            ni = self.next_token(begin + 1, token.NAME, "Definition but no name")
-            name = self.tokens[ni].string
-            indent = self.next_token(ni + 1, token.INDENT, "Definition but no indent")
-            dedent = self.indent_to_dedent[indent]
-            docstring = self.docstring(indent)
-        except _linter.ParseError:
-            name = "(ParseError)"
-            indent = -1
-            dedent = -1
-            docstring = ""
-
-        return Block(
-            begin=begin,
-            category=category,
-            dedent=dedent,
-            docstring=docstring,
-            indent=indent,
-            name=name,
-            tokens=self.tokens,
-        )
-
-
-class DocstringLinter(_linter.FileLinter[DocstringFile]):
+class DocstringLinter(_linter.FileLinter):
    linter_name = "docstring_linter"
    description = DESCRIPTION
    is_fixer = False
@ -332,26 +60,26 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
        self._write_grandfather()
        return success

-    def _lint(self, df: DocstringFile) -> Iterator[_linter.LintResult]:
-        if (p := str(df.path)) in self.path_to_blocks:
+    def _lint(self, pf: _linter.PythonFile) -> Iterator[_linter.LintResult]:
+        if (p := str(pf.path)) in self.path_to_blocks:
            print("Repeated file", p, file=sys.stderr)
            return

-        blocks = df.blocks
-        bad = {b for b in blocks if self._is_bad_block(b, df)}
+        blocks = pf.blocks
+        bad = {b for b in blocks if self._is_bad_block(b, pf)}
        bad = self._dont_require_constructor_and_class_docs(blocks, bad)
-        gf = self._grandfathered(df.path, bad)
+        gf = self._grandfathered(pf.path, bad)

-        yield from (self._block_result(b, df) for b in sorted(bad - gf))
+        yield from (self._block_result(b, pf) for b in sorted(bad - gf))

-        def as_data(b: Block) -> dict[str, Any]:
+        def as_data(b: _linter.Block) -> dict[str, Any]:
            status = "grandfather" if b in gf else "bad" if b in bad else "good"
            return {"status": status, **b.as_data()}

        self.path_to_blocks[p] = [as_data(b) for b in blocks]

-    def _error(self, df: DocstringFile, result: _linter.LintResult) -> None:
-        self.path_to_errors[str(df.path)] = [{str(result.line): result.name}]
+    def _error(self, pf: _linter.PythonFile, result: _linter.LintResult) -> None:
+        self.path_to_errors[str(pf.path)] = [{str(result.line): result.name}]

    @cached_property
    def _grandfather(self) -> dict[str, dict[str, Any]]:
@ -368,20 +96,24 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
    def _max_lines(self) -> dict[str, int]:
        return {"class": self.args.max_class, "def": self.args.max_def}

-    def _grandfathered(self, path: Path | None, bad: set[Block]) -> set[Block]:
+    def _grandfathered(
+        self, path: Path | None, bad: set[_linter.Block]
+    ) -> set[_linter.Block]:
        if path is None or self.args.no_grandfather or self.args.write_grandfather:
            return set()

        grand: dict[str, int] = self._grandfather.get(str(path), {})
        tolerance_ratio = 1 + self.args.grandfather_tolerance / 100.0

-        def grandfathered(b: Block) -> bool:
+        def grandfathered(b: _linter.Block) -> bool:
            lines = int(grand.get(b.display_name, 0) * tolerance_ratio)
            return b.line_count <= lines

        return {b for b in bad if grandfathered(b)}

-    def _block_result(self, b: Block, df: DocstringFile) -> _linter.LintResult:
+    def _block_result(
+        self, b: _linter.Block, pf: _linter.PythonFile
+    ) -> _linter.LintResult:
        def_name = "function" if b.category == "def" else "class"
        msg = f"docstring found for {def_name} '{b.name}' ({b.line_count} lines)"
        if len(b.docstring):
@ -392,23 +124,23 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
            msg = f"No {msg}"
            if b.is_method:
                msg = f"{msg}. {METHOD_OVERRIDE_HINT}"
-        return _linter.LintResult(msg, *df.tokens[b.begin].start)
+        return _linter.LintResult(msg, *pf.tokens[b.begin].start)

    def _display(
-        self, df: DocstringFile, results: list[_linter.LintResult]
+        self, pf: _linter.PythonFile, results: list[_linter.LintResult]
    ) -> Iterator[str]:
        if not self.args.report:
-            yield from super()._display(df, results)
+            yield from super()._display(pf, results)

    def _dont_require_constructor_and_class_docs(
-        self, blocks: Sequence[Block], bad: set[Block]
-    ) -> set[Block]:
+        self, blocks: Sequence[_linter.Block], bad: set[_linter.Block]
+    ) -> set[_linter.Block]:
        if self.args.lint_init:
            return bad

        good = {b for b in blocks if len(b.docstring) >= self.args.min_docstring}

-        def has_class_init_doc(b: Block) -> bool:
+        def has_class_init_doc(b: _linter.Block) -> bool:
            if b.is_class:
                # Is it a class whose constructor is documented?
                children = (blocks[i] for i in b.children)
@ -419,10 +151,10 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):

        return {b for b in bad if not has_class_init_doc(b)}

-    def _is_bad_block(self, b: Block, df: DocstringFile) -> bool:
+    def _is_bad_block(self, b: _linter.Block, pf: _linter.PythonFile) -> bool:
        max_lines = self._max_lines[b.category]
        return (
-            not df.omitted(df.tokens, b.begin, b.dedent)
+            not pf.omitted(pf.tokens, b.begin, b.dedent)
            and b.line_count > max_lines
            and len(b.docstring) < self.args.min_docstring
            and (self.args.lint_local or not b.is_local)