mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Move code out of individual token linters (#152256)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/152256 Approved by: https://github.com/Skylion007
This commit is contained in:
committed by
PyTorch MergeBot
parent
da910e603a
commit
98a34e8d4b
@ -1,15 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses as dc
|
||||
import itertools
|
||||
import json
|
||||
import sys
|
||||
import token
|
||||
from enum import Enum
|
||||
from functools import cached_property, total_ordering
|
||||
from functools import cached_property
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Optional, TYPE_CHECKING
|
||||
from typing_extensions import Self
|
||||
from typing import Any, Callable, TYPE_CHECKING
|
||||
|
||||
|
||||
_FILE = Path(__file__).absolute()
|
||||
@ -22,10 +18,9 @@ else:
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterator, Sequence
|
||||
from tokenize import TokenInfo
|
||||
|
||||
|
||||
GRANDFATHER_LIST = Path(str(_FILE).replace(".py", "-grandfather.json"))
|
||||
GRANDFATHER_LIST = _FILE.parent / "docstring_linter-grandfather.json"
|
||||
|
||||
# We tolerate a 10% increase in block size before demanding a docstring
|
||||
TOLERANCE_PERCENT = 10
|
||||
@ -45,274 +40,7 @@ METHOD_OVERRIDE_HINT = (
|
||||
)
|
||||
|
||||
|
||||
@total_ordering
|
||||
@dc.dataclass
|
||||
class Block:
|
||||
"""A block of Python code starting with either `def` or `class`"""
|
||||
|
||||
class Category(str, Enum):
|
||||
CLASS = "class"
|
||||
DEF = "def"
|
||||
|
||||
category: Category
|
||||
|
||||
# The sequence of tokens that contains this Block.
|
||||
# Tokens are represented in `Block` as indexes into `self.tokens`
|
||||
tokens: Sequence[TokenInfo] = dc.field(repr=False)
|
||||
|
||||
# The name of the function or class being defined
|
||||
name: str
|
||||
|
||||
# The index of the very first token in the block (the "class" or "def" keyword)
|
||||
begin: int
|
||||
|
||||
# The index of the first INDENT token for this block
|
||||
indent: int
|
||||
|
||||
# The index of the DEDENT token for this end of this block
|
||||
dedent: int
|
||||
|
||||
# The docstring for the block
|
||||
docstring: str
|
||||
|
||||
# These next members only get filled in after all blocks have been constructed
|
||||
# and figure out family ties
|
||||
|
||||
# The full qualified name of the block within the file.
|
||||
# This is the name of this block and all its parents, joined with `.`.
|
||||
full_name: str = ""
|
||||
|
||||
# The index of this block within the full list of blocks in the file
|
||||
index: int = 0
|
||||
|
||||
# Is this block contained within a function definition?
|
||||
is_local: bool = dc.field(default=False, repr=False)
|
||||
|
||||
# Is this block a function definition in a class definition?
|
||||
is_method: bool = dc.field(default=False, repr=False)
|
||||
|
||||
# A block index to the parent of this block, or None for a top-level block.
|
||||
parent: Optional[int] = None
|
||||
|
||||
# A list of block indexes for the children
|
||||
children: list[int] = dc.field(default_factory=list)
|
||||
|
||||
@property
|
||||
def start_line(self) -> int:
|
||||
return self.tokens[max(self.indent, self.index)].start[0]
|
||||
|
||||
@property
|
||||
def end_line(self) -> int:
|
||||
return self.tokens[max(self.dedent, self.index)].start[0]
|
||||
|
||||
@property
|
||||
def line_count(self) -> int:
|
||||
return self.end_line - self.start_line
|
||||
|
||||
@property
|
||||
def is_class(self) -> bool:
|
||||
return self.category == Block.Category.CLASS
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
"""A user-friendly name like 'class One' or 'def One.method()'"""
|
||||
ending = "" if self.is_class else "()"
|
||||
return f"{self.category.value} {self.full_name}{ending}"
|
||||
|
||||
@cached_property
|
||||
def decorators(self) -> list[str]:
|
||||
"""A list of decorators for this function or method.
|
||||
|
||||
Each decorator both the @ symbol and any arguments to the decorator
|
||||
but no extra whitespace.
|
||||
"""
|
||||
return _get_decorators(self.tokens, self.begin)
|
||||
|
||||
@cached_property
|
||||
def is_override(self) -> bool:
|
||||
return not self.is_class and any(
|
||||
d.rpartition(".")[2] == "override" for d in self.decorators
|
||||
)
|
||||
|
||||
DATA_FIELDS = (
|
||||
"category",
|
||||
"children",
|
||||
"decorators",
|
||||
"display_name",
|
||||
"docstring",
|
||||
"full_name",
|
||||
"index",
|
||||
"is_local",
|
||||
"is_method",
|
||||
"line_count",
|
||||
"parent",
|
||||
"start_line",
|
||||
)
|
||||
|
||||
def as_data(self) -> dict[str, Any]:
|
||||
d = {i: getattr(self, i) for i in self.DATA_FIELDS}
|
||||
d["category"] = d["category"].value
|
||||
return d
|
||||
|
||||
@property
|
||||
def is_init(self) -> bool:
|
||||
return not self.is_class and self.name == "__init__"
|
||||
|
||||
def contains(self, b: Block) -> bool:
|
||||
return self.start_line < b.start_line and self.end_line >= b.end_line
|
||||
|
||||
def __eq__(self, o: object) -> bool:
|
||||
assert isinstance(o, Block)
|
||||
return o.tokens is self.tokens and o.index == self.index
|
||||
|
||||
def __hash__(self) -> int:
|
||||
return super().__hash__()
|
||||
|
||||
def __lt__(self, o: Self) -> bool:
|
||||
assert isinstance(o, Block) and o.tokens is self.tokens
|
||||
return o.index < self.index
|
||||
|
||||
|
||||
_IGNORE = {token.COMMENT, token.DEDENT, token.INDENT, token.NL}
|
||||
|
||||
|
||||
def _get_decorators(tokens: Sequence[TokenInfo], block_start: int) -> list[str]:
|
||||
def decorators() -> Iterator[str]:
|
||||
rev = reversed(range(block_start))
|
||||
newlines = (i for i in rev if tokens[i].type == token.NEWLINE)
|
||||
newlines = itertools.chain(newlines, [-1]) # To account for the first line
|
||||
|
||||
it = iter(newlines)
|
||||
end = next(it, -1) # Like itertools.pairwise in Python 3.10
|
||||
for begin in it:
|
||||
for i in range(begin + 1, end):
|
||||
t = tokens[i]
|
||||
if t.type == token.OP and t.string == "@":
|
||||
useful = (t for t in tokens[i:end] if t.type not in _IGNORE)
|
||||
yield "".join(s.string.strip("\n") for s in useful)
|
||||
break
|
||||
elif t.type not in _IGNORE:
|
||||
return # A statement means no more decorators
|
||||
end = begin
|
||||
|
||||
out = list(decorators())
|
||||
out.reverse()
|
||||
return out
|
||||
|
||||
|
||||
class DocstringFile(_linter.PythonFile):
|
||||
def __getitem__(self, i: int | slice) -> TokenInfo | Sequence[TokenInfo]:
|
||||
return self.tokens[i]
|
||||
|
||||
def next_token(self, start: int, token_type: int, error: str) -> int:
|
||||
for i in range(start, len(self.tokens)):
|
||||
if self.tokens[i].type == token_type:
|
||||
return i
|
||||
raise _linter.ParseError(self.tokens[-1], error)
|
||||
|
||||
def docstring(self, start: int) -> str:
|
||||
for i in range(start + 1, len(self.tokens)):
|
||||
tk = self.tokens[i]
|
||||
if tk.type == token.STRING:
|
||||
return tk.string
|
||||
if tk.type not in _linter.EMPTY_TOKENS:
|
||||
return ""
|
||||
return ""
|
||||
|
||||
@cached_property
|
||||
def indent_to_dedent(self) -> dict[int, int]:
|
||||
dedents = dict[int, int]()
|
||||
stack = list[int]()
|
||||
|
||||
for i, t in enumerate(self.tokens):
|
||||
if t.type == token.INDENT:
|
||||
stack.append(i)
|
||||
elif t.type == token.DEDENT:
|
||||
dedents[stack.pop()] = i
|
||||
|
||||
return dedents
|
||||
|
||||
@cached_property
|
||||
def errors(self) -> dict[str, str]:
|
||||
return {}
|
||||
|
||||
@cached_property
|
||||
def blocks(self) -> list[Block]:
|
||||
blocks: list[Block] = []
|
||||
|
||||
for i in range(len(self.tokens)):
|
||||
try:
|
||||
if (b := self.block(i)) is not None:
|
||||
blocks.append(b)
|
||||
except _linter.ParseError as e:
|
||||
self.errors[e.token.line] = " ".join(e.args)
|
||||
|
||||
for i, parent in enumerate(blocks):
|
||||
for j in range(i + 1, len(blocks)):
|
||||
if parent.contains(child := blocks[j]):
|
||||
child.parent = i
|
||||
parent.children.append(j)
|
||||
else:
|
||||
break
|
||||
|
||||
for i, b in enumerate(blocks):
|
||||
b.index = i
|
||||
|
||||
parents = [b]
|
||||
while (p := parents[-1].parent) is not None:
|
||||
parents.append(blocks[p])
|
||||
parents = parents[1:]
|
||||
|
||||
b.is_local = not all(p.is_class for p in parents)
|
||||
b.is_method = not b.is_class and bool(parents) and parents[0].is_class
|
||||
|
||||
def add_full_names(children: Sequence[Block], prefix: str = "") -> None:
|
||||
dupes: dict[str, list[Block]] = {}
|
||||
for b in children:
|
||||
dupes.setdefault(b.name, []).append(b)
|
||||
|
||||
for dl in dupes.values():
|
||||
for i, b in enumerate(dl):
|
||||
suffix = f"[{i + 1}]" if len(dl) > 1 else ""
|
||||
b.full_name = prefix + b.name + suffix
|
||||
|
||||
for b in children:
|
||||
if kids := [blocks[i] for i in b.children]:
|
||||
add_full_names(kids, b.full_name + ".")
|
||||
|
||||
add_full_names([b for b in blocks if b.parent is None])
|
||||
return blocks
|
||||
|
||||
def block(self, begin: int) -> Block | None:
|
||||
t = self.tokens[begin]
|
||||
if not (t.type == token.NAME and t.string in ("class", "def")):
|
||||
return None
|
||||
|
||||
category = Block.Category[t.string.upper()]
|
||||
try:
|
||||
ni = self.next_token(begin + 1, token.NAME, "Definition but no name")
|
||||
name = self.tokens[ni].string
|
||||
indent = self.next_token(ni + 1, token.INDENT, "Definition but no indent")
|
||||
dedent = self.indent_to_dedent[indent]
|
||||
docstring = self.docstring(indent)
|
||||
except _linter.ParseError:
|
||||
name = "(ParseError)"
|
||||
indent = -1
|
||||
dedent = -1
|
||||
docstring = ""
|
||||
|
||||
return Block(
|
||||
begin=begin,
|
||||
category=category,
|
||||
dedent=dedent,
|
||||
docstring=docstring,
|
||||
indent=indent,
|
||||
name=name,
|
||||
tokens=self.tokens,
|
||||
)
|
||||
|
||||
|
||||
class DocstringLinter(_linter.FileLinter[DocstringFile]):
|
||||
class DocstringLinter(_linter.FileLinter):
|
||||
linter_name = "docstring_linter"
|
||||
description = DESCRIPTION
|
||||
is_fixer = False
|
||||
@ -332,26 +60,26 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
|
||||
self._write_grandfather()
|
||||
return success
|
||||
|
||||
def _lint(self, df: DocstringFile) -> Iterator[_linter.LintResult]:
|
||||
if (p := str(df.path)) in self.path_to_blocks:
|
||||
def _lint(self, pf: _linter.PythonFile) -> Iterator[_linter.LintResult]:
|
||||
if (p := str(pf.path)) in self.path_to_blocks:
|
||||
print("Repeated file", p, file=sys.stderr)
|
||||
return
|
||||
|
||||
blocks = df.blocks
|
||||
bad = {b for b in blocks if self._is_bad_block(b, df)}
|
||||
blocks = pf.blocks
|
||||
bad = {b for b in blocks if self._is_bad_block(b, pf)}
|
||||
bad = self._dont_require_constructor_and_class_docs(blocks, bad)
|
||||
gf = self._grandfathered(df.path, bad)
|
||||
gf = self._grandfathered(pf.path, bad)
|
||||
|
||||
yield from (self._block_result(b, df) for b in sorted(bad - gf))
|
||||
yield from (self._block_result(b, pf) for b in sorted(bad - gf))
|
||||
|
||||
def as_data(b: Block) -> dict[str, Any]:
|
||||
def as_data(b: _linter.Block) -> dict[str, Any]:
|
||||
status = "grandfather" if b in gf else "bad" if b in bad else "good"
|
||||
return {"status": status, **b.as_data()}
|
||||
|
||||
self.path_to_blocks[p] = [as_data(b) for b in blocks]
|
||||
|
||||
def _error(self, df: DocstringFile, result: _linter.LintResult) -> None:
|
||||
self.path_to_errors[str(df.path)] = [{str(result.line): result.name}]
|
||||
def _error(self, pf: _linter.PythonFile, result: _linter.LintResult) -> None:
|
||||
self.path_to_errors[str(pf.path)] = [{str(result.line): result.name}]
|
||||
|
||||
@cached_property
|
||||
def _grandfather(self) -> dict[str, dict[str, Any]]:
|
||||
@ -368,20 +96,24 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
|
||||
def _max_lines(self) -> dict[str, int]:
|
||||
return {"class": self.args.max_class, "def": self.args.max_def}
|
||||
|
||||
def _grandfathered(self, path: Path | None, bad: set[Block]) -> set[Block]:
|
||||
def _grandfathered(
|
||||
self, path: Path | None, bad: set[_linter.Block]
|
||||
) -> set[_linter.Block]:
|
||||
if path is None or self.args.no_grandfather or self.args.write_grandfather:
|
||||
return set()
|
||||
|
||||
grand: dict[str, int] = self._grandfather.get(str(path), {})
|
||||
tolerance_ratio = 1 + self.args.grandfather_tolerance / 100.0
|
||||
|
||||
def grandfathered(b: Block) -> bool:
|
||||
def grandfathered(b: _linter.Block) -> bool:
|
||||
lines = int(grand.get(b.display_name, 0) * tolerance_ratio)
|
||||
return b.line_count <= lines
|
||||
|
||||
return {b for b in bad if grandfathered(b)}
|
||||
|
||||
def _block_result(self, b: Block, df: DocstringFile) -> _linter.LintResult:
|
||||
def _block_result(
|
||||
self, b: _linter.Block, pf: _linter.PythonFile
|
||||
) -> _linter.LintResult:
|
||||
def_name = "function" if b.category == "def" else "class"
|
||||
msg = f"docstring found for {def_name} '{b.name}' ({b.line_count} lines)"
|
||||
if len(b.docstring):
|
||||
@ -392,23 +124,23 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
|
||||
msg = f"No {msg}"
|
||||
if b.is_method:
|
||||
msg = f"{msg}. {METHOD_OVERRIDE_HINT}"
|
||||
return _linter.LintResult(msg, *df.tokens[b.begin].start)
|
||||
return _linter.LintResult(msg, *pf.tokens[b.begin].start)
|
||||
|
||||
def _display(
|
||||
self, df: DocstringFile, results: list[_linter.LintResult]
|
||||
self, pf: _linter.PythonFile, results: list[_linter.LintResult]
|
||||
) -> Iterator[str]:
|
||||
if not self.args.report:
|
||||
yield from super()._display(df, results)
|
||||
yield from super()._display(pf, results)
|
||||
|
||||
def _dont_require_constructor_and_class_docs(
|
||||
self, blocks: Sequence[Block], bad: set[Block]
|
||||
) -> set[Block]:
|
||||
self, blocks: Sequence[_linter.Block], bad: set[_linter.Block]
|
||||
) -> set[_linter.Block]:
|
||||
if self.args.lint_init:
|
||||
return bad
|
||||
|
||||
good = {b for b in blocks if len(b.docstring) >= self.args.min_docstring}
|
||||
|
||||
def has_class_init_doc(b: Block) -> bool:
|
||||
def has_class_init_doc(b: _linter.Block) -> bool:
|
||||
if b.is_class:
|
||||
# Is it a class whose constructor is documented?
|
||||
children = (blocks[i] for i in b.children)
|
||||
@ -419,10 +151,10 @@ class DocstringLinter(_linter.FileLinter[DocstringFile]):
|
||||
|
||||
return {b for b in bad if not has_class_init_doc(b)}
|
||||
|
||||
def _is_bad_block(self, b: Block, df: DocstringFile) -> bool:
|
||||
def _is_bad_block(self, b: _linter.Block, pf: _linter.PythonFile) -> bool:
|
||||
max_lines = self._max_lines[b.category]
|
||||
return (
|
||||
not df.omitted(df.tokens, b.begin, b.dedent)
|
||||
not pf.omitted(pf.tokens, b.begin, b.dedent)
|
||||
and b.line_count > max_lines
|
||||
and len(b.docstring) < self.args.min_docstring
|
||||
and (self.args.lint_local or not b.is_local)
|
||||
|
||||
Reference in New Issue
Block a user