set_linter finds and replaces built-in set in Python code (#138454)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/138454 Approved by: https://github.com/eellison
2025-10-21 05:34:18 +08:00 · 2024-12-02 18:32:12 +00:00
parent 7666c8263a
commit 6ad422d778
17 changed files with 1427 additions and 0 deletions
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@ -1687,3 +1687,20 @@ command = [
    '@{{PATHSFILE}}'
 ]
 is_formatter = true
+
+# `set_linter` detects occurrences of built-in `set` in areas of Python code like
+# _inductor where the instability of iteration in `set` has proven a problem.
+
+[[linter]]
+code = 'SET_LINTER'
+command = [
+    'python3',
+    'tools/linter/adapters/set_linter.py',
+    '--lintrunner',
+    '--',
+    '@{{PATHSFILE}}'
+]
+include_patterns = [
+    'torch/**/does-not-exist.py'
+]
+is_formatter = true
--- a/tools/linter/adapters/_linter.py
+++ b/tools/linter/adapters/_linter.py
@ -0,0 +1,464 @@
+from __future__ import annotations
+
+import argparse
+import dataclasses as dc
+import json
+import logging
+import sys
+import token
+from abc import ABC, abstractmethod
+from argparse import Namespace
+from enum import Enum
+from functools import cached_property
+from pathlib import Path
+from tokenize import generate_tokens, TokenInfo
+from typing import Any, Iterator, Sequence
+from typing_extensions import Never
+
+
+# Token types that carry no code of their own: comments, indentation changes,
+# the encoding marker, and both kinds of line break.
+EMPTY_TOKENS = {
+    token.COMMENT,
+    token.DEDENT,
+    token.ENCODING,
+    token.INDENT,
+    token.NEWLINE,
+    token.NL,
+}
+# Maps each opening bracket to its matching closing bracket.
+BRACKETS = {"{": "}", "(": ")", "[": "]"}
+# The inverse mapping: each closing bracket to its opening bracket.
+BRACKETS_INV = {j: i for i, j in BRACKETS.items()}
+
+
+def is_name(t: TokenInfo, *names: str) -> bool:
+    return t.type == token.NAME and not names or t.string in names
+
+
+def is_op(t: TokenInfo, *names: str) -> bool:
+    return t.type == token.OP and not names or t.string in names
+
+
+class LintSeverity(str, Enum):
+    """The severity levels that can be attached to a LintMessage."""
+
+    ERROR = "error"
+    WARNING = "warning"
+    ADVICE = "advice"
+    DISABLED = "disabled"
+
+
+@dc.dataclass
+class LintMessage:
+    """This is a datatype representation of the JSON that gets sent to lintrunner
+    as described here:
+    https://docs.rs/lintrunner/latest/lintrunner/lint_message/struct.LintMessage.html
+    """
+
+    # Required fields
+    code: str
+    name: str
+    severity: LintSeverity
+
+    # Optional fields, all defaulting to None
+    char: int | None = None
+    description: str | None = None
+    line: int | None = None
+    original: str | None = None
+    path: str | None = None
+    replacement: str | None = None
+
+    # Has no annotation, so it is not a dataclass field: it makes
+    # `message.asdict()` available as a method on instances.
+    asdict = dc.asdict
+
+
+@dc.dataclass
+class LintResult:
+    """LintResult is a single result from a linter.
+
+    Like LintMessage but the .length member allows you to make specific edits to
+    one location within a file, not just replace the whole file.
+
+    Linters can generate recursive results - results that contain other results.
+
+    For example, the annotation linter would find two results in this code sample:
+
+        index = Union[Optional[str], int]
+
+    And the first result, `Union[Optional[str], int]`, contains the second one,
+    `Optional[str]`, so the first result is recursive but the second is not.
+
+    If --fix is selected, the linter does a cycle of tokenizing and fixing all
+    the non-recursive edits until no edits remain.
+    """
+
+    name: str
+
+    line: int | None = None
+    char: int | None = None
+    replacement: str | None = None
+    length: int | None = None  # Not in LintMessage
+    description: str | None = None
+    original: str | None = None
+
+    is_recursive: bool = False  # Not in LintMessage
+
+    @property
+    def is_edit(self) -> bool:
+        return None not in (self.char, self.length, self.line, self.replacement)
+
+    def apply(self, lines: list[str]) -> bool:
+        if self.line is None:
+            return False
+        line = lines[self.line - 1]
+
+        if self.char is None:
+            return False
+        before = line[: self.char]
+
+        if self.length is None:
+            return False
+        after = line[self.char + self.length :]
+
+        lines[self.line - 1] = f"{before}{self.replacement}{after}"
+        return True
+
+    def as_message(self, code: str, path: str) -> LintMessage:
+        d = dc.asdict(self)
+        d.pop("is_recursive")
+        d.pop("length")
+        if self.is_edit:
+            # This is one of our , which we don't want to
+            # send to lintrunner as a replacement
+            d["replacement"] = None
+
+        return LintMessage(code=code, path=path, severity=LintSeverity.ERROR, **d)
+
+    def sort_key(self) -> tuple[int, int, str]:
+        line = -1 if self.line is None else self.line
+        char = -1 if self.char is None else self.char
+        return line, char, self.name
+
+
+class ParseError(ValueError):
+    def __init__(self, token: TokenInfo, *args: str) -> None:
+        super().__init__(*args)
+        self.token = token
+
+    @classmethod
+    def check(cls, cond: Any, token: TokenInfo, *args: str) -> None:
+        if not cond:
+            raise cls(token, *args)
+
+
+class ArgumentParser(argparse.ArgumentParser):
+    """
+    Adds better help formatting and default arguments to argparse.ArgumentParser
+    """
+
+    def __init__(
+        self,
+        prog: str | None = None,
+        usage: str | None = None,
+        description: str | None = None,
+        epilog: str | None = None,
+        is_fixer: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(prog, usage, description, None, **kwargs)
+        self._epilog = epilog
+
+        help = "A list of files or directories to lint"
+        self.add_argument("files", nargs="*", help=help)
+        # TODO(rec): get fromfile_prefix_chars="@", type=argparse.FileType to work
+
+        help = "Fix lint errors if possible" if is_fixer else argparse.SUPPRESS
+        self.add_argument("-f", "--fix", action="store_true", help=help)
+
+        help = "Run for lintrunner and print LintMessages which aren't edits"
+        self.add_argument("-l", "--lintrunner", action="store_true", help=help)
+
+        help = "Run for test, print all LintMessages"
+        self.add_argument("-t", "--test", action="store_true", help=help)
+
+        help = "Print more debug info"
+        self.add_argument("-v", "--verbose", action="store_true", help=help)
+
+    def exit(self, status: int = 0, message: str | None = None) -> Never:
+        """
+        Overriding this method is a workaround for argparse throwing away all
+        line breaks when printing the `epilog` section of the help message.
+        """
+        argv = sys.argv[1:]
+        if self._epilog and not status and "-h" in argv or "--help" in argv:
+            print(self._epilog)
+        super().exit(status, message)
+
+
+class OmittedLines:
+    """Read lines textually and find comment lines that end in 'noqa {linter_name}'"""
+
+    omitted: set[int]
+
+    def __init__(self, lines: Sequence[str], linter_name: str) -> None:
+        self.lines = lines
+        suffix = f"# noqa: {linter_name}"
+        omitted = ((i, s.rstrip()) for i, s in enumerate(lines))
+        self.omitted = {i + 1 for i, s in omitted if s.endswith(suffix)}
+
+    def __call__(self, tokens: Sequence[TokenInfo]) -> bool:
+        # A token_line might span multiple physical lines
+        lines = sorted(i for t in tokens for i in (t.start[0], t.end[0]))
+        lines_covered = list(range(lines[0], lines[-1] + 1)) if lines else []
+        return bool(self.omitted.intersection(lines_covered))
+
+
+class PythonFile:
+    contents: str
+    lines: list[str]
+    path: Path | None
+    linter_name: str
+
+    def __init__(
+        self,
+        linter_name: str,
+        path: Path | None = None,
+        contents: str | None = None,
+    ) -> None:
+        self.linter_name = linter_name
+        self.path = path
+        if contents is None and path is not None:
+            contents = path.read_text()
+
+        self.contents = contents or ""
+        self.lines = self.contents.splitlines(keepends=True)
+
+    @classmethod
+    def make(cls, linter_name: str, pc: Path | str | None = None) -> PythonFile:
+        if isinstance(pc, Path):
+            return cls(linter_name, path=pc)
+        return cls(linter_name, contents=pc)
+
+    def with_contents(self, contents: str) -> PythonFile:
+        return PythonFile(self.linter_name, self.path, contents)
+
+    @cached_property
+    def omitted(self) -> OmittedLines:
+        assert self.linter_name is not None
+        return OmittedLines(self.lines, self.linter_name)
+
+    @cached_property
+    def tokens(self) -> list[TokenInfo]:
+        # Might raise IndentationError if the code is mal-indented
+        return list(generate_tokens(iter(self.lines).__next__))
+
+    @cached_property
+    def token_lines(self) -> list[list[TokenInfo]]:
+        """Returns lists of TokenInfo segmented by token.NEWLINE"""
+        token_lines: list[list[TokenInfo]] = [[]]
+
+        for t in self.tokens:
+            if t.type not in (token.COMMENT, token.ENDMARKER, token.NL):
+                token_lines[-1].append(t)
+                if t.type == token.NEWLINE:
+                    token_lines.append([])
+        if token_lines and not token_lines[-1]:
+            token_lines.pop()
+        return token_lines
+
+    @cached_property
+    def import_lines(self) -> list[list[int]]:
+        froms, imports = [], []
+        for i, (t, *_) in enumerate(self.token_lines):
+            if t.type == token.INDENT:
+                break
+            if t.type == token.NAME:
+                if t.string == "from":
+                    froms.append(i)
+                elif t.string == "import":
+                    imports.append(i)
+
+        return [froms, imports]
+
+
+def bracket_pairs(tokens: Sequence[TokenInfo]) -> dict[int, int]:
+    """Returns a dictionary mapping opening to closing brackets"""
+    braces: dict[int, int] = {}
+    stack: list[int] = []
+
+    for i, t in enumerate(tokens):
+        if t.type == token.OP:
+            if t.string in BRACKETS:
+                stack.append(i)
+            elif inv := BRACKETS_INV.get(t.string):
+                ParseError.check(stack, t, "Never opened")
+                begin = stack.pop()
+                braces[begin] = i
+
+                b = tokens[begin].string
+                ParseError.check(b == inv, t, f"Mismatched braces '{b}' at {begin}")
+
+    if tokens:
+        ParseError.check(not stack, t, "Left open")
+    return braces
+
+
+class ErrorLines:
+    """How many lines to display before and after an error"""
+
+    # Total number of source lines in the displayed window
+    WINDOW = 5
+    # Lines shown before the line with the error
+    BEFORE = 2
+    # Lines shown after it: the window minus BEFORE and the error line itself
+    AFTER = WINDOW - BEFORE - 1
+
+
+class FileLinter(ABC):
+    """The base class that all token-based linters inherit from"""
+
+    description: str
+    linter_name: str
+
+    epilog: str | None = None
+    is_fixer: bool = True
+    report_column_numbers: bool = False
+
+    @abstractmethod
+    def _lint(self, python_file: PythonFile) -> Iterator[LintResult]:
+        raise NotImplementedError
+
+    def __init__(self, argv: list[str] | None = None) -> None:
+        self.argv = argv
+        self.parser = ArgumentParser(
+            is_fixer=self.is_fixer,
+            description=self.description,
+            epilog=self.epilog,
+        )
+
+    @classmethod
+    def run(cls) -> Never:
+        linter = cls()
+        success = linter.lint_all()
+        sys.exit(not success)
+
+    def lint_all(self) -> bool:
+        if self.args.fix and self.args.lintrunner:
+            raise ValueError("--fix and --lintrunner are incompatible")
+
+        success = True
+        for p in self.paths:
+            success = self._lint_file(p) and success
+        return self.args.lintrunner or success
+
+    @cached_property
+    def args(self) -> Namespace:
+        args = self.parser.parse_args(self.argv)
+        args.lintrunner = args.lintrunner or args.test
+
+        return args
+
+    @cached_property
+    def code(self) -> str:
+        return self.linter_name.upper()
+
+    @cached_property
+    def paths(self) -> list[Path]:
+        files = []
+        file_parts = (f for fp in self.args.files for f in fp.split(":"))
+        for f in file_parts:
+            if f.startswith("@"):
+                files.extend(Path(f[1:]).read_text().splitlines())
+            elif f != "--":
+                files.append(f)
+        return sorted(Path(f) for f in files)
+
+    def _lint_file(self, p: Path) -> bool:
+        if self.args.verbose:
+            print(p, "Reading")
+
+        pf = PythonFile(self.linter_name, p)
+        replacement, results = self._replace(pf)
+
+        print(*self._display(pf, results), sep="\n")
+        if results and self.args.fix and pf.path and pf.contents != replacement:
+            pf.path.write_text(replacement)
+
+        return not results or self.args.fix and all(r.is_edit for r in results)
+
+    def _replace(self, pf: PythonFile) -> tuple[str, list[LintResult]]:
+        # Because of recursive replacements, we need to repeat replacing and reparsing
+        # from the inside out until all possible replacements are complete
+        previous_result_count = float("inf")
+        first_results = None
+        original = replacement = pf.contents
+
+        while True:
+            try:
+                results = list(self._lint(pf))
+            except IndentationError as e:
+                error, (_name, lineno, column, _line) = e.args
+                results = [LintResult(error, lineno, column)]
+
+            if first_results is None:
+                first_results = sorted(results, key=LintResult.sort_key)
+
+            if not results or len(results) >= previous_result_count:
+                break
+            previous_result_count = len(results)
+
+            lines = pf.lines[:]
+            for r in reversed(results):
+                if not r.is_recursive:
+                    r.apply(lines)
+            replacement = "".join(lines)
+
+            if not any(r.is_recursive for r in results):
+                break
+            pf = pf.with_contents(replacement)
+
+        if first_results and self.args.lintrunner:
+            name = f"Suggested fixes for {self.linter_name}"
+            msg = LintResult(name=name, original=original, replacement=replacement)
+            first_results.append(msg)
+
+        return replacement, first_results
+
+    def _display(self, pf: PythonFile, results: list[LintResult]) -> Iterator[str]:
+        """Emit a series of human-readable strings representing the results"""
+        show_edits = not self.args.fix or self.args.verbose
+
+        first = True
+        for r in results:
+            if show_edits or r.is_edit:
+                if self.args.test or self.args.lintrunner:
+                    msg = r.as_message(code=self.code, path=str(pf.path))
+                    yield json.dumps(msg.asdict(), sort_keys=True)
+                    continue
+                if first:
+                    first = False
+                else:
+                    yield ""
+                if r.line is None:
+                    yield f"{pf.path}: {r.name}"
+                else:
+                    yield from (i.rstrip() for i in self._display_window(pf, r))
+
+    def _display_window(self, pf: PythonFile, r: LintResult) -> Iterator[str]:
+        """Display a window onto the code with an error"""
+        if r.char is None or not self.report_column_numbers:
+            yield f"{pf.path}:{r.line}: {r.name}"
+        else:
+            yield f"{pf.path}:{r.line}:{r.char + 1}: {r.name}"
+
+        begin = max((r.line or 0) - ErrorLines.BEFORE, 1)
+        end = min(begin + ErrorLines.WINDOW, 1 + len(pf.lines))
+
+        for lineno in range(begin, end):
+            source_line = pf.lines[lineno - 1].rstrip()
+            yield f"{lineno:5} | {source_line}"
+            if lineno == r.line:
+                spaces = 8 + (r.char or 0)
+                carets = len(source_line) if r.char is None else (r.length or 1)
+                yield spaces * " " + carets * "^"
+
+
+def set_logging_level(args: argparse.Namespace, paths: Sequence[Path | str]) -> None:
+    if args.verbose:
+        level = logging.NOTSET
+    elif len(paths) < 1000:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO
+
+    fmt = "<%(threadName)s:%(levelname)s> %(message)s"
+    logging.basicConfig(format=fmt, level=level, stream=sys.stderr)
--- a/tools/linter/adapters/set_linter.py
+++ b/tools/linter/adapters/set_linter.py
@ -0,0 +1,191 @@
+from __future__ import annotations
+
+import dataclasses as dc
+import sys
+import token
+from functools import cached_property
+from pathlib import Path
+from typing import Iterator, Sequence, TYPE_CHECKING
+
+
+_PARENT = Path(__file__).parent.absolute()
+_PATH = [Path(p).absolute() for p in sys.path]
+
+if not TYPE_CHECKING and _PARENT in _PATH:
+    import _linter
+else:
+    from . import _linter
+
+if TYPE_CHECKING:
+    from tokenize import TokenInfo
+
+
+# The message reported for each use of the built-in `set`
+ERROR = "Builtin `set` is deprecated"
+# The line added to a file when it needs an import for OrderedSet
+IMPORT_LINE = "from torch.utils._ordered_set import OrderedSet\n"
+
+# Used as the description of the linter's argument parser
+DESCRIPTION = """`set_linter` is a lintrunner linter which finds usages of the
+Python built-in class `set` in Python code, and optionally replaces them with
+`OrderedSet`.
+"""
+
+EPILOG = """
+`lintrunner` operates on whole commits. If you want to remove uses of `set`
+from existing files not part of a commit, call `set_linter` directly:
+
+    python tools/linter/adapters/set_linter.py --fix [... python files ...]
+
+---
+
+To omit a line of Python code from `set_linter` checking, append a comment:
+
+    s = set()  # noqa: set_linter
+    t = {  # noqa: set_linter
+       "one",
+       "two",
+    }
+
+---
+
+Running set_linter in fix mode (though either `lintrunner -a` or `--fix`
+should not significantly change the behavior of working code, but will still
+usually needs some manual intervention:
+
+1. Replacing `set` with `OrderedSet` will sometimes introduce new typechecking
+errors because `OrderedSet` is imperfectly generic. Find a common type for its
+elements (in the worst case, `typing.Any` always works), and use
+`OrderedSet[YourCommonTypeHere]`.
+
+2. The fix mode doesn't recognize generator expressions, so it replaces:
+
+    s = {i for i in range(3)}
+
+with
+
+    s = OrderedSet([i for i in range(3)])
+
+You can and should delete the square brackets in every such case.
+
+3. There is a common pattern of set usage where a set is created and then only
+used for testing inclusion. For small collections, up to around 12 elements, a
+tuple is more time-efficient than an OrderedSet and also has less visual clutter
+(see https://github.com/rec/test/blob/master/python/time_access.py).
+"""
+
+
+class SetLinter(_linter.FileLinter):
+    linter_name = "set_linter"
+    description = DESCRIPTION
+    epilog = EPILOG
+    report_column_numbers = True
+
+    def _lint(self, pf: _linter.PythonFile) -> Iterator[_linter.LintResult]:
+        pl = PythonLines(pf)
+        for b in pl.braced_sets:
+            yield _linter.LintResult(ERROR, *b[0].start, "OrderedSet([", 1)
+            yield _linter.LintResult(ERROR, *b[-1].start, "])", 1)
+
+        for b in pl.sets:
+            yield _linter.LintResult(ERROR, *b.start, "OrderedSet", 3)
+
+        if (pl.sets or pl.braced_sets) and (ins := pl.insert_import_line) is not None:
+            yield _linter.LintResult(
+                "Add import for OrderedSet", ins, 0, IMPORT_LINE, 0
+            )
+
+
+@dc.dataclass
+class TokenLine:
+    """A logical line of Python tokens, terminated by a NEWLINE or the end of file"""
+
+    tokens: list[TokenInfo]
+
+    @cached_property
+    def sets(self) -> list[TokenInfo]:
+        """A list of tokens which use the built-in set symbol"""
+        return [t for i, t in enumerate(self.tokens) if self.is_set(i)]
+
+    @cached_property
+    def braced_sets(self) -> list[list[TokenInfo]]:
+        """A list of lists of tokens, each representing a braced set, like {1}"""
+        return [
+            self.tokens[b : e + 1]
+            for b, e in self.bracket_pairs.items()
+            if self.is_braced_set(b, e)
+        ]
+
+    @cached_property
+    def bracket_pairs(self) -> dict[int, int]:
+        return _linter.bracket_pairs(self.tokens)
+
+    def is_set(self, i: int) -> bool:
+        t = self.tokens[i]
+        after = i < len(self.tokens) - 1 and self.tokens[i + 1]
+        if t.string == "Set" and t.type == token.NAME:
+            return after and after.string == "[" and after.type == token.OP
+        if not (t.string == "set" and t.type == token.NAME):
+            return False
+        if i and self.tokens[i - 1].string in ("def", "."):
+            return False
+        if after and after.string == "=" and after.type == token.OP:
+            return False
+        return True
+
+    def is_braced_set(self, begin: int, end: int) -> bool:
+        if begin + 1 == end or self.tokens[begin].string != "{":
+            return False
+        i = begin + 1
+        empty = True
+        while i < end:
+            t = self.tokens[i]
+            if t.type == token.OP and t.string in (":", "**"):
+                return False
+            if brace_end := self.bracket_pairs.get(i):
+                # Skip to the end of a subexpression
+                i = brace_end
+            elif t.type not in _linter.EMPTY_TOKENS:
+                empty = False
+            i += 1
+        return not empty
+
+
+class PythonLines:
+    """A list of lines of Python code represented by strings"""
+
+    braced_sets: list[Sequence[TokenInfo]]
+    contents: str
+    lines: list[str]
+    path: Path | None
+    sets: list[TokenInfo]
+    token_lines: list[TokenLine]
+    tokens: list[TokenInfo]
+
+    def __init__(self, pf: _linter.PythonFile) -> None:
+        self.contents = pf.contents
+        self.lines = pf.lines
+        self.path = pf.path
+        self.tokens = pf.tokens
+        self.omitted = pf.omitted
+
+        self.token_lines = [TokenLine(tl) for tl in pf.token_lines]
+
+        sets = [t for tl in self.token_lines for t in tl.sets]
+        self.sets = [s for s in sets if not pf.omitted([s])]
+
+        braced_sets = [t for tl in self.token_lines for t in tl.braced_sets]
+        self.braced_sets = [s for s in braced_sets if not pf.omitted(s)]
+
+        froms, imports = pf.import_lines
+        for i in froms + imports:
+            tl = pf.token_lines[i]
+            if any(i.type == token.NAME and i.string == "OrderedSet" for i in tl):
+                self.insert_import_line = None
+                return
+
+        if section := froms or imports:
+            self.insert_import_line = pf.token_lines[section[-1]][-1].start[0] + 1
+        else:
+            self.insert_import_line = 0
+
+
+# Run the linter only when this file is executed as a script
+if __name__ == "__main__":
+    SetLinter.run()
--- a/tools/test/linter_test_case.py
+++ b/tools/test/linter_test_case.py
@ -0,0 +1,57 @@
+# mypy: ignore-errors
+import io
+import json
+import os
+from pathlib import Path
+from unittest import mock, TestCase
+
+from tools.linter.adapters._linter import PythonFile
+
+
+class LinterTestCase(TestCase):
+    LinterClass = None
+    rewrite_expected = "REWRITE_EXPECTED" in os.environ
+
+    def assertExpected(self, path: Path, actual: str, suffix: str) -> None:
+        expected_file = Path(f"{path}.{suffix}")
+        if not self.rewrite_expected and expected_file.exists():
+            self.assertEqual(actual, expected_file.read_text())
+        else:
+            expected_file.write_text(actual)
+
+    def replace(self, s: str):
+        linter = self.LinterClass("dummy")
+        pf = PythonFile(linter.linter_name, contents=s)
+        replacement, _results = linter._replace(pf)
+        return replacement
+
+    @mock.patch("sys.stdout", new_callable=io.StringIO)
+    def lint_test(self, path, args, mock_stdout):
+        return self._lint_test(path, args, mock_stdout)[:2]
+
+    @mock.patch("sys.stdout", new_callable=io.StringIO)
+    def lint_fix_test(self, path, args, mock_stdout):
+        rep, results, linter = self._lint_test(path, args, mock_stdout)
+        r = results[-1]
+        path = linter.paths[0]
+        self.assertEqual(r.original, path.read_text())
+        self.assertEqual(rep, r.replacement)
+        self.assertExpected(path, r.replacement, "python")
+        return r
+
+    def _lint_test(self, path, args, mock_stdout):
+        with self.subTest("from-command-line"):
+            linter = self.LinterClass([str(path), *args])
+            linter.lint_all()
+            self.assertExpected(path, mock_stdout.getvalue(), "lintrunner")
+
+        with self.subTest("from-lintrunner"):
+            linter = self.LinterClass(["--lintrunner", str(path), *args])
+            pf = PythonFile(linter.linter_name, path)
+            replacement, results = linter._replace(pf)
+
+            actual = [json.loads(d) for d in linter._display(pf, results)]
+            actual = json.dumps(actual, indent=2, sort_keys=True) + "\n"
+            self.assertExpected(path, actual, "json")
+
+        return replacement, results, linter
--- a/tools/test/set_linter_testdata/includes.py.txt
+++ b/tools/test/set_linter_testdata/includes.py.txt
@ -0,0 +1,24 @@
+# mypy: ignore-errors
+
+import collections
+import types
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+import torch
+import torch.fx
+from torch._guards import Source
+
+from ..utils import (
+    namedtuple_fields,
+    odict_values,
+    # OrderedSet,
+    set_example_value,
+)
+from .base import MutableLocal, VariableTracker, VariableTrackerContainer
+
+if TYPE_CHECKING:
+    from torch._dynamo.codegen import PyCodegen
+
+
+class BaseListVariable(VariableTrackerContainer):
+    our_container = set
--- a/tools/test/set_linter_testdata/includes.py.txt.json
+++ b/tools/test/set_linter_testdata/includes.py.txt.json
@ -0,0 +1,35 @@
+[
+  {
+    "char": 0,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 18,
+    "name": "Add import for OrderedSet",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/includes.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 20,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 24,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/includes.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": null,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": null,
+    "name": "Suggested fixes for set_linter",
+    "original": "# mypy: ignore-errors\n\nimport collections\nimport types\nfrom typing import Any, Dict, List, Optional, TYPE_CHECKING\n\nimport torch\nimport torch.fx\nfrom torch._guards import Source\n\nfrom ..utils import (\n    namedtuple_fields,\n    odict_values,\n    # OrderedSet,\n    set_example_value,\n)\nfrom .base import MutableLocal, VariableTracker, VariableTrackerContainer\n\nif TYPE_CHECKING:\n    from torch._dynamo.codegen import PyCodegen\n\n\nclass BaseListVariable(VariableTrackerContainer):\n    our_container = set\n",
+    "path": "tools/test/set_linter_testdata/includes.py.txt",
+    "replacement": "# mypy: ignore-errors\n\nimport collections\nimport types\nfrom typing import Any, Dict, List, Optional, TYPE_CHECKING\n\nimport torch\nimport torch.fx\nfrom torch._guards import Source\n\nfrom ..utils import (\n    namedtuple_fields,\n    odict_values,\n    # OrderedSet,\n    set_example_value,\n)\nfrom .base import MutableLocal, VariableTracker, VariableTrackerContainer\nfrom torch.utils._ordered_set import OrderedSet\n\nif TYPE_CHECKING:\n    from torch._dynamo.codegen import PyCodegen\n\n\nclass BaseListVariable(VariableTrackerContainer):\n    our_container = OrderedSet\n",
+    "severity": "error"
+  }
+]
--- a/tools/test/set_linter_testdata/includes.py.txt.lintrunner
+++ b/tools/test/set_linter_testdata/includes.py.txt.lintrunner
@ -0,0 +1,13 @@
+tools/test/set_linter_testdata/includes.py.txt:18:1: Add import for OrderedSet
+   16 | )
+   17 | from .base import MutableLocal, VariableTracker, VariableTrackerContainer
+   18 |
+        ^
+   19 | if TYPE_CHECKING:
+   20 |     from torch._dynamo.codegen import PyCodegen
+
+tools/test/set_linter_testdata/includes.py.txt:24:21: Builtin `set` is deprecated
+   22 |
+   23 | class BaseListVariable(VariableTrackerContainer):
+   24 |     our_container = set
+                            ^^^
--- a/tools/test/set_linter_testdata/includes.py.txt.python
+++ b/tools/test/set_linter_testdata/includes.py.txt.python
@ -0,0 +1,25 @@
+# mypy: ignore-errors
+
+import collections
+import types
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+import torch
+import torch.fx
+from torch._guards import Source
+
+from ..utils import (
+    namedtuple_fields,
+    odict_values,
+    # OrderedSet,
+    set_example_value,
+)
+from .base import MutableLocal, VariableTracker, VariableTrackerContainer
+from torch.utils._ordered_set import OrderedSet
+
+if TYPE_CHECKING:
+    from torch._dynamo.codegen import PyCodegen
+
+
+class BaseListVariable(VariableTrackerContainer):
+    our_container = OrderedSet
--- a/tools/test/set_linter_testdata/includes_doesnt_change.py.txt
+++ b/tools/test/set_linter_testdata/includes_doesnt_change.py.txt
@ -0,0 +1,24 @@
+# mypy: ignore-errors
+
+import collections
+import types
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+import torch
+import torch.fx
+from torch._guards import Source
+
+from ..utils import (
+    namedtuple_fields,
+    odict_values,
+    OrderedSet,
+    set_example_value,
+)
+from .base import MutableLocal, VariableTracker, VariableTrackerContainer
+
+if TYPE_CHECKING:
+    from torch._dynamo.codegen import PyCodegen
+
+
+class BaseListVariable(VariableTrackerContainer):
+    our_container = set
--- a/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.json
+++ b/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.json
@ -0,0 +1,24 @@
+[
+  {
+    "char": 20,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 24,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/includes_doesnt_change.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": null,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": null,
+    "name": "Suggested fixes for set_linter",
+    "original": "# mypy: ignore-errors\n\nimport collections\nimport types\nfrom typing import Any, Dict, List, Optional, TYPE_CHECKING\n\nimport torch\nimport torch.fx\nfrom torch._guards import Source\n\nfrom ..utils import (\n    namedtuple_fields,\n    odict_values,\n    OrderedSet,\n    set_example_value,\n)\nfrom .base import MutableLocal, VariableTracker, VariableTrackerContainer\n\nif TYPE_CHECKING:\n    from torch._dynamo.codegen import PyCodegen\n\n\nclass BaseListVariable(VariableTrackerContainer):\n    our_container = set\n",
+    "path": "tools/test/set_linter_testdata/includes_doesnt_change.py.txt",
+    "replacement": "# mypy: ignore-errors\n\nimport collections\nimport types\nfrom typing import Any, Dict, List, Optional, TYPE_CHECKING\n\nimport torch\nimport torch.fx\nfrom torch._guards import Source\n\nfrom ..utils import (\n    namedtuple_fields,\n    odict_values,\n    OrderedSet,\n    set_example_value,\n)\nfrom .base import MutableLocal, VariableTracker, VariableTrackerContainer\n\nif TYPE_CHECKING:\n    from torch._dynamo.codegen import PyCodegen\n\n\nclass BaseListVariable(VariableTrackerContainer):\n    our_container = OrderedSet\n",
+    "severity": "error"
+  }
+]
--- a/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.lintrunner
+++ b/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.lintrunner
@ -0,0 +1,5 @@
+tools/test/set_linter_testdata/includes_doesnt_change.py.txt:24:21: Builtin `set` is deprecated
+   22 |
+   23 | class BaseListVariable(VariableTrackerContainer):
+   24 |     our_container = set
+                            ^^^
--- a/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.python
+++ b/tools/test/set_linter_testdata/includes_doesnt_change.py.txt.python
@ -0,0 +1,24 @@
+# mypy: ignore-errors
+
+import collections
+import types
+from typing import Any, Dict, List, Optional, TYPE_CHECKING
+
+import torch
+import torch.fx
+from torch._guards import Source
+
+from ..utils import (
+    namedtuple_fields,
+    odict_values,
+    OrderedSet,
+    set_example_value,
+)
+from .base import MutableLocal, VariableTracker, VariableTrackerContainer
+
+if TYPE_CHECKING:
+    from torch._dynamo.codegen import PyCodegen
+
+
+class BaseListVariable(VariableTrackerContainer):
+    our_container = OrderedSet
--- a/tools/test/set_linter_testdata/python_code.py.txt
+++ b/tools/test/set_linter_testdata/python_code.py.txt
@ -0,0 +1,41 @@
+# Basic tests
+
+ignored = set()  # noqa: set_linter
+a = set()
+b = "set()"
+c = set
+d = c.set
+f = (
+   set(
+   )
+)
+ignored = (
+   set(  # noqa: set_linter
+   )
+)
+
+# Non-sets
+
+d = {}
+long_string = """ set()
+set() set x.set set()
+\""""
+
+class A:
+    def set(self, x):
+        self.x = x
+
+set = A().set
+
+# Braced sets
+
+set1 = {1}
+set2 = {1, 2}
+
+iterator_set = {i for i in range(10)}
+
+# A dict with two sets.
+dict_set = {"a": {2, 3}, "b": {i for i in range(3)}}
+
+# A set containing an object constructed with a dict and a set
+sos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}
--- a/tools/test/set_linter_testdata/python_code.py.txt.json
+++ b/tools/test/set_linter_testdata/python_code.py.txt.json
@ -0,0 +1,211 @@
+[
+  {
+    "char": 0,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 0,
+    "name": "Add import for OrderedSet",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 4,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 4,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 4,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 6,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 3,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 9,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 7,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 32,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 9,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 32,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 7,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 33,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 12,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 33,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 15,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 35,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 36,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 35,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 17,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 38,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 22,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 38,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 30,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 38,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 50,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 38,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 10,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 41,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 51,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 41,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 75,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 41,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": 77,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": 41,
+    "name": "Builtin `set` is deprecated",
+    "original": null,
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": null,
+    "severity": "error"
+  },
+  {
+    "char": null,
+    "code": "SET_LINTER",
+    "description": null,
+    "line": null,
+    "name": "Suggested fixes for set_linter",
+    "original": "# Basic tests\n\nignored = set()  # noqa: set_linter\na = set()\nb = \"set()\"\nc = set\nd = c.set\nf = (\n   set(\n   )\n)\nignored = (\n   set(  # noqa: set_linter\n   )\n)\n\n# Non-sets\n\nd = {}\nlong_string = \"\"\" set()\nset() set x.set set()\n\\\"\"\"\"\n\nclass A:\n    def set(self, x):\n        self.x = x\n\nset = A().set\n\n# Braced sets\n\nset1 = {1}\nset2 = {1, 2}\n\niterator_set = {i for i in range(10)}\n\n# A dict with two sets.\ndict_set = {\"a\": {2, 3}, \"b\": {i for i in range(3)}}\n\n# A set containing an object constructed with a dict and a set\nsos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}\n",
+    "path": "tools/test/set_linter_testdata/python_code.py.txt",
+    "replacement": "# Basic tests\n\nignored = set()  # noqa: set_linter\na = OrderedSet()\nb = \"set()\"\nc = OrderedSet\nd = c.set\nf = (\n   OrderedSet(\n   )\n)\nignored = (\n   set(  # noqa: set_linter\n   )\n)\n\n# Non-sets\n\nd = {}\nlong_string = \"\"\" set()\nset() set x.set set()\n\\\"\"\"\"\n\nclass A:\n    def set(self, x):\n        self.x = x\n\nset = A().set\n\n# Braced sets\n\nset1 = OrderedSet([1])\nset2 = OrderedSet([1, 2])\n\niterator_set = OrderedSet([i for i in range(10)])\n\n# A dict with two sets.\ndict_set = {\"a\": OrderedSet([2, 3]), \"b\": OrderedSet([i for i in range(3)])}\n\n# A set containing an object constructed with a dict and a set\nfrom torchOrderedSet([utils._ordered_set import OrdOrderedSet([redSet\nsos_set = {Somet])ing({i: i + ]) for i in range(3)}, {i + 1 for i in range(3)})}\n",
+    "severity": "error"
+  }
+]
--- a/tools/test/set_linter_testdata/python_code.py.txt.lintrunner
+++ b/tools/test/set_linter_testdata/python_code.py.txt.lintrunner
@ -0,0 +1,134 @@
+tools/test/set_linter_testdata/python_code.py.txt:0:1: Add import for OrderedSet
+    1 | # Basic tests
+    2 |
+    3 | ignored = set()  # noqa: set_linter
+    4 | a = set()
+    5 | b = "set()"
+
+tools/test/set_linter_testdata/python_code.py.txt:4:5: Builtin `set` is deprecated
+    2 |
+    3 | ignored = set()  # noqa: set_linter
+    4 | a = set()
+            ^^^
+    5 | b = "set()"
+    6 | c = set
+
+tools/test/set_linter_testdata/python_code.py.txt:6:5: Builtin `set` is deprecated
+    4 | a = set()
+    5 | b = "set()"
+    6 | c = set
+            ^^^
+    7 | d = c.set
+    8 | f = (
+
+tools/test/set_linter_testdata/python_code.py.txt:9:4: Builtin `set` is deprecated
+    7 | d = c.set
+    8 | f = (
+    9 |    set(
+           ^^^
+   10 |    )
+   11 | )
+
+tools/test/set_linter_testdata/python_code.py.txt:32:8: Builtin `set` is deprecated
+   30 | # Braced sets
+   31 |
+   32 | set1 = {1}
+               ^
+   33 | set2 = {1, 2}
+   34 |
+
+tools/test/set_linter_testdata/python_code.py.txt:32:10: Builtin `set` is deprecated
+   30 | # Braced sets
+   31 |
+   32 | set1 = {1}
+                 ^
+   33 | set2 = {1, 2}
+   34 |
+
+tools/test/set_linter_testdata/python_code.py.txt:33:8: Builtin `set` is deprecated
+   31 |
+   32 | set1 = {1}
+   33 | set2 = {1, 2}
+               ^
+   34 |
+   35 | iterator_set = {i for i in range(10)}
+
+tools/test/set_linter_testdata/python_code.py.txt:33:13: Builtin `set` is deprecated
+   31 |
+   32 | set1 = {1}
+   33 | set2 = {1, 2}
+                    ^
+   34 |
+   35 | iterator_set = {i for i in range(10)}
+
+tools/test/set_linter_testdata/python_code.py.txt:35:16: Builtin `set` is deprecated
+   33 | set2 = {1, 2}
+   34 |
+   35 | iterator_set = {i for i in range(10)}
+                       ^
+   36 |
+   37 | # A dict with two sets.
+
+tools/test/set_linter_testdata/python_code.py.txt:35:37: Builtin `set` is deprecated
+   33 | set2 = {1, 2}
+   34 |
+   35 | iterator_set = {i for i in range(10)}
+                                            ^
+   36 |
+   37 | # A dict with two sets.
+
+tools/test/set_linter_testdata/python_code.py.txt:38:18: Builtin `set` is deprecated
+   36 |
+   37 | # A dict with two sets.
+   38 | dict_set = {"a": {2, 3}, "b": {i for i in range(3)}}
+                         ^
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+
+tools/test/set_linter_testdata/python_code.py.txt:38:23: Builtin `set` is deprecated
+   36 |
+   37 | # A dict with two sets.
+   38 | dict_set = {"a": {2, 3}, "b": {i for i in range(3)}}
+                              ^
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+
+tools/test/set_linter_testdata/python_code.py.txt:38:31: Builtin `set` is deprecated
+   36 |
+   37 | # A dict with two sets.
+   38 | dict_set = {"a": {2, 3}, "b": {i for i in range(3)}}
+                                      ^
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+
+tools/test/set_linter_testdata/python_code.py.txt:38:51: Builtin `set` is deprecated
+   36 |
+   37 | # A dict with two sets.
+   38 | dict_set = {"a": {2, 3}, "b": {i for i in range(3)}}
+                                                          ^
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+
+tools/test/set_linter_testdata/python_code.py.txt:41:11: Builtin `set` is deprecated
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+   41 | sos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}
+                  ^
+
+tools/test/set_linter_testdata/python_code.py.txt:41:52: Builtin `set` is deprecated
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+   41 | sos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}
+                                                           ^
+
+tools/test/set_linter_testdata/python_code.py.txt:41:76: Builtin `set` is deprecated
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+   41 | sos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}
+                                                                                   ^
+
+tools/test/set_linter_testdata/python_code.py.txt:41:78: Builtin `set` is deprecated
+   39 |
+   40 | # A set containing an object constructed with a dict and a set
+   41 | sos_set = {Something({i: i + 1 for i in range(3)}, {i + 1 for i in range(3)})}
+                                                                                     ^
--- a/tools/test/set_linter_testdata/python_code.py.txt.python
+++ b/tools/test/set_linter_testdata/python_code.py.txt.python
@ -0,0 +1,42 @@
+# Basic tests
+
+ignored = set()  # noqa: set_linter
+a = OrderedSet()
+b = "set()"
+c = OrderedSet
+d = c.set
+f = (
+   OrderedSet(
+   )
+)
+ignored = (
+   set(  # noqa: set_linter
+   )
+)
+
+# Non-sets
+
+d = {}
+long_string = """ set()
+set() set x.set set()
+\""""
+
+class A:
+    def set(self, x):
+        self.x = x
+
+set = A().set
+
+# Braced sets
+
+set1 = OrderedSet([1])
+set2 = OrderedSet([1, 2])
+
+iterator_set = OrderedSet([i for i in range(10)])
+
+# A dict with two sets.
+dict_set = {"a": OrderedSet([2, 3]), "b": OrderedSet([i for i in range(3)])}
+
+# A set containing an object constructed with a dict and a set
+from torchOrderedSet([utils._ordered_set import OrdOrderedSet([redSet
+sos_set = {Somet])ing({i: i + ]) for i in range(3)}, {i + 1 for i in range(3)})}
--- a/tools/test/test_set_linter.py
+++ b/tools/test/test_set_linter.py
@ -0,0 +1,96 @@
+# mypy: ignore-errors
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+from token import NAME
+from tokenize import TokenInfo
+
+from tools.linter.adapters._linter import PythonFile
+from tools.linter.adapters.set_linter import PythonLines, SetLinter
+
+
+# Absolute directory containing this test module, and the absolute form of
+# every entry currently on sys.path.
+_PARENT = Path(__file__).parent.absolute()
+_PATH = [Path(p).absolute() for p in sys.path]
+
+# If this directory is itself on sys.path, `linter_test_case` is importable as
+# a top-level module; otherwise use the package-relative import.
+# NOTE(review): presumably this supports both running the file directly and
+# importing it as part of the `tools.test` package -- confirm.
+if _PARENT in _PATH:
+    from linter_test_case import LinterTestCase
+else:
+    from .linter_test_case import LinterTestCase
+
+
+# Directory holding the set_linter fixtures. The path is relative, so it is
+# resolved against the current working directory.
+# NOTE(review): presumably the tests are run from the repository root -- confirm.
+TESTDATA = Path("tools/test/set_linter_testdata")
+
+# Fixture inputs fed to the linter.
+TESTFILE = TESTDATA / "python_code.py.txt"
+INCLUDES_FILE = TESTDATA / "includes.py.txt"
+INCLUDES_FILE2 = TESTDATA / "includes_doesnt_change.py.txt"
+# All fixture inputs, as one tuple.
+FILES = TESTFILE, INCLUDES_FILE, INCLUDES_FILE2
+
+
+def python_lines(p: str | Path) -> PythonLines:
+    pf = PythonFile.make(SetLinter.linter_name, p)
+    return PythonLines(pf)
+
+
+class TestSetLinter(LinterTestCase):
+    maxDiff = 10000000
+    LinterClass = SetLinter
+
+    def test_get_all_tokens(self) -> None:
+        self.assertEqual(EXPECTED_SETS, python_lines(TESTFILE).sets)
+
+    def test_omitted_lines(self) -> None:
+        actual = sorted(python_lines(TESTFILE).omitted.omitted)
+        expected = [3, 13]
+        self.assertEqual(expected, actual)
+
+    def test_linting(self) -> None:
+        for path in (TESTFILE, INCLUDES_FILE, INCLUDES_FILE2):
+            with self.subTest(path):
+                r = self.lint_fix_test(path, [])
+                self.assertEqual(r.name, "Suggested fixes for set_linter")
+
+    def test_bracket_pairs(self) -> None:
+        TESTS: tuple[tuple[str, dict[int, int]], ...] = (
+            ("", {}),
+            ("{}", {0: 1}),
+            ("{1}", {0: 2}),
+            ("{1, 2}", {0: 4}),
+            ("{1: 2}", {0: 4}),
+            ("{One()}", {0: 4, 2: 3}),
+            (
+                "{One({1: [2], 2: {3}, 3: {4: 5}})}",
+                {0: 25, 2: 24, 3: 23, 6: 8, 12: 14, 18: 22},
+            ),
+        )
+        for i, (s, expected) in enumerate(TESTS):
+            pl = python_lines(s)
+            if s:
+                actual = pl.token_lines[0].bracket_pairs
+            else:
+                self.assertEqual(pl.token_lines, [])
+                actual = {}
+            self.assertEqual(actual, expected)
+
+    def test_match_braced_sets(self) -> None:
+        TESTS: tuple[tuple[str, int], ...] = (
+            ("{cast(int, inst.offset): inst for inst in instructions}", 0),
+            ("", 0),
+            ("{}", 0),
+            ("{1: 0}", 0),
+            ("{1}", 1),
+            ("{i for i in range(2, 3)}", 1),
+            ("{1, 2}", 1),
+            ("{One({'a': 1}), Two([{}, {2}, {1, 2}])}", 3),
+        )
+        for i, (s, expected) in enumerate(TESTS):
+            pl = python_lines(s)
+            actual = pl.token_lines and pl.token_lines[0].braced_sets
+            self.assertEqual(len(actual), expected)
+
+
+EXPECTED_SETS = [
+    TokenInfo(NAME, "set", (4, 4), (4, 7), "a = set()\n"),
+    TokenInfo(NAME, "set", (6, 4), (6, 7), "c = set\n"),
+    TokenInfo(NAME, "set", (9, 3), (9, 6), "   set(\n"),
+]